/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/elf-randomize.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
				int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
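
/*
 * A quick worked example of the alignment helpers, assuming
 * ELF_MIN_ALIGN == 4096 (0x1000):
 *
 *	ELF_PAGESTART(0x1234)  == 0x1000	(round down to page start)
 *	ELF_PAGEOFFSET(0x1234) == 0x0234	(offset within the page)
 *	ELF_PAGEALIGN(0x1234)  == 0x2000	(round up to the next page)
 */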

static struct linux_binfmt elf_format = {
	.module		= THIS_MODULE,
	.load_binary	= load_elf_binary,
	.load_shlib	= load_elf_library,
	.core_dump	= elf_core_dump,
	.min_coredump	= ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end, int prot)
{
	start = ELF_PAGEALIGN(start);
	end = ELF_PAGEALIGN(end);
	if (end > start) {
		/*
		 * Map the last of the bss segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error = vm_brk_flags(start, end - start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			return error;
	}
	current->mm->start_brk = current->mm->brk = end;
	return 0;
}

/*
 * We need to explicitly zero any fractional pages
 * after the data section (i.e. bss).  This would
 * contain the junk from the file that should not
 * be in memory
 */
static int padzero(unsigned long elf_bss)
{
	unsigned long nbyte;

	nbyte = ELF_PAGEOFFSET(elf_bss);
	if (nbyte) {
		nbyte = ELF_MIN_ALIGN - nbyte;
		if (clear_user((void __user *) elf_bss, nbyte))
			return -EFAULT;
	}
	return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
		unsigned long load_addr, unsigned long interp_load_addr)
{
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *argv;
	elf_addr_t __user *envp;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	int ei_index = 0;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (__copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}
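
	/*
	 * Note: userspace consumes AT_RANDOM directly; glibc, for
	 * instance, derives its stack-protector canary and pointer
	 * mangling guard from these bytes rather than from its own
	 * entropy source.
	 */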

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)current->mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		elf_info[ei_index++] = id; \
		elf_info[ei_index++] = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	NEW_AUX_ENT(AT_FLAGS, 0);
	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
	}
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(&elf_info[ei_index], 0,
	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

	/* And advance past the AT_NULL entry. */
	ei_index += 2;

	sp = STACK_ADD(p, ei_index);

	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif
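
	/*
	 * Sketch of what we are about to build (classic System V ABI
	 * layout; shown for a downward-growing stack, lowest address
	 * first):
	 *
	 *	sp ->	argc
	 *		argv[0] ... argv[argc - 1], NULL
	 *		envp[0] ... envp[envc - 1], NULL
	 *		auxv id/value pairs, terminated by an AT_NULL pair
	 *		(the strings and random bytes live higher up)
	 */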

	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	vma = find_extend_vma(current->mm, bprm->p);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (__put_user(argc, sp++))
		return -EFAULT;
	argv = sp;
	envp = argv + argc + 1;

	/* Populate argv and envp */
	p = current->mm->arg_end = current->mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, argv++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, argv))
		return -EFAULT;
	current->mm->arg_end = current->mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (__put_user((elf_addr_t)p, envp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (__put_user(0, envp))
		return -EFAULT;
	current->mm->env_end = p;

	/* Put the elf_info on the stack in the right place.  */
	sp = (elf_addr_t __user *)envp + 1;
	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
		struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	 * total_size is the size of the ELF (interpreter) image.
	 * The _first_ mmap needs to know the full size, otherwise
	 * randomization might put this image into an overlapping
	 * position with the ELF binary image. (since size < total_size)
	 * So we first map the 'big' image - and unmap the remainder at
	 * the end. (which unmap is needed for ELF images with holes.)
	 */
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
	int i, first_idx = -1, last_idx = -1;

	for (i = 0; i < nr; i++) {
		if (cmds[i].p_type == PT_LOAD) {
			last_idx = i;
			if (first_idx == -1)
				first_idx = i;
		}
	}
	if (first_idx == -1)
		return 0;

	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
				ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
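
/*
 * For example (hypothetical numbers, 4K pages): with two PT_LOAD
 * segments, the first at p_vaddr 0x400000 and the last covering
 * 0x600000..0x600500 (p_memsz 0x500), total_mapping_size() yields
 * 0x600500 - 0x400000 = 0x200500, i.e. the whole span the image
 * needs, including any hole between the segments.
 */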

/**
 * load_elf_phdrs() - load ELF program headers
 * @elf_ex:   ELF header of the binary whose program headers should be loaded
 * @elf_file: the opened ELF binary file
 *
 * Loads ELF program headers from the binary file elf_file, which has the ELF
 * header pointed to by elf_ex, into a newly allocated array. The caller is
 * responsible for freeing the allocated data. Returns NULL upon failure.
 */
static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
				       struct file *elf_file)
{
	struct elf_phdr *elf_phdata = NULL;
	int retval, size, err = -1;

	/*
	 * If the size of this structure has changed, then punt, since
	 * we will be doing the wrong thing.
	 */
	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
		goto out;

	/* Sanity check the number of program headers... */
	if (elf_ex->e_phnum < 1 ||
		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
		goto out;

	/* ...and their total size. */
	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
	if (size > ELF_MIN_ALIGN)
		goto out;

	elf_phdata = kmalloc(size, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	/* Read in the program headers */
	retval = kernel_read(elf_file, elf_ex->e_phoff,
			     (char *)elf_phdata, size);
	if (retval != size) {
		err = (retval < 0) ? retval : -EIO;
		goto out;
	}

	/* Success! */
	err = 0;
out:
	if (err) {
		kfree(elf_phdata);
		elf_phdata = NULL;
	}
	return elf_phdata;
}

#ifndef CONFIG_ARCH_BINFMT_ELF_STATE

/**
 * struct arch_elf_state - arch-specific ELF loading state
 *
 * This structure is used to preserve architecture specific data during
 * the loading of an ELF file, throughout the checking of architecture
 * specific ELF headers & through to the point where the ELF load is
 * known to be proceeding (ie. SET_PERSONALITY).
 *
 * This implementation is a dummy for architectures which require no
 * specific state.
 */
struct arch_elf_state {
};

#define INIT_ARCH_ELF_STATE {}

/**
 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
 * @ehdr:	The main ELF header
 * @phdr:	The program header to check
 * @elf:	The open ELF file
 * @is_interp:	True if the phdr is from the interpreter of the ELF being
 *		loaded, else false.
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Inspects the program header phdr to validate its correctness and/or
 * suitability for the system. Called once per ELF program header in the
 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
 * interpreter.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

/**
 * arch_check_elf() - check an ELF executable
 * @ehdr:	The main ELF header
 * @has_interp:	True if the ELF has an interpreter, else false.
 * @interp_ehdr: The interpreter's ELF header
 * @state:	Architecture-specific state preserved throughout the process
 *		of loading the ELF.
 *
 * Provides a final opportunity for architecture code to reject the loading
 * of the ELF & cause an exec syscall to return an error. This is called after
 * all program headers to be checked by arch_elf_pt_proc have been.
 *
 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
 *         with that return code.
 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/* Dummy implementation, always proceed */
	return 0;
}

#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter, unsigned long *interp_map_addr,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long last_bss = 0, elf_bss = 0;
	int bss_prot = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex))
		goto out;
	if (!interpreter->f_op->mmap)
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
			int elf_prot = 0;
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			if (eppnt->p_flags & PF_R)
				elf_prot = PROT_READ;
			if (eppnt->p_flags & PF_W)
				elf_prot |= PROT_WRITE;
			if (eppnt->p_flags & PF_X)
				elf_prot |= PROT_EXEC;
			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_map(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			total_size = 0;
			if (!*interp_map_addr)
				*interp_map_addr = map_addr;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsz so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}

			/*
			 * Find the end of the file mapping for this phdr, and
			 * keep track of the largest address we see for this.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
			if (k > elf_bss)
				elf_bss = k;

			/*
			 * Do the same thing for the memory mapping - between
			 * elf_bss and last_bss is the bss section.
			 */
			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
			if (k > last_bss) {
				last_bss = k;
				bss_prot = elf_prot;
			}
		}
	}
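
	/*
	 * What elf_bss/last_bss now delimit (a sketch; addresses grow
	 * to the right):
	 *
	 *	|<------------- p_memsz ------------->|
	 *	|<---- p_filesz ---->|
	 *	+--------------------+----------------+
	 *	|  file-backed data  |  zero-filled   |
	 *	+--------------------+----------------+
	 *	^                    ^                ^
	 *	load address      elf_bss         last_bss
	 */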

	/*
	 * Now fill out the bss section: first pad the last page from
	 * the file up to the page boundary, and zero it from elf_bss
	 * up to the end of the page.
	 */
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out;
	}
	/*
	 * Next, align both the file and mem bss up to the page size,
	 * since this is where elf_bss was just zeroed up to, and where
	 * last_bss will end after the vm_brk_flags() below.
	 */
	elf_bss = ELF_PAGEALIGN(elf_bss);
	last_bss = ELF_PAGEALIGN(last_bss);
	/* Finally, if there is still more bss to allocate, do it. */
	if (last_bss > elf_bss) {
		error = vm_brk_flags(elf_bss, last_bss - elf_bss,
				bss_prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			goto out;
	}

	error = load_addr;
out:
	return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
	unsigned long random_variable = 0;

	if ((current->flags & PF_RANDOMIZE) &&
		!(current->personality & ADDR_NO_RANDOMIZE)) {
		random_variable = get_random_long();
		random_variable &= STACK_RND_MASK;
		random_variable <<= PAGE_SHIFT;
	}
#ifdef CONFIG_STACK_GROWSUP
	return PAGE_ALIGN(stack_top) + random_variable;
#else
	return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
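
/*
 * Worked example of the default mask, assuming PAGE_SHIFT == 12:
 * STACK_RND_MASK is 0x7ff, so random_variable is at most 2047 pages,
 * which after "<<= PAGE_SHIFT" moves the stack top by up to ~8MB of
 * virtual address space below (or above, for upward stacks) STACK_TOP.
 */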

static int load_elf_binary(struct linux_binprm *bprm)
{
	struct file *interpreter = NULL; /* to shut gcc up */
	unsigned long load_addr = 0, load_bias = 0;
	int load_addr_set = 0;
	char * elf_interpreter = NULL;
	unsigned long error;
	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
	unsigned long elf_bss, elf_brk;
	int bss_prot = 0;
	int retval, i;
	unsigned long elf_entry;
	unsigned long interp_load_addr = 0;
	unsigned long start_code, end_code, start_data, end_data;
	unsigned long reloc_func_desc __maybe_unused = 0;
	int executable_stack = EXSTACK_DEFAULT;
	struct pt_regs *regs = current_pt_regs();
	struct {
		struct elfhdr elf_ex;
		struct elfhdr interp_elf_ex;
	} *loc;
	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;

	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		retval = -ENOMEM;
		goto out_ret;
	}

	/* Get the exec-header */
	loc->elf_ex = *((struct elfhdr *)bprm->buf);

	retval = -ENOEXEC;
	/* First of all, some simple consistency checks */
	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(&loc->elf_ex))
		goto out;
	if (!bprm->file->f_op->mmap)
		goto out;

	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
	if (!elf_phdata)
		goto out;

	elf_ppnt = elf_phdata;
	elf_bss = 0;
	elf_brk = 0;

	start_code = ~0UL;
	end_code = 0;
	start_data = 0;
	end_data = 0;

	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
		if (elf_ppnt->p_type == PT_INTERP) {
			/* This is the program interpreter used for
			 * shared libraries - for now assume that this
			 * is an a.out format binary
			 */
			retval = -ENOEXEC;
			if (elf_ppnt->p_filesz > PATH_MAX ||
			    elf_ppnt->p_filesz < 2)
				goto out_free_ph;

			retval = -ENOMEM;
			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
						  GFP_KERNEL);
			if (!elf_interpreter)
				goto out_free_ph;

			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
					     elf_interpreter,
					     elf_ppnt->p_filesz);
			if (retval != elf_ppnt->p_filesz) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_interp;
			}
			/* make sure path is NULL terminated */
			retval = -ENOEXEC;
			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
				goto out_free_interp;

			interpreter = open_exec(elf_interpreter);
			retval = PTR_ERR(interpreter);
			if (IS_ERR(interpreter))
				goto out_free_interp;

			/*
			 * If the binary is not readable then enforce
			 * mm->dumpable = 0 regardless of the interpreter's
			 * permissions.
			 */
			would_dump(bprm, interpreter);

			/* Get the exec headers */
			retval = kernel_read(interpreter, 0,
					     (void *)&loc->interp_elf_ex,
					     sizeof(loc->interp_elf_ex));
			if (retval != sizeof(loc->interp_elf_ex)) {
				if (retval >= 0)
					retval = -EIO;
				goto out_free_dentry;
			}

			break;
		}
		elf_ppnt++;
	}

	elf_ppnt = elf_phdata;
	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
		switch (elf_ppnt->p_type) {
		case PT_GNU_STACK:
			if (elf_ppnt->p_flags & PF_X)
				executable_stack = EXSTACK_ENABLE_X;
			else
				executable_stack = EXSTACK_DISABLE_X;
			break;

		case PT_LOPROC ... PT_HIPROC:
			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
						  bprm->file, false,
						  &arch_state);
			if (retval)
				goto out_free_dentry;
			break;
		}

	/* Some simple consistency checks for the interpreter */
	if (elf_interpreter) {
		retval = -ELIBBAD;
		/* Not an ELF interpreter */
		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
			goto out_free_dentry;
		/* Verify the interpreter has a valid arch */
		if (!elf_check_arch(&loc->interp_elf_ex))
			goto out_free_dentry;

		/* Load the interpreter program headers */
		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
						   interpreter);
		if (!interp_elf_phdata)
			goto out_free_dentry;

		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
		elf_ppnt = interp_elf_phdata;
		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
			switch (elf_ppnt->p_type) {
			case PT_LOPROC ... PT_HIPROC:
				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
							  elf_ppnt, interpreter,
							  true, &arch_state);
				if (retval)
					goto out_free_dentry;
				break;
			}
	}

	/*
	 * Allow arch code to reject the ELF at this point, whilst it's
	 * still possible to return an error to the code that invoked
	 * the exec syscall.
	 */
	retval = arch_check_elf(&loc->elf_ex,
				!!interpreter, &loc->interp_elf_ex,
				&arch_state);
	if (retval)
		goto out_free_dentry;

	/* Flush all traces of the currently running executable */
	retval = flush_old_exec(bprm);
	if (retval)
		goto out_free_dentry;

	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
	   may depend on the personality.  */
	SET_PERSONALITY2(loc->elf_ex, &arch_state);
	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
		current->personality |= READ_IMPLIES_EXEC;

	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		current->flags |= PF_RANDOMIZE;

	setup_new_exec(bprm);
	install_exec_creds(bprm);
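
	/*
	 * By now flush_old_exec() has replaced the old mm, so a failure
	 * below cannot simply return an error to the exec caller: there
	 * is no program image left to resume, and the caller will kill
	 * the process instead (see search_binary_handler()).
	 */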

	/* Do this so that we can load the interpreter, if need be.  We will
	   change some of these later */
	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
				 executable_stack);
	if (retval < 0)
		goto out_free_dentry;

	current->mm->start_stack = bprm->p;

	/* Now we do a little grungy work by mmapping the ELF image into
	   the correct location in memory. */
	for(i = 0, elf_ppnt = elf_phdata;
	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
		int elf_prot = 0, elf_flags;
		unsigned long k, vaddr;
		unsigned long total_size = 0;

		if (elf_ppnt->p_type != PT_LOAD)
			continue;

		if (unlikely (elf_brk > elf_bss)) {
			unsigned long nbyte;

			/* There was a PT_LOAD segment with p_memsz > p_filesz
			   before this one. Map anonymous pages, if needed,
			   and clear the area.  */
			retval = set_brk(elf_bss + load_bias,
					 elf_brk + load_bias,
					 bss_prot);
			if (retval)
				goto out_free_dentry;
			nbyte = ELF_PAGEOFFSET(elf_bss);
			if (nbyte) {
				nbyte = ELF_MIN_ALIGN - nbyte;
				if (nbyte > elf_brk - elf_bss)
					nbyte = elf_brk - elf_bss;
				if (clear_user((void __user *)elf_bss +
							load_bias, nbyte)) {
					/*
					 * This bss-zeroing can fail if the ELF
					 * file specifies odd protections. So
					 * we don't check the return value
					 */
				}
			}
		}

		if (elf_ppnt->p_flags & PF_R)
			elf_prot |= PROT_READ;
		if (elf_ppnt->p_flags & PF_W)
			elf_prot |= PROT_WRITE;
		if (elf_ppnt->p_flags & PF_X)
			elf_prot |= PROT_EXEC;

		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

		vaddr = elf_ppnt->p_vaddr;
		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
			elf_flags |= MAP_FIXED;
		} else if (loc->elf_ex.e_type == ET_DYN) {
			/* Try and get dynamic programs out of the way of the
			 * default mmap base, as well as whatever program they
			 * might try to exec.  This is because the brk will
			 * follow the loader, and is not movable.  */
			load_bias = ELF_ET_DYN_BASE - vaddr;
			if (current->flags & PF_RANDOMIZE)
				load_bias += arch_mmap_rnd();
			load_bias = ELF_PAGESTART(load_bias);
			total_size = total_mapping_size(elf_phdata,
							loc->elf_ex.e_phnum);
			if (!total_size) {
				retval = -EINVAL;
				goto out_free_dentry;
			}
		}

		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
				elf_prot, elf_flags, total_size);
		if (BAD_ADDR(error)) {
			retval = IS_ERR((void *)error) ?
				PTR_ERR((void*)error) : -EINVAL;
			goto out_free_dentry;
		}

		if (!load_addr_set) {
			load_addr_set = 1;
			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
			if (loc->elf_ex.e_type == ET_DYN) {
				load_bias += error -
					     ELF_PAGESTART(load_bias + vaddr);
				load_addr += load_bias;
				reloc_func_desc = load_bias;
			}
		}
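
		/*
		 * At this point load_addr is where the program headers
		 * landed in memory, while load_bias is the delta added
		 * to every p_vaddr: zero for ET_EXEC, and the (possibly
		 * randomized) base for ET_DYN/PIE binaries.
		 */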
		k = elf_ppnt->p_vaddr;
		if (k < start_code)
			start_code = k;
		if (start_data < k)
			start_data = k;

		/*
		 * Check to see if the section's size will overflow the
		 * allowed task size. Note that p_filesz must always be
		 * <= p_memsz so it is only necessary to check p_memsz.
		 */
		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
		    elf_ppnt->p_memsz > TASK_SIZE ||
		    TASK_SIZE - elf_ppnt->p_memsz < k) {
			/* set_brk can never work. Avoid overflows. */
			retval = -EINVAL;
			goto out_free_dentry;
		}

		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

		if (k > elf_bss)
			elf_bss = k;
		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
			end_code = k;
		if (end_data < k)
			end_data = k;
		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
		if (k > elf_brk) {
			bss_prot = elf_prot;
			elf_brk = k;
		}
	}

	loc->elf_ex.e_entry += load_bias;
	elf_bss += load_bias;
	elf_brk += load_bias;
	start_code += load_bias;
	end_code += load_bias;
	start_data += load_bias;
	end_data += load_bias;

	/* Calling set_brk effectively mmaps the pages that we need
	 * for the bss and break sections.  We must do this before
	 * mapping in the interpreter, to make sure it doesn't wind
	 * up getting placed where the bss needs to go.
	 */
	retval = set_brk(elf_bss, elf_brk, bss_prot);
	if (retval)
		goto out_free_dentry;
	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
		retval = -EFAULT; /* Nobody gets to see this, but.. */
		goto out_free_dentry;
	}

	if (elf_interpreter) {
		unsigned long interp_map_addr = 0;

		elf_entry = load_elf_interp(&loc->interp_elf_ex,
					    interpreter,
					    &interp_map_addr,
					    load_bias, interp_elf_phdata);
		if (!IS_ERR((void *)elf_entry)) {
			/*
			 * load_elf_interp() returns relocation
			 * adjustment
			 */
			interp_load_addr = elf_entry;
			elf_entry += loc->interp_elf_ex.e_entry;
		}
		if (BAD_ADDR(elf_entry)) {
			retval = IS_ERR((void *)elf_entry) ?
					(int)elf_entry : -EINVAL;
			goto out_free_dentry;
		}
		reloc_func_desc = interp_load_addr;

		allow_write_access(interpreter);
		fput(interpreter);
		kfree(elf_interpreter);
	} else {
		elf_entry = loc->elf_ex.e_entry;
		if (BAD_ADDR(elf_entry)) {
			retval = -EINVAL;
			goto out_free_dentry;
		}
	}

	kfree(interp_elf_phdata);
	kfree(elf_phdata);

	set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
	if (retval < 0)
		goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

	retval = create_elf_tables(bprm, &loc->elf_ex,
			  load_addr, interp_load_addr);
	if (retval < 0)
		goto out;
	/* N.B. passed_fileno might not be initialized? */
	current->mm->end_code = end_code;
	current->mm->start_code = start_code;
	current->mm->start_data = start_data;
	current->mm->end_data = end_data;
	current->mm->start_stack = bprm->p;

	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
		current->mm->brk = current->mm->start_brk =
			arch_randomize_brk(current->mm);
#ifdef compat_brk_randomized
		current->brk_randomized = 1;
#endif
	}

	if (current->personality & MMAP_PAGE_ZERO) {
		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
		   and some applications "depend" upon this behavior.
		   Since we do not have the power to recompile these, we
		   emulate the SVr4 behavior. Sigh. */
		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
				MAP_FIXED | MAP_PRIVATE, 0);
	}

#ifdef ELF_PLAT_INIT
	/*
	 * The ABI may specify that certain registers be set up in special
	 * ways (on i386 %edx is the address of a DT_FINI function, for
	 * example).  In addition, it may also specify (eg, PowerPC64 ELF)
	 * that the e_entry field is the address of the function descriptor
	 * for the startup routine, rather than the address of the startup
	 * routine itself.  This macro performs whatever initialization to
	 * the regs structure is required as well as any relocations to the
	 * function descriptor entries when executing dynamically linked apps.
	 */
	ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

	start_thread(regs, elf_entry, bprm->p);
	retval = 0;
out:
	kfree(loc);
out_ret:
	return retval;

	/* error cleanup */
out_free_dentry:
	kfree(interp_elf_phdata);
	allow_write_access(interpreter);
	if (interpreter)
		fput(interpreter);
out_free_interp:
	kfree(elf_interpreter);
out_free_ph:
	kfree(elf_phdata);
	goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
	struct elf_phdr *elf_phdata;
	struct elf_phdr *eppnt;
	unsigned long elf_bss, bss, len;
	int retval, error, i, j;
	struct elfhdr elf_ex;

	error = -ENOEXEC;
	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
	if (retval != sizeof(elf_ex))
		goto out;

	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* First of all, some simple consistency checks */
	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
		goto out;

	/* Now read in all of the header information */

	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

	error = -ENOMEM;
	elf_phdata = kmalloc(j, GFP_KERNEL);
	if (!elf_phdata)
		goto out;

	eppnt = elf_phdata;
	error = -ENOEXEC;
	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
	if (retval != j)
		goto out_free_ph;

	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
		if ((eppnt + i)->p_type == PT_LOAD)
			j++;
	if (j != 1)
		goto out_free_ph;

	while (eppnt->p_type != PT_LOAD)
		eppnt++;
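
	/*
	 * uselib(2) is a legacy interface (historically used by the
	 * libc4/libc5-era dynamic linking scheme); the single PT_LOAD
	 * segment is mapped MAP_FIXED at the address the library was
	 * linked at.
	 */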
	/* Now use mmap to map the library into memory. */
	error = vm_mmap(file,
			ELF_PAGESTART(eppnt->p_vaddr),
			(eppnt->p_filesz +
			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
			PROT_READ | PROT_WRITE | PROT_EXEC,
			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
			(eppnt->p_offset -
			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
	if (error != ELF_PAGESTART(eppnt->p_vaddr))
		goto out_free_ph;

	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
	if (padzero(elf_bss)) {
		error = -EFAULT;
		goto out_free_ph;
	}

	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
			    ELF_MIN_ALIGN - 1);
	bss = eppnt->p_memsz + eppnt->p_vaddr;
	if (bss > len) {
		error = vm_brk(len, bss - len);
		if (error)
			goto out_free_ph;
	}
	error = 0;

out_free_ph:
	kfree(elf_phdata);
out:
	return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include - vDSO, vsyscall, and other
 * architecture specific mappings
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
	/* Any vsyscall mappings? */
	if (vma == get_gate_vma(vma->vm_mm))
		return true;

	/*
	 * Assume that all vmas with a .name op should always be dumped.
	 * If this changes, a new vm_ops field can easily be added.
	 */
	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
		return true;

	/*
	 * arch_vma_name() returns non-NULL for special architecture mappings,
	 * such as vDSO sections.
	 */
	if (arch_vma_name(vma))
		return true;

	return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
				   unsigned long mm_flags)
{
#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
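
	/*
	 * The MMF_DUMP_* bits tested by FILTER() are the per-mm core
	 * dump filter, user-tunable via /proc/<pid>/coredump_filter.
	 */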

	/* always dump the vdso and vsyscall sections */
	if (always_dump_vma(vma))
		goto whole;

	if (vma->vm_flags & VM_DONTDUMP)
		return 0;

	/* support for DAX */
	if (vma_is_dax(vma)) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
			goto whole;
		return 0;
	}

	/* Hugetlb memory check */
	if (vma->vm_flags & VM_HUGETLB) {
		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
			goto whole;
		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
			goto whole;
		return 0;
	}

	/* Do not dump I/O mapped devices or special mappings */
	if (vma->vm_flags & VM_IO)
		return 0;

	/* By default, dump shared memory if mapped from an anonymous file. */
	if (vma->vm_flags & VM_SHARED) {
		if (file_inode(vma->vm_file)->i_nlink == 0 ?
		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
			goto whole;
		return 0;
	}

	/* Dump segments that have been written to.  */
	if (vma->anon_vma && FILTER(ANON_PRIVATE))
		goto whole;
	if (vma->vm_file == NULL)
		return 0;

	if (FILTER(MAPPED_PRIVATE))
		goto whole;

	/*
	 * If this looks like the beginning of a DSO or executable mapping,
	 * check for an ELF header.  If we find one, dump the first page to
	 * aid in determining what was mapped here.
	 */
	if (FILTER(ELF_HEADERS) &&
	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
		u32 __user *header = (u32 __user *) vma->vm_start;
		u32 word;
		mm_segment_t fs = get_fs();
		/*
		 * Doing it this way gets the constant folded by GCC.
		 */
		union {
			u32 cmp;
			char elfmag[SELFMAG];
		} magic;
		BUILD_BUG_ON(SELFMAG != sizeof word);
		magic.elfmag[EI_MAG0] = ELFMAG0;
		magic.elfmag[EI_MAG1] = ELFMAG1;
		magic.elfmag[EI_MAG2] = ELFMAG2;
		magic.elfmag[EI_MAG3] = ELFMAG3;
		/*
		 * Switch to the user "segment" for get_user(),
		 * then put back what elf_core_dump() had in place.
		 */
		set_fs(USER_DS);
		if (unlikely(get_user(word, header)))
			word = 0;
		set_fs(fs);
		if (word == magic.cmp)
			return PAGE_SIZE;
	}

#undef FILTER

	return 0;

whole:
	return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
	const char *name;
	int type;
	unsigned int datasz;
	void *data;
};

static int notesize(struct memelfnote *en)
{
	int sz;

	sz = sizeof(struct elf_note);
	sz += roundup(strlen(en->name) + 1, 4);
	sz += roundup(en->datasz, 4);

	return sz;
}

static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
	struct elf_note en;
	en.n_namesz = strlen(men->name) + 1;
	en.n_descsz = men->datasz;
	en.n_type = men->type;

	return dump_emit(cprm, &en, sizeof(en)) &&
	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}
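
/*
 * On-disk layout of a single note, as emitted by writenote() above
 * (everything padded to 4-byte alignment):
 *
 *	Elf_Nhdr { n_namesz, n_descsz, n_type }
 *	name bytes ("CORE" or "LINUX"), NUL, padding
 *	descriptor bytes (e.g. a prstatus struct), padding
 */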

static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;

	elf->e_type = ET_CORE;
	elf->e_machine = machine;
	elf->e_version = EV_CURRENT;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_flags = flags;
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = segs;

	return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	phdr->p_type = PT_NOTE;
	phdr->p_offset = offset;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = sz;
	phdr->p_memsz = 0;
	phdr->p_flags = 0;
	phdr->p_align = 0;
	return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
	return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
		struct task_struct *p, long signr)
{
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_timeval(utime);
		prstatus->pr_stime = ns_to_timeval(stime);
	}

	prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
}
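
/*
 * fill_psinfo() encodes the scheduler state as a single letter from
 * "RSDTZW" (the conventional process state letters, as shown by
 * ps(1)); state values beyond that range show up as '.'.
 */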

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		       struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	i = p->state ? ffz(~p->state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

	return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
	int i = 0;
	do
		i += 2;
	while (auxv[i - 2] != AT_NULL);
	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const siginfo_t *siginfo)
{
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
	set_fs(old_fs);
	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
static int fill_files_note(struct memelfnote *note)
{
	struct vm_area_struct *vma;
	unsigned count, size, names_ofs, remaining, n;
	user_long_t *data;
	user_long_t *start_end_ofs;
	char *name_base, *name_curpos;

	/* *Estimated* file count and total data size needed */
	count = current->mm->map_count;
	size = count * 64;

	names_ofs = (2 + 3 * count) * sizeof(data[0]);
 alloc:
	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
		return -EINVAL;
	size = round_up(size, PAGE_SIZE);
	data = vmalloc(size);
	if (!data)
		return -ENOMEM;

	start_end_ofs = data + 2;
	name_base = name_curpos = ((char *)data) + names_ofs;
	remaining = size - names_ofs;
	count = 0;
	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
		struct file *file;
		const char *filename;

		file = vma->vm_file;
		if (!file)
			continue;
		filename = file_path(file, name_curpos, remaining);
		if (IS_ERR(filename)) {
			if (PTR_ERR(filename) == -ENAMETOOLONG) {
				vfree(data);
				size = size * 5 / 4;
				goto alloc;
			}
			continue;
		}

		/* file_path() fills at the end, move name down */
		/* n = strlen(filename) + 1: */
		n = (name_curpos + remaining) - filename;
		remaining = filename - name_curpos;
		memmove(name_curpos, filename, n);
		name_curpos += n;

		*start_end_ofs++ = vma->vm_start;
		*start_end_ofs++ = vma->vm_end;
		*start_end_ofs++ = vma->vm_pgoff;
		count++;
	}

	/* Now we know exact count of files, can store it */
	data[0] = count;
	data[1] = PAGE_SIZE;
	/*
	 * Count usually is less than current->mm->map_count,
	 * we need to move filenames down.
	 */
	n = current->mm->map_count - count;
	if (n != 0) {
		unsigned shift_bytes = n * 3 * sizeof(data[0]);
		memmove(name_base - shift_bytes, name_base,
			name_curpos - name_base);
		name_curpos -= shift_bytes;
	}

	size = name_curpos - (char *)data;
	fill_note(note, "CORE", NT_FILE, size, data);
	return 0;
}

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
	struct elf_thread_core_info *next;
	struct task_struct *task;
	struct elf_prstatus prstatus;
	struct memelfnote notes[0];
};

struct elf_note_info {
	struct elf_thread_core_info *thread;
	struct memelfnote psinfo;
	struct memelfnote signote;
	struct memelfnote auxv;
	struct memelfnote files;
	user_siginfo_t csigdata;
	size_t size;
	int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory. On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
				       const struct user_regset *regset)
{
	if (regset->writeback)
		regset->writeback(task, regset, 1);
}

#ifndef PRSTATUS_SIZE
#define PRSTATUS_SIZE(S, R) sizeof(S)
#endif

#ifndef SET_PR_FPVALID
#define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
#endif

static int fill_thread_core_info(struct elf_thread_core_info *t,
				 const struct user_regset_view *view,
				 long signr, size_t *total)
{
	unsigned int i;
	unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;

	/*
	 * NT_PRSTATUS is the one special case, because the regset data
	 * goes into the pr_reg field inside the note contents, rather
	 * than being the whole note contents.  We fill the rest in here.
	 * We assume that regset 0 is NT_PRSTATUS.
	 */
	fill_prstatus(&t->prstatus, t->task, signr);
	(void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
				    &t->prstatus.pr_reg, NULL);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
		  PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
	*total += notesize(&t->notes[0]);

	do_thread_regset_writeback(t->task, &view->regsets[0]);

	/*
	 * Each other regset might generate a note too.  For each regset
	 * that has no core_note_type or is inactive, we leave t->notes[i]
	 * all zero and we'll know to skip writing it later.
	 */
	for (i = 1; i < view->n; ++i) {
		const struct user_regset *regset = &view->regsets[i];
		do_thread_regset_writeback(t->task, regset);
		if (regset->core_note_type && regset->get &&
		    (!regset->active || regset->active(t->task, regset))) {
			int ret;
			size_t size = regset->n * regset->size;
			void *data = kmalloc(size, GFP_KERNEL);
			if (unlikely(!data))
				return 0;
			ret = regset->get(t->task, regset,
					  0, size, data, NULL);
			if (unlikely(ret))
				kfree(data);
			else {
				if (regset->core_note_type != NT_PRFPREG)
					fill_note(&t->notes[i], "LINUX",
						  regset->core_note_type,
						  size, data);
				else {
					SET_PR_FPVALID(&t->prstatus,
							1, regset_size);
					fill_note(&t->notes[i], "CORE",
						  NT_PRFPREG, size, data);
				}
				*total += notesize(&t->notes[i]);
			}
		}
	}

	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const siginfo_t *siginfo, struct pt_regs *regs)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view = task_user_regset_view(dump_task);
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	unsigned int i;

	info->size = 0;
	info->thread = NULL;

	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
	if (psinfo == NULL) {
		info->psinfo.data = NULL; /* So we don't free this wrongly */
		return 0;
	}

	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs,
			view->e_machine, view->e_flags);

	/*
	 * Allocate a structure for each thread.
	 */
	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
		t = kzalloc(offsetof(struct elf_thread_core_info,
				     notes[info->thread_notes]),
			    GFP_KERNEL);
		if (unlikely(!t))
			return 0;

		t->task = ct->task;
		if (ct->task == dump_task || !info->thread) {
			t->next = info->thread;
			info->thread = t;
		} else {
			/*
			 * Make sure to keep the original task at
			 * the head of the list.
			 */
			t->next = info->thread->next;
			info->thread->next = t;
		}
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
			return 0;

	/*
	 * Fill in the process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	if (fill_files_note(&info->files) == 0)
		info->size += notesize(&info->files);

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	return info->size;
}

/*
 * Write all the notes for each thread.  When writing the first thread, the
 * process-wide notes are interleaved after the first thread-specific note.
 */
static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	bool first = true;
	struct elf_thread_core_info *t = info->thread;

	do {
		int i;

		if (!writenote(&t->notes[0], cprm))
			return 0;

		if (first && !writenote(&info->psinfo, cprm))
			return 0;
		if (first && !writenote(&info->signote, cprm))
			return 0;
		if (first && !writenote(&info->auxv, cprm))
			return 0;
		if (first && info->files.data &&
				!writenote(&info->files, cprm))
			return 0;

		for (i = 1; i < info->thread_notes; ++i)
			if (t->notes[i].data &&
			    !writenote(&t->notes[i], cprm))
				return 0;

		first = false;
		t = t->next;
	} while (t);

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	struct elf_thread_core_info *threads = info->thread;
	while (threads) {
		unsigned int i;
		struct elf_thread_core_info *t = threads;
		threads = t->next;
		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
		for (i = 1; i < info->thread_notes; ++i)
			kfree(t->notes[i].data);
		kfree(t);
	}
	kfree(info->psinfo.data);
	vfree(info->files.data);
}

#else

/* Here is the structure in which status of each thread is captured. */
struct elf_thread_status
{
	struct list_head list;
	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
	elf_fpregset_t fpu;		/* NT_PRFPREG */
	struct task_struct *thread;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
#endif
	struct memelfnote notes[3];
	int num_notes;
};

/*
 * In order to add the specific thread information for the elf file format,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
{
	int sz = 0;
	struct task_struct *p = t->thread;
	t->num_notes = 0;

	fill_prstatus(&t->prstatus, p, signr);
	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);

	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
		  &(t->prstatus));
	t->num_notes++;
	sz += notesize(&t->notes[0]);

	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
								&t->fpu))) {
		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
			  &(t->fpu));
		t->num_notes++;
		sz += notesize(&t->notes[1]);
	}

#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(t->xfpu), &t->xfpu);
		t->num_notes++;
		sz += notesize(&t->notes[2]);
	}
#endif
	return sz;
}

struct elf_note_info {
	struct memelfnote *notes;
	struct memelfnote *notes_files;
	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
	struct list_head thread_list;
	elf_fpregset_t *fpu;
#ifdef ELF_CORE_COPY_XFPREGS
	elf_fpxregset_t *xfpu;
#endif
	user_siginfo_t csigdata;
	int thread_status_size;
	int numnote;
};

static int elf_note_info_init(struct elf_note_info *info)
{
	memset(info, 0, sizeof(*info));
	INIT_LIST_HEAD(&info->thread_list);

	/* Allocate space for ELF notes */
	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
	if (!info->notes)
		return 0;
	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
	if (!info->psinfo)
		return 0;
	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
	if (!info->prstatus)
		return 0;
	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
	if (!info->fpu)
		return 0;
#ifdef ELF_CORE_COPY_XFPREGS
	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
	if (!info->xfpu)
		return 0;
#endif
	return 1;
}

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const siginfo_t *siginfo, struct pt_regs *regs)
{
	struct list_head *t;
	struct core_thread *ct;
	struct elf_thread_status *ets;

	if (!elf_note_info_init(info))
		return 0;

	for (ct = current->mm->core_state->dumper.next;
					ct; ct = ct->next) {
		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
		if (!ets)
			return 0;

		ets->thread = ct->task;
		list_add(&ets->list, &info->thread_list);
	}

	list_for_each(t, &info->thread_list) {
		int sz;

		ets = list_entry(t, struct elf_thread_status, list);
		sz = elf_dump_thread_status(siginfo->si_signo, ets);
		info->thread_status_size += sz;
	}
	/* now collect the dump for the current */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, siginfo->si_signo);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);

static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  const siginfo_t *siginfo, struct pt_regs *regs)
{
	struct list_head *t;
	struct core_thread *ct;
	struct elf_thread_status *ets;

	if (!elf_note_info_init(info))
		return 0;

	for (ct = current->mm->core_state->dumper.next;
	     ct; ct = ct->next) {
		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
		if (!ets)
			return 0;

		ets->thread = ct->task;
		list_add(&ets->list, &info->thread_list);
	}

	list_for_each(t, &info->thread_list) {
		int sz;

		ets = list_entry(t, struct elf_thread_status, list);
		sz = elf_dump_thread_status(siginfo->si_signo, ets);
		info->thread_status_size += sz;
	}
	/* now collect the dump for the current task */
	memset(info->prstatus, 0, sizeof(*info->prstatus));
	fill_prstatus(info->prstatus, current, siginfo->si_signo);
	elf_core_copy_regs(&info->prstatus->pr_reg, regs);

	/* Set up header */
	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);

	/*
	 * Set up the notes in similar form to SVR4 core dumps made
	 * with info from their /proc.
	 */

	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
		  sizeof(*info->prstatus), info->prstatus);
	fill_psinfo(info->psinfo, current->group_leader, current->mm);
	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
		  sizeof(*info->psinfo), info->psinfo);

	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
	fill_auxv_note(info->notes + 3, current->mm);
	info->numnote = 4;

	if (fill_files_note(info->notes + info->numnote) == 0) {
		info->notes_files = info->notes + info->numnote;
		info->numnote++;
	}

	/* Try to dump the FPU. */
	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
							       info->fpu);
	if (info->prstatus->pr_fpvalid)
		fill_note(info->notes + info->numnote++,
			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	if (elf_core_copy_task_xfpregs(current, info->xfpu))
		fill_note(info->notes + info->numnote++,
			  "LINUX", ELF_CORE_XFPREG_TYPE,
			  sizeof(*info->xfpu), info->xfpu);
#endif

	return 1;
}

static size_t get_note_info_size(struct elf_note_info *info)
{
	int sz = 0;
	int i;

	for (i = 0; i < info->numnote; i++)
		sz += notesize(info->notes + i);

	sz += info->thread_status_size;

	return sz;
}

static int write_note_info(struct elf_note_info *info,
			   struct coredump_params *cprm)
{
	int i;
	struct list_head *t;

	for (i = 0; i < info->numnote; i++)
		if (!writenote(info->notes + i, cprm))
			return 0;

	/* write out the thread status notes section */
	list_for_each(t, &info->thread_list) {
		struct elf_thread_status *tmp =
			list_entry(t, struct elf_thread_status, list);

		for (i = 0; i < tmp->num_notes; i++)
			if (!writenote(&tmp->notes[i], cprm))
				return 0;
	}

	return 1;
}

static void free_note_info(struct elf_note_info *info)
{
	while (!list_empty(&info->thread_list)) {
		struct list_head *tmp = info->thread_list.next;
		list_del(tmp);
		kfree(list_entry(tmp, struct elf_thread_status, list));
	}

	/* Free data possibly allocated by fill_files_note(): */
	if (info->notes_files)
		vfree(info->notes_files->data);

	kfree(info->prstatus);
	kfree(info->psinfo);
	kfree(info->notes);
	kfree(info->fpu);
#ifdef ELF_CORE_COPY_XFPREGS
	kfree(info->xfpu);
#endif
}

#endif
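
/*
 * Whichever branch was compiled, the four helpers above present the
 * same contract, which elf_core_dump() below drives in order: fill,
 * size, write, free.  A minimal sketch of that call sequence
 * (illustrative only, not part of the build; note_info_usage_sketch
 * is a hypothetical name and layout bookkeeping is elided).  The
 * final free is safe even on a failed fill because fill_note_info()
 * initialises its bookkeeping before any allocation can fail:
 */
#if 0
static int note_info_usage_sketch(struct elfhdr *elf, int phnum,
				  struct coredump_params *cprm)
{
	struct elf_note_info info = { };
	int err = -EINVAL;

	if (!fill_note_info(elf, phnum, &info, cprm->siginfo, cprm->regs))
		goto out;
	/* reserve get_note_info_size(&info) bytes in the file layout ... */
	if (write_note_info(&info, cprm))
		err = 0;
out:
	free_note_info(&info);
	return err;
}
#endif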

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;

	if (ret)
		return ret;
	return gate_vma;
}

/*
 * Helper function for iterating across a vma list.  It ensures that the
 * caller will visit `gate_vma' prior to terminating the search.
 */
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
				       struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret;

	ret = this_vma->vm_next;
	if (ret)
		return ret;
	if (this_vma == gate_vma)
		return NULL;
	return gate_vma;
}

static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	memset(shdr4extnum, 0, sizeof(*shdr4extnum));

	shdr4extnum->sh_type = SHT_NULL;
	shdr4extnum->sh_size = elf->e_shnum;
	shdr4extnum->sh_link = elf->e_shstrndx;
	shdr4extnum->sh_info = segs;
}

/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;
	mm_segment_t fs;
	int segs, i;
	size_t vma_data_size = 0;
	struct vm_area_struct *vma, *gate_vma;
	struct elfhdr *elf = NULL;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;
	struct elf_shdr *shdr4extnum = NULL;
	Elf_Half e_phnum;
	elf_addr_t e_shoff;
	elf_addr_t *vma_filesz = NULL;

	/*
	 * We no longer stop all VM operations.
	 *
	 * This is because those processes that could possibly change map_count
	 * or the mmap / vma pages are now blocked in do_exit on current
	 * finishing this core dump.
	 *
	 * Only ptrace can touch these memory addresses, but it doesn't change
	 * the map_count or the pages allocated.  So no possibility of crashing
	 * exists while dumping the mm->vm_next areas to the core file.
	 */

	/* alloc memory for large data structures: too large to be on stack */
	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
	if (!elf)
		goto out;
	/*
	 * The number of segs is recorded in the ELF header as a 16-bit value.
	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = current->mm->map_count;
	segs += elf_core_extra_phdrs();

	gate_vma = get_gate_vma(current->mm);
	if (gate_vma != NULL)
		segs++;

	/* for notes section */
	segs++;

	/*
	 * If segs > PN_XNUM(0xffff), then e_phnum overflows.  To avoid
	 * this, the kernel supports extended numbering.  Have a look at
	 * include/linux/elf.h for further information.
	 */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
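
	/*
	 * Reader-side note (illustrative, not used here): when e_phnum
	 * reads PN_XNUM, consumers are expected to recover the real
	 * segment count from the sh_info field of the single section
	 * header this dumper emits via fill_extnum_info(), e.g.
	 *
	 *	phnum = ehdr.e_phnum == PN_XNUM ? shdr0.sh_info
	 *					: ehdr.e_phnum;
	 */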

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
		goto cleanup;

	has_dumped = 1;

	fs = get_fs();
	set_fs(KERNEL_DS);

	offset += sizeof(*elf);				/* Elf header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = get_note_info_size(&info);

		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
		goto end_coredump;
	vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
	if (!vma_filesz)
		goto end_coredump;

	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma)) {
		unsigned long dump_size;

		dump_size = vma_dump_size(vma, cprm->mm_flags);
		vma_filesz[i++] = dump_size;
		vma_data_size += dump_size;
	}

	offset += vma_data_size;
	offset += elf_core_extra_data_size();
	e_shoff = offset;

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
	}

	offset = dataoff;

	if (!dump_emit(cprm, elf, sizeof(*elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;
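
	/*
	 * At this point the layout is fully decided; the emits below
	 * produce a file of the following shape (derived from the
	 * offset arithmetic above):
	 *
	 *	[ ELF header            ]  sizeof(*elf)
	 *	[ program headers       ]  segs * sizeof(struct elf_phdr)
	 *	[ note data             ]  get_note_info_size() + extra notes
	 *	[ pad                   ]  up to ELF_EXEC_PAGESIZE alignment
	 *	[ PT_LOAD segment data  ]  vma_filesz[i] bytes per vma, in order
	 *	[ arch extra data       ]  elf_core_extra_data_size()
	 *	[ extnum section header ]  only when e_phnum == PN_XNUM
	 */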

	/* Write program headers for segments dump */
	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma)) {
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = vma->vm_start;
		phdr.p_paddr = 0;
		phdr.p_filesz = vma_filesz[i++];
		phdr.p_memsz = vma->vm_end - vma->vm_start;
		offset += phdr.p_filesz;
		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
		if (vma->vm_flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (vma->vm_flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	if (!dump_skip(cprm, dataoff - cprm->pos))
		goto end_coredump;

	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
	     vma = next_vma(vma, gate_vma)) {
		unsigned long addr;
		unsigned long end;

		end = vma->vm_start + vma_filesz[i++];

		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
			struct page *page;
			int stop;

			page = get_dump_page(addr);
			if (page) {
				void *kaddr = kmap(page);
				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
				kunmap(page);
				put_page(page);
			} else
				stop = !dump_skip(cprm, PAGE_SIZE);
			if (stop)
				goto end_coredump;
		}
	}
	dump_truncate(cprm);

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	set_fs(fs);

cleanup:
	free_note_info(&info);
	kfree(shdr4extnum);
	vfree(vma_filesz);
	kfree(phdr4note);
	kfree(elf);
out:
	return has_dumped;
}

#endif		/* CONFIG_ELF_CORE */

static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}

static void __exit exit_elf_binfmt(void)
{
	/* Remove the COFF and ELF loaders. */
	unregister_binfmt(&elf_format);
}

core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");
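
/*
 * A hedged userspace sketch of consuming the dump written above:
 * locate the PT_LOAD header covering a virtual address and translate
 * it to a file offset, using the p_vaddr/p_offset/p_filesz relations
 * the dumper establishes.  Illustrative only (not part of the build);
 * core_offset_of() is a hypothetical name, it assumes a 64-bit core
 * whose offsets fit in a long, and for simplicity it trusts e_phnum
 * (see the PN_XNUM reader-side note above for the extended case).
 */
#if 0
#include <elf.h>

static long core_offset_of(const Elf64_Ehdr *eh, const Elf64_Phdr *ph,
			   unsigned long vaddr)
{
	int i;

	for (i = 0; i < eh->e_phnum; i++)
		if (ph[i].p_type == PT_LOAD &&
		    vaddr >= ph[i].p_vaddr &&
		    vaddr - ph[i].p_vaddr < ph[i].p_filesz)
			return (long)(ph[i].p_offset +
				      (vaddr - ph[i].p_vaddr));
	return -1;	/* address not present in the dump */
}
#endif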