1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * linux/fs/binfmt_elf.c
4 *
5 * These are the functions used to load ELF format executables as used
6 * on SVr4 machines. Information on the format may be found in the book
7 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
8 * Tools".
9 *
10 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
11 */
12
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/fs.h>
16 #include <linux/log2.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/errno.h>
20 #include <linux/signal.h>
21 #include <linux/binfmts.h>
22 #include <linux/string.h>
23 #include <linux/file.h>
24 #include <linux/slab.h>
25 #include <linux/personality.h>
26 #include <linux/elfcore.h>
27 #include <linux/init.h>
28 #include <linux/highuid.h>
29 #include <linux/compiler.h>
30 #include <linux/highmem.h>
31 #include <linux/hugetlb.h>
32 #include <linux/pagemap.h>
33 #include <linux/vmalloc.h>
34 #include <linux/security.h>
35 #include <linux/random.h>
36 #include <linux/elf.h>
37 #include <linux/elf-randomize.h>
38 #include <linux/utsname.h>
39 #include <linux/coredump.h>
40 #include <linux/sched.h>
41 #include <linux/sched/coredump.h>
42 #include <linux/sched/task_stack.h>
43 #include <linux/sched/cputime.h>
44 #include <linux/sizes.h>
45 #include <linux/types.h>
46 #include <linux/cred.h>
47 #include <linux/dax.h>
48 #include <linux/uaccess.h>
49 #include <uapi/linux/rseq.h>
50 #include <linux/rseq.h>
51 #include <asm/param.h>
52 #include <asm/page.h>
53
54 #ifndef ELF_COMPAT
55 #define ELF_COMPAT 0
56 #endif
57
58 #ifndef user_long_t
59 #define user_long_t long
60 #endif
61 #ifndef user_siginfo_t
62 #define user_siginfo_t siginfo_t
63 #endif
64
65 /* That's for binfmt_elf_fdpic to deal with */
66 #ifndef elf_check_fdpic
67 #define elf_check_fdpic(ex) false
68 #endif
69
70 static int load_elf_binary(struct linux_binprm *bprm);
71
72 /*
73 * If we don't support core dumping, then supply a NULL so we
74 * don't even try.
75 */
76 #ifdef CONFIG_ELF_CORE
77 static int elf_core_dump(struct coredump_params *cprm);
78 #else
79 #define elf_core_dump NULL
80 #endif
81
82 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
83 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
84 #else
85 #define ELF_MIN_ALIGN PAGE_SIZE
86 #endif
87
88 #ifndef ELF_CORE_EFLAGS
89 #define ELF_CORE_EFLAGS 0
90 #endif
91
92 #define ELF_PAGESTART(_v) ((_v) & ~(int)(ELF_MIN_ALIGN-1))
93 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
94 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
95
96 static struct linux_binfmt elf_format = {
97 .module = THIS_MODULE,
98 .load_binary = load_elf_binary,
99 #ifdef CONFIG_COREDUMP
100 .core_dump = elf_core_dump,
101 .min_coredump = ELF_EXEC_PAGESIZE,
102 #endif
103 };
104
105 #define BAD_ADDR(x) (unlikely((unsigned long)(x) >= TASK_SIZE))
106
/*
 * Record the binary's ELF e_flags in the mm so a later core dump can
 * reproduce them.  No-op unless the architecture selects
 * CONFIG_ARCH_HAS_ELF_CORE_EFLAGS (mm->saved_e_flags only exists then).
 */
static inline void elf_coredump_set_mm_eflags(struct mm_struct *mm, u32 flags)
{
#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
	mm->saved_e_flags = flags;
#endif
}
113
/*
 * Return the e_flags saved at exec time when the architecture supports it;
 * otherwise hand back the caller-supplied default unchanged.
 */
static inline u32 elf_coredump_get_mm_eflags(struct mm_struct *mm, u32 flags)
{
#ifdef CONFIG_ARCH_HAS_ELF_CORE_EFLAGS
	flags = mm->saved_e_flags;
#endif
	return flags;
}
121
122 /*
123 * We need to explicitly zero any trailing portion of the page that follows
124 * p_filesz when it ends before the page ends (e.g. bss), otherwise this
125 * memory will contain the junk from the file that should not be present.
126 */
padzero(unsigned long address)127 static int padzero(unsigned long address)
128 {
129 unsigned long nbyte;
130
131 nbyte = ELF_PAGEOFFSET(address);
132 if (nbyte) {
133 nbyte = ELF_MIN_ALIGN - nbyte;
134 if (clear_user((void __user *)address, nbyte))
135 return -EFAULT;
136 }
137 return 0;
138 }
139
140 /* Let's use some macros to make this stack manipulation a little clearer */
141 #ifdef CONFIG_STACK_GROWSUP
142 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
143 #define STACK_ROUND(sp, items) \
144 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
145 #define STACK_ALLOC(sp, len) ({ \
146 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
147 old_sp; })
148 #else
149 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
150 #define STACK_ROUND(sp, items) \
151 (((unsigned long) (sp - items)) &~ 15UL)
152 #define STACK_ALLOC(sp, len) (sp -= len)
153 #endif
154
155 #ifndef ELF_BASE_PLATFORM
156 /*
157 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
158 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
159 * will be copied to the user stack in the same manner as AT_PLATFORM.
160 */
161 #define ELF_BASE_PLATFORM NULL
162 #endif
163
/*
 * create_elf_tables() - build the new process's initial stack contents.
 * @bprm:		binprm describing this exec
 * @exec:		ELF header of the binary being loaded (for AT_PHNUM)
 * @interp_load_addr:	interpreter base address, exported as AT_BASE
 * @e_entry:		program entry point, exported as AT_ENTRY
 * @phdr_addr:		user address of the program headers, exported as AT_PHDR
 *
 * Pushes the platform capability strings and 16 AT_RANDOM bytes onto the
 * stack, builds the auxiliary vector in mm->saved_auxv, then writes argc,
 * the argv/envp pointer arrays and the auxv at the 16-byte-rounded stack
 * pointer derived from bprm->p.
 *
 * Return: 0 on success, or -EFAULT/-EINVAL/-EINTR on failure.
 */
static int
create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
		unsigned long interp_load_addr,
		unsigned long e_entry, unsigned long phdr_addr)
{
	struct mm_struct *mm = current->mm;
	unsigned long p = bprm->p;
	int argc = bprm->argc;
	int envc = bprm->envc;
	elf_addr_t __user *sp;
	elf_addr_t __user *u_platform;
	elf_addr_t __user *u_base_platform;
	elf_addr_t __user *u_rand_bytes;
	const char *k_platform = ELF_PLATFORM;
	const char *k_base_platform = ELF_BASE_PLATFORM;
	unsigned char k_rand_bytes[16];
	int items;
	elf_addr_t *elf_info;
	elf_addr_t flags = 0;
	int ei_index;
	const struct cred *cred = current_cred();
	struct vm_area_struct *vma;

	/*
	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
	 * evictions by the processes running on the same package. One
	 * thing we can do is to shuffle the initial stack for them.
	 */

	p = arch_align_stack(p);

	/*
	 * If this architecture has a platform capability string, copy it
	 * to userspace.  In some cases (Sparc), this info is impossible
	 * for userspace to get any other way, in others (i386) it is
	 * merely difficult.
	 */
	u_platform = NULL;
	if (k_platform) {
		size_t len = strlen(k_platform) + 1;

		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_platform, k_platform, len))
			return -EFAULT;
	}

	/*
	 * If this architecture has a "base" platform capability
	 * string, copy it to userspace.
	 */
	u_base_platform = NULL;
	if (k_base_platform) {
		size_t len = strlen(k_base_platform) + 1;

		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
		if (copy_to_user(u_base_platform, k_base_platform, len))
			return -EFAULT;
	}

	/*
	 * Generate 16 random bytes for userspace PRNG seeding.
	 */
	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
	u_rand_bytes = (elf_addr_t __user *)
		       STACK_ALLOC(p, sizeof(k_rand_bytes));
	if (copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
		return -EFAULT;

	/* Create the ELF interpreter info */
	elf_info = (elf_addr_t *)mm->saved_auxv;
	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
	do { \
		*elf_info++ = id; \
		*elf_info++ = val; \
	} while (0)

#ifdef ARCH_DLINFO
	/*
	 * ARCH_DLINFO must come first so PPC can do its special alignment of
	 * AUXV.
	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
	 * ARCH_DLINFO changes
	 */
	ARCH_DLINFO;
#endif
	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
	NEW_AUX_ENT(AT_PHDR, phdr_addr);
	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
	NEW_AUX_ENT(AT_BASE, interp_load_addr);
	if (bprm->interp_flags & BINPRM_FLAGS_PRESERVE_ARGV0)
		flags |= AT_FLAGS_PRESERVE_ARGV0;
	NEW_AUX_ENT(AT_FLAGS, flags);
	NEW_AUX_ENT(AT_ENTRY, e_entry);
	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
	NEW_AUX_ENT(AT_SECURE, bprm->secureexec);
	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
#ifdef ELF_HWCAP3
	NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
#endif
#ifdef ELF_HWCAP4
	NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
#endif
	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
	if (k_platform) {
		NEW_AUX_ENT(AT_PLATFORM,
			    (elf_addr_t)(unsigned long)u_platform);
	}
	if (k_base_platform) {
		NEW_AUX_ENT(AT_BASE_PLATFORM,
			    (elf_addr_t)(unsigned long)u_base_platform);
	}
	if (bprm->have_execfd) {
		NEW_AUX_ENT(AT_EXECFD, bprm->execfd);
	}
#ifdef CONFIG_RSEQ
	NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
	NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
#endif
#undef NEW_AUX_ENT
	/* AT_NULL is zero; clear the rest too */
	memset(elf_info, 0, (char *)mm->saved_auxv +
			sizeof(mm->saved_auxv) - (char *)elf_info);

	/* And advance past the AT_NULL entry. */
	elf_info += 2;

	ei_index = elf_info - (elf_addr_t *)mm->saved_auxv;
	sp = STACK_ADD(p, ei_index);

	/* argv pointers + NULL, envp pointers + NULL, plus argc itself. */
	items = (argc + 1) + (envc + 1) + 1;
	bprm->p = STACK_ROUND(sp, items);

	/* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
	sp = (elf_addr_t __user *)bprm->p;
#endif


	/*
	 * Grow the stack manually; some architectures have a limit on how
	 * far ahead a user-space access may be in order to grow the stack.
	 */
	if (mmap_write_lock_killable(mm))
		return -EINTR;
	vma = find_extend_vma_locked(mm, bprm->p);
	mmap_write_unlock(mm);
	if (!vma)
		return -EFAULT;

	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
	if (put_user(argc, sp++))
		return -EFAULT;

	/* Populate list of argv pointers back to argv strings. */
	p = mm->arg_end = mm->arg_start;
	while (argc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->arg_end = p;

	/* Populate list of envp pointers back to envp strings. */
	mm->env_end = mm->env_start = p;
	while (envc-- > 0) {
		size_t len;
		if (put_user((elf_addr_t)p, sp++))
			return -EFAULT;
		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
		if (!len || len > MAX_ARG_STRLEN)
			return -EINVAL;
		p += len;
	}
	if (put_user(0, sp++))
		return -EFAULT;
	mm->env_end = p;

	/* Put the elf_info on the stack in the right place. */
	if (copy_to_user(sp, mm->saved_auxv, ei_index * sizeof(elf_addr_t)))
		return -EFAULT;
	return 0;
}
365
366 /*
367 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
368 * into memory at "addr". (Note that p_filesz is rounded up to the
369 * next page, so any extra bytes from the file must be wiped.)
370 */
static unsigned long elf_map(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long map_addr;
	/* Widen the request to whole ELF pages around p_vaddr/p_offset. */
	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
	addr = ELF_PAGESTART(addr);
	size = ELF_PAGEALIGN(size);

	/* mmap() will return -EINVAL if given a zero size, but a
	 * segment with zero filesize is perfectly valid */
	if (!size)
		return addr;

	/*
	* total_size is the size of the ELF (interpreter) image.
	* The _first_ mmap needs to know the full size, otherwise
	* randomization might put this image into an overlapping
	* position with the ELF binary image. (since size < total_size)
	* So we first map the 'big' image - and unmap the remainder at
	* the end. (which unmap is needed for ELF images with holes.)
	*/
	if (total_size) {
		total_size = ELF_PAGEALIGN(total_size);
		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
		/* Trim the reservation back down to this segment's pages. */
		if (!BAD_ADDR(map_addr))
			vm_munmap(map_addr+size, total_size-size);
	} else
		map_addr = vm_mmap(filep, addr, size, prot, type, off);

	if ((type & MAP_FIXED_NOREPLACE) &&
	    PTR_ERR((void *)map_addr) == -EEXIST)
		pr_info("%d (%s): Uhuuh, elf segment at %px requested but the memory is mapped already\n",
			task_pid_nr(current), current->comm, (void *)addr);

	return(map_addr);
}
409
410 /*
411 * Map "eppnt->p_filesz" bytes from "filep" offset "eppnt->p_offset"
412 * into memory at "addr". Memory from "p_filesz" through "p_memsz"
413 * rounded up to the next page is zeroed.
414 */
static unsigned long elf_load(struct file *filep, unsigned long addr,
		const struct elf_phdr *eppnt, int prot, int type,
		unsigned long total_size)
{
	unsigned long zero_start, zero_end;
	unsigned long map_addr;

	if (eppnt->p_filesz) {
		/* File-backed part of the segment. */
		map_addr = elf_map(filep, addr, eppnt, prot, type, total_size);
		if (BAD_ADDR(map_addr))
			return map_addr;
		if (eppnt->p_memsz > eppnt->p_filesz) {
			/* Anonymous tail: [p_filesz, p_memsz) must read as zeroes. */
			zero_start = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_filesz;
			zero_end = map_addr + ELF_PAGEOFFSET(eppnt->p_vaddr) +
				eppnt->p_memsz;

			/*
			 * Zero the end of the last mapped page but ignore
			 * any errors if the segment isn't writable.
			 */
			if (padzero(zero_start) && (prot & PROT_WRITE))
				return -EFAULT;
		}
	} else {
		/* No file data at all: the whole segment is zero-fill. */
		map_addr = zero_start = ELF_PAGESTART(addr);
		zero_end = zero_start + ELF_PAGEOFFSET(eppnt->p_vaddr) +
			eppnt->p_memsz;
	}
	if (eppnt->p_memsz > eppnt->p_filesz) {
		/*
		 * Map the last of the segment.
		 * If the header is requesting these pages to be
		 * executable, honour that (ppc32 needs this).
		 */
		int error;

		zero_start = ELF_PAGEALIGN(zero_start);
		zero_end = ELF_PAGEALIGN(zero_end);

		error = vm_brk_flags(zero_start, zero_end - zero_start,
				prot & PROT_EXEC ? VM_EXEC : 0);
		if (error)
			map_addr = error;
	}
	return map_addr;
}
462
463
total_mapping_size(const struct elf_phdr * phdr,int nr)464 static unsigned long total_mapping_size(const struct elf_phdr *phdr, int nr)
465 {
466 elf_addr_t min_addr = -1;
467 elf_addr_t max_addr = 0;
468 bool pt_load = false;
469 int i;
470
471 for (i = 0; i < nr; i++) {
472 if (phdr[i].p_type == PT_LOAD) {
473 min_addr = min(min_addr, ELF_PAGESTART(phdr[i].p_vaddr));
474 max_addr = max(max_addr, phdr[i].p_vaddr + phdr[i].p_memsz);
475 pt_load = true;
476 }
477 }
478 return pt_load ? (max_addr - min_addr) : 0;
479 }
480
/*
 * Read exactly @len bytes from @file at @pos into @buf.  A short read is
 * treated as an error: returns 0 on success, the negative errno from
 * kernel_read(), or -EIO when fewer bytes than requested were returned.
 */
static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
{
	ssize_t nread = kernel_read(file, buf, len, &pos);

	if (nread == len)
		return 0;

	return (nread < 0) ? nread : -EIO;
}
491
maximum_alignment(struct elf_phdr * cmds,int nr)492 static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
493 {
494 unsigned long alignment = 0;
495 int i;
496
497 for (i = 0; i < nr; i++) {
498 if (cmds[i].p_type == PT_LOAD) {
499 unsigned long p_align = cmds[i].p_align;
500
501 /* skip non-power of two alignments as invalid */
502 if (!is_power_of_2(p_align))
503 continue;
504 alignment = max(alignment, p_align);
505 }
506 }
507
508 /* ensure we align to at least one page */
509 return ELF_PAGEALIGN(alignment);
510 }
511
512 /**
513 * load_elf_phdrs() - load ELF program headers
514 * @elf_ex: ELF header of the binary whose program headers should be loaded
515 * @elf_file: the opened ELF binary file
516 *
517 * Loads ELF program headers from the binary file elf_file, which has the ELF
518 * header pointed to by elf_ex, into a newly allocated array. The caller is
519 * responsible for freeing the allocated data. Returns NULL upon failure.
520 */
load_elf_phdrs(const struct elfhdr * elf_ex,struct file * elf_file)521 static struct elf_phdr *load_elf_phdrs(const struct elfhdr *elf_ex,
522 struct file *elf_file)
523 {
524 struct elf_phdr *elf_phdata = NULL;
525 int retval = -1;
526 unsigned int size;
527
528 /*
529 * If the size of this structure has changed, then punt, since
530 * we will be doing the wrong thing.
531 */
532 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
533 goto out;
534
535 /* Sanity check the number of program headers... */
536 /* ...and their total size. */
537 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
538 if (size == 0 || size > 65536)
539 goto out;
540
541 elf_phdata = kmalloc(size, GFP_KERNEL);
542 if (!elf_phdata)
543 goto out;
544
545 /* Read in the program headers */
546 retval = elf_read(elf_file, elf_phdata, size, elf_ex->e_phoff);
547
548 out:
549 if (retval) {
550 kfree(elf_phdata);
551 elf_phdata = NULL;
552 }
553 return elf_phdata;
554 }
555
556 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
557
558 /**
559 * struct arch_elf_state - arch-specific ELF loading state
560 *
561 * This structure is used to preserve architecture specific data during
562 * the loading of an ELF file, throughout the checking of architecture
563 * specific ELF headers & through to the point where the ELF load is
564 * known to be proceeding (ie. SET_PERSONALITY).
565 *
566 * This implementation is a dummy for architectures which require no
567 * specific state.
568 */
569 struct arch_elf_state {
570 };
571
572 #define INIT_ARCH_ELF_STATE {}
573
574 /**
575 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
576 * @ehdr: The main ELF header
577 * @phdr: The program header to check
578 * @elf: The open ELF file
579 * @is_interp: True if the phdr is from the interpreter of the ELF being
580 * loaded, else false.
581 * @state: Architecture-specific state preserved throughout the process
582 * of loading the ELF.
583 *
584 * Inspects the program header phdr to validate its correctness and/or
585 * suitability for the system. Called once per ELF program header in the
586 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
587 * interpreter.
588 *
589 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
590 * with that return code.
591 */
static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
				   struct elf_phdr *phdr,
				   struct file *elf, bool is_interp,
				   struct arch_elf_state *state)
{
	/*
	 * Dummy implementation for architectures that don't define
	 * CONFIG_ARCH_BINFMT_ELF_STATE: always proceed with the load.
	 */
	return 0;
}
600
601 /**
602 * arch_check_elf() - check an ELF executable
603 * @ehdr: The main ELF header
604 * @has_interp: True if the ELF has an interpreter, else false.
605 * @interp_ehdr: The interpreter's ELF header
606 * @state: Architecture-specific state preserved throughout the process
607 * of loading the ELF.
608 *
609 * Provides a final opportunity for architecture code to reject the loading
610 * of the ELF & cause an exec syscall to return an error. This is called after
611 * all program headers to be checked by arch_elf_pt_proc have been.
612 *
613 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
614 * with that return code.
615 */
static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
				 struct elfhdr *interp_ehdr,
				 struct arch_elf_state *state)
{
	/*
	 * Dummy implementation for architectures that don't define
	 * CONFIG_ARCH_BINFMT_ELF_STATE: always proceed with the load.
	 */
	return 0;
}
623
624 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
625
/*
 * Translate ELF segment permission flags (PF_R/PF_W/PF_X) into mmap
 * protection bits, then give the architecture a chance to adjust them
 * via arch_elf_adjust_prot().
 */
static inline int make_prot(u32 p_flags, struct arch_elf_state *arch_state,
			    bool has_interp, bool is_interp)
{
	int prot = ((p_flags & PF_R) ? PROT_READ : 0) |
		   ((p_flags & PF_W) ? PROT_WRITE : 0) |
		   ((p_flags & PF_X) ? PROT_EXEC : 0);

	return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp);
}
640
641 /* This is much more generalized than the library routine read function,
642 so we keep this separate. Technically the library read function
643 is only provided so that we can read a.out libraries that have
644 an ELF header */
645
/*
 * load_elf_interp() - map the ELF interpreter's PT_LOAD segments.
 * @interp_elf_ex:	the interpreter's ELF header
 * @interpreter:	the opened interpreter file
 * @no_base:		when non-zero, an ET_DYN interpreter's first segment
 *			is mapped at a kernel-chosen address (load_addr is
 *			biased by -vaddr so elf_load() picks the base)
 * @interp_elf_phdata:	the interpreter's program header table
 * @arch_state:		arch-specific state carried through the load
 *
 * Return: the interpreter's load address/bias on success; on failure a
 * negative errno or an address failing BAD_ADDR() (error starts as ~0UL).
 */
static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
		struct file *interpreter,
		unsigned long no_base, struct elf_phdr *interp_elf_phdata,
		struct arch_elf_state *arch_state)
{
	struct elf_phdr *eppnt;
	unsigned long load_addr = 0;
	int load_addr_set = 0;
	unsigned long error = ~0UL;
	unsigned long total_size;
	int i;

	/* First of all, some simple consistency checks */
	if (interp_elf_ex->e_type != ET_EXEC &&
	    interp_elf_ex->e_type != ET_DYN)
		goto out;
	if (!elf_check_arch(interp_elf_ex) ||
	    elf_check_fdpic(interp_elf_ex))
		goto out;
	if (!can_mmap_file(interpreter))
		goto out;

	total_size = total_mapping_size(interp_elf_phdata,
					interp_elf_ex->e_phnum);
	if (!total_size) {
		error = -EINVAL;
		goto out;
	}

	eppnt = interp_elf_phdata;
	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
		if (eppnt->p_type == PT_LOAD) {
			int elf_type = MAP_PRIVATE;
			int elf_prot = make_prot(eppnt->p_flags, arch_state,
						 true, true);
			unsigned long vaddr = 0;
			unsigned long k, map_addr;

			vaddr = eppnt->p_vaddr;
			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
				elf_type |= MAP_FIXED;
			else if (no_base && interp_elf_ex->e_type == ET_DYN)
				load_addr = -vaddr;

			map_addr = elf_load(interpreter, load_addr + vaddr,
					eppnt, elf_prot, elf_type, total_size);
			/* Only the first mapping needs total_size (see elf_map()). */
			total_size = 0;
			error = map_addr;
			if (BAD_ADDR(map_addr))
				goto out;

			if (!load_addr_set &&
			    interp_elf_ex->e_type == ET_DYN) {
				load_addr = map_addr - ELF_PAGESTART(vaddr);
				load_addr_set = 1;
			}

			/*
			 * Check to see if the section's size will overflow the
			 * allowed task size. Note that p_filesz must always be
			 * <= p_memsize so it's only necessary to check p_memsz.
			 */
			k = load_addr + eppnt->p_vaddr;
			if (BAD_ADDR(k) ||
			    eppnt->p_filesz > eppnt->p_memsz ||
			    eppnt->p_memsz > TASK_SIZE ||
			    TASK_SIZE - eppnt->p_memsz < k) {
				error = -ENOMEM;
				goto out;
			}
		}
	}

	error = load_addr;
out:
	return error;
}
723
724 /*
725 * These are the functions used to load ELF style executables and shared
726 * libraries. There is no binary dependent code anywhere else.
727 */
728
/*
 * Parse one entry of a PT_GNU_PROPERTY note descriptor at *@off within
 * @data (@datasz bytes total) and hand it to arch_parse_elf_property().
 * On success *@off is advanced past the (aligned) entry.  Entries must be
 * unique and sorted by pr_type; @prev_type carries the previous type when
 * @have_prev_type is set.
 *
 * Returns 0 on success, -ENOENT when the descriptor is exhausted,
 * -ENOEXEC on malformed data, or the arch callback's error.
 */
static int parse_elf_property(const char *data, size_t *off, size_t datasz,
			      struct arch_elf_state *arch,
			      bool have_prev_type, u32 *prev_type)
{
	size_t o, step;
	const struct gnu_property *pr;
	int ret;

	if (*off == datasz)
		return -ENOENT;

	if (WARN_ON_ONCE(*off > datasz || *off % ELF_GNU_PROPERTY_ALIGN))
		return -EIO;
	o = *off;
	datasz -= *off;

	if (datasz < sizeof(*pr))
		return -ENOEXEC;
	pr = (const struct gnu_property *)(data + o);
	o += sizeof(*pr);
	datasz -= sizeof(*pr);

	/* The payload must fit inside what remains of the descriptor. */
	if (pr->pr_datasz > datasz)
		return -ENOEXEC;

	WARN_ON_ONCE(o % ELF_GNU_PROPERTY_ALIGN);
	step = round_up(pr->pr_datasz, ELF_GNU_PROPERTY_ALIGN);
	if (step > datasz)
		return -ENOEXEC;

	/* Properties are supposed to be unique and sorted on pr_type: */
	if (have_prev_type && pr->pr_type <= *prev_type)
		return -ENOEXEC;
	*prev_type = pr->pr_type;

	ret = arch_parse_elf_property(pr->pr_type, data + o,
				      pr->pr_datasz, ELF_COMPAT, arch);
	if (ret)
		return ret;

	*off = o + step;
	return 0;
}
772
773 #define NOTE_DATA_SZ SZ_1K
774 #define NOTE_NAME_SZ (sizeof(NN_GNU_PROPERTY_TYPE_0))
775
/*
 * Read and validate the PT_GNU_PROPERTY note from @f described by @phdr,
 * then feed each property entry to parse_elf_property().  A NULL @phdr or
 * an architecture without CONFIG_ARCH_USE_GNU_PROPERTY is a successful
 * no-op.  The note is bounded to NOTE_DATA_SZ bytes and must be an
 * NT_GNU_PROPERTY_TYPE_0 note with the expected owner name.
 *
 * Returns 0 on success or if there is nothing to do, -ENOEXEC/-EIO on a
 * malformed note, or an error from the arch property parser.
 */
static int parse_elf_properties(struct file *f, const struct elf_phdr *phdr,
				struct arch_elf_state *arch)
{
	union {
		struct elf_note nhdr;
		char data[NOTE_DATA_SZ];
	} note;
	loff_t pos;
	ssize_t n;
	size_t off, datasz;
	int ret;
	bool have_prev_type;
	u32 prev_type;

	if (!IS_ENABLED(CONFIG_ARCH_USE_GNU_PROPERTY) || !phdr)
		return 0;

	/* load_elf_binary() shouldn't call us unless this is true... */
	if (WARN_ON_ONCE(phdr->p_type != PT_GNU_PROPERTY))
		return -ENOEXEC;

	/* If the properties are crazy large, that's too bad (for now): */
	if (phdr->p_filesz > sizeof(note))
		return -ENOEXEC;

	pos = phdr->p_offset;
	n = kernel_read(f, &note, phdr->p_filesz, &pos);

	BUILD_BUG_ON(sizeof(note) < sizeof(note.nhdr) + NOTE_NAME_SZ);
	if (n < 0 || n < sizeof(note.nhdr) + NOTE_NAME_SZ)
		return -EIO;

	if (note.nhdr.n_type != NT_GNU_PROPERTY_TYPE_0 ||
	    note.nhdr.n_namesz != NOTE_NAME_SZ ||
	    strncmp(note.data + sizeof(note.nhdr),
		    NN_GNU_PROPERTY_TYPE_0, n - sizeof(note.nhdr)))
		return -ENOEXEC;

	off = round_up(sizeof(note.nhdr) + NOTE_NAME_SZ,
		       ELF_GNU_PROPERTY_ALIGN);
	if (off > n)
		return -ENOEXEC;

	/* The descriptor must lie entirely within what was read. */
	if (note.nhdr.n_descsz > n - off)
		return -ENOEXEC;
	datasz = off + note.nhdr.n_descsz;

	have_prev_type = false;
	do {
		ret = parse_elf_property(note.data, &off, datasz, arch,
					 have_prev_type, &prev_type);
		have_prev_type = true;
	} while (!ret);

	/* -ENOENT just means we consumed every property cleanly. */
	return ret == -ENOENT ? 0 : ret;
}
832
load_elf_binary(struct linux_binprm * bprm)833 static int load_elf_binary(struct linux_binprm *bprm)
834 {
835 struct file *interpreter = NULL; /* to shut gcc up */
836 unsigned long load_bias = 0, phdr_addr = 0;
837 int first_pt_load = 1;
838 unsigned long error;
839 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
840 struct elf_phdr *elf_property_phdata = NULL;
841 unsigned long elf_brk;
842 bool brk_moved = false;
843 int retval, i;
844 unsigned long elf_entry;
845 unsigned long e_entry;
846 unsigned long interp_load_addr = 0;
847 unsigned long start_code, end_code, start_data, end_data;
848 unsigned long reloc_func_desc __maybe_unused = 0;
849 int executable_stack = EXSTACK_DEFAULT;
850 struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
851 struct elfhdr *interp_elf_ex = NULL;
852 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
853 struct mm_struct *mm;
854 struct pt_regs *regs;
855
856 retval = -ENOEXEC;
857 /* First of all, some simple consistency checks */
858 if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
859 goto out;
860
861 if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
862 goto out;
863 if (!elf_check_arch(elf_ex))
864 goto out;
865 if (elf_check_fdpic(elf_ex))
866 goto out;
867 if (!can_mmap_file(bprm->file))
868 goto out;
869
870 elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
871 if (!elf_phdata)
872 goto out;
873
874 elf_ppnt = elf_phdata;
875 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
876 char *elf_interpreter;
877
878 if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
879 elf_property_phdata = elf_ppnt;
880 continue;
881 }
882
883 if (elf_ppnt->p_type != PT_INTERP)
884 continue;
885
886 /*
887 * This is the program interpreter used for shared libraries -
888 * for now assume that this is an a.out format binary.
889 */
890 retval = -ENOEXEC;
891 if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
892 goto out_free_ph;
893
894 retval = -ENOMEM;
895 elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
896 if (!elf_interpreter)
897 goto out_free_ph;
898
899 retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
900 elf_ppnt->p_offset);
901 if (retval < 0)
902 goto out_free_interp;
903 /* make sure path is NULL terminated */
904 retval = -ENOEXEC;
905 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
906 goto out_free_interp;
907
908 interpreter = open_exec(elf_interpreter);
909 kfree(elf_interpreter);
910 retval = PTR_ERR(interpreter);
911 if (IS_ERR(interpreter))
912 goto out_free_ph;
913
914 /*
915 * If the binary is not readable then enforce mm->dumpable = 0
916 * regardless of the interpreter's permissions.
917 */
918 would_dump(bprm, interpreter);
919
920 interp_elf_ex = kmalloc_obj(*interp_elf_ex);
921 if (!interp_elf_ex) {
922 retval = -ENOMEM;
923 goto out_free_file;
924 }
925
926 /* Get the exec headers */
927 retval = elf_read(interpreter, interp_elf_ex,
928 sizeof(*interp_elf_ex), 0);
929 if (retval < 0)
930 goto out_free_dentry;
931
932 break;
933
934 out_free_interp:
935 kfree(elf_interpreter);
936 goto out_free_ph;
937 }
938
939 elf_ppnt = elf_phdata;
940 for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
941 switch (elf_ppnt->p_type) {
942 case PT_GNU_STACK:
943 if (elf_ppnt->p_flags & PF_X)
944 executable_stack = EXSTACK_ENABLE_X;
945 else
946 executable_stack = EXSTACK_DISABLE_X;
947 break;
948
949 case PT_LOPROC ... PT_HIPROC:
950 retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
951 bprm->file, false,
952 &arch_state);
953 if (retval)
954 goto out_free_dentry;
955 break;
956 }
957
958 /* Some simple consistency checks for the interpreter */
959 if (interpreter) {
960 retval = -ELIBBAD;
961 /* Not an ELF interpreter */
962 if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
963 goto out_free_dentry;
964 /* Verify the interpreter has a valid arch */
965 if (!elf_check_arch(interp_elf_ex) ||
966 elf_check_fdpic(interp_elf_ex))
967 goto out_free_dentry;
968
969 /* Load the interpreter program headers */
970 interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
971 interpreter);
972 if (!interp_elf_phdata)
973 goto out_free_dentry;
974
975 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
976 elf_property_phdata = NULL;
977 elf_ppnt = interp_elf_phdata;
978 for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
979 switch (elf_ppnt->p_type) {
980 case PT_GNU_PROPERTY:
981 elf_property_phdata = elf_ppnt;
982 break;
983
984 case PT_LOPROC ... PT_HIPROC:
985 retval = arch_elf_pt_proc(interp_elf_ex,
986 elf_ppnt, interpreter,
987 true, &arch_state);
988 if (retval)
989 goto out_free_dentry;
990 break;
991 }
992 }
993
994 retval = parse_elf_properties(interpreter ?: bprm->file,
995 elf_property_phdata, &arch_state);
996 if (retval)
997 goto out_free_dentry;
998
999 /*
1000 * Allow arch code to reject the ELF at this point, whilst it's
1001 * still possible to return an error to the code that invoked
1002 * the exec syscall.
1003 */
1004 retval = arch_check_elf(elf_ex,
1005 !!interpreter, interp_elf_ex,
1006 &arch_state);
1007 if (retval)
1008 goto out_free_dentry;
1009
1010 /* Flush all traces of the currently running executable */
1011 retval = begin_new_exec(bprm);
1012 if (retval)
1013 goto out_free_dentry;
1014
1015 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
1016 may depend on the personality. */
1017 SET_PERSONALITY2(*elf_ex, &arch_state);
1018 if (elf_read_implies_exec(*elf_ex, executable_stack))
1019 current->personality |= READ_IMPLIES_EXEC;
1020
1021 const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
1022 if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
1023 current->flags |= PF_RANDOMIZE;
1024
1025 setup_new_exec(bprm);
1026
1027 /* Do this so that we can load the interpreter, if need be. We will
1028 change some of these later */
1029 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
1030 executable_stack);
1031 if (retval < 0)
1032 goto out_free_dentry;
1033
1034 elf_brk = 0;
1035
1036 start_code = ~0UL;
1037 end_code = 0;
1038 start_data = 0;
1039 end_data = 0;
1040
1041 /* Now we do a little grungy work by mmapping the ELF image into
1042 the correct location in memory. */
1043 for(i = 0, elf_ppnt = elf_phdata;
1044 i < elf_ex->e_phnum; i++, elf_ppnt++) {
1045 int elf_prot, elf_flags;
1046 unsigned long k, vaddr;
1047 unsigned long total_size = 0;
1048 unsigned long alignment;
1049
1050 if (elf_ppnt->p_type != PT_LOAD)
1051 continue;
1052
1053 elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
1054 !!interpreter, false);
1055
1056 elf_flags = MAP_PRIVATE;
1057
1058 vaddr = elf_ppnt->p_vaddr;
1059 /*
1060 * The first time through the loop, first_pt_load is true:
1061 * layout will be calculated. Once set, use MAP_FIXED since
1062 * we know we've already safely mapped the entire region with
1063 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
1064 */
1065 if (!first_pt_load) {
1066 elf_flags |= MAP_FIXED;
1067 } else if (elf_ex->e_type == ET_EXEC) {
1068 /*
1069 * This logic is run once for the first LOAD Program
1070 * Header for ET_EXEC binaries. No special handling
1071 * is needed.
1072 */
1073 elf_flags |= MAP_FIXED_NOREPLACE;
1074 } else if (elf_ex->e_type == ET_DYN) {
1075 /*
1076 * This logic is run once for the first LOAD Program
1077 * Header for ET_DYN binaries to calculate the
1078 * randomization (load_bias) for all the LOAD
1079 * Program Headers.
1080 */
1081
1082 /*
1083 * Calculate the entire size of the ELF mapping
1084 * (total_size), used for the initial mapping,
1085 * due to load_addr_set which is set to true later
1086 * once the initial mapping is performed.
1087 *
1088 * Note that this is only sensible when the LOAD
1089 * segments are contiguous (or overlapping). If
1090 * used for LOADs that are far apart, this would
1091 * cause the holes between LOADs to be mapped,
1092 * running the risk of having the mapping fail,
1093 * as it would be larger than the ELF file itself.
1094 *
1095 * As a result, only ET_DYN does this, since
1096 * some ET_EXEC (e.g. ia64) may have large virtual
1097 * memory holes between LOADs.
1098 *
1099 */
1100 total_size = total_mapping_size(elf_phdata,
1101 elf_ex->e_phnum);
1102 if (!total_size) {
1103 retval = -EINVAL;
1104 goto out_free_dentry;
1105 }
1106
1107 /* Calculate any requested alignment. */
1108 alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
1109
1110 /**
1111 * DOC: PIE handling
1112 *
1113 * There are effectively two types of ET_DYN ELF
1114 * binaries: programs (i.e. PIE: ET_DYN with
1115 * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
1116 * without PT_INTERP, usually the ELF interpreter
1117 * itself). Loaders must be loaded away from programs
1118 * since the program may otherwise collide with the
1119 * loader (especially for ET_EXEC which does not have
1120 * a randomized position).
1121 *
1122 * For example, to handle invocations of
1123 * "./ld.so someprog" to test out a new version of
1124 * the loader, the subsequent program that the
1125 * loader loads must avoid the loader itself, so
1126 * they cannot share the same load range. Sufficient
1127 * room for the brk must be allocated with the
1128 * loader as well, since brk must be available with
1129 * the loader.
1130 *
1131 * Therefore, programs are loaded offset from
1132 * ELF_ET_DYN_BASE and loaders are loaded into the
1133 * independently randomized mmap region (0 load_bias
1134 * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
1135 *
1136 * See below for "brk" handling details, which is
1137 * also affected by program vs loader and ASLR.
1138 */
1139 if (interpreter) {
1140 /* On ET_DYN with PT_INTERP, we do the ASLR. */
1141 load_bias = ELF_ET_DYN_BASE;
1142 if (current->flags & PF_RANDOMIZE)
1143 load_bias += arch_mmap_rnd();
1144 /* Adjust alignment as requested. */
1145 if (alignment)
1146 load_bias &= ~(alignment - 1);
1147 elf_flags |= MAP_FIXED_NOREPLACE;
1148 } else {
1149 /*
1150 * For ET_DYN without PT_INTERP, we rely on
1151 * the architectures's (potentially ASLR) mmap
1152 * base address (via a load_bias of 0).
1153 *
1154 * When a large alignment is requested, we
1155 * must do the allocation at address "0" right
1156 * now to discover where things will load so
1157 * that we can adjust the resulting alignment.
1158 * In this case (load_bias != 0), we can use
1159 * MAP_FIXED_NOREPLACE to make sure the mapping
1160 * doesn't collide with anything.
1161 */
1162 if (alignment > ELF_MIN_ALIGN) {
1163 load_bias = elf_load(bprm->file, 0, elf_ppnt,
1164 elf_prot, elf_flags, total_size);
1165 if (BAD_ADDR(load_bias)) {
1166 retval = IS_ERR_VALUE(load_bias) ?
1167 PTR_ERR((void*)load_bias) : -EINVAL;
1168 goto out_free_dentry;
1169 }
1170 vm_munmap(load_bias, total_size);
1171 /* Adjust alignment as requested. */
1172 if (alignment)
1173 load_bias &= ~(alignment - 1);
1174 elf_flags |= MAP_FIXED_NOREPLACE;
1175 } else
1176 load_bias = 0;
1177 }
1178
1179 /*
1180 * Since load_bias is used for all subsequent loading
1181 * calculations, we must lower it by the first vaddr
1182 * so that the remaining calculations based on the
1183 * ELF vaddrs will be correctly offset. The result
1184 * is then page aligned.
1185 */
1186 load_bias = ELF_PAGESTART(load_bias - vaddr);
1187 }
1188
1189 error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
1190 elf_prot, elf_flags, total_size);
1191 if (BAD_ADDR(error)) {
1192 retval = IS_ERR_VALUE(error) ?
1193 PTR_ERR((void*)error) : -EINVAL;
1194 goto out_free_dentry;
1195 }
1196
1197 if (first_pt_load) {
1198 first_pt_load = 0;
1199 if (elf_ex->e_type == ET_DYN) {
1200 load_bias += error -
1201 ELF_PAGESTART(load_bias + vaddr);
1202 reloc_func_desc = load_bias;
1203 }
1204 }
1205
1206 /*
1207 * Figure out which segment in the file contains the Program
1208 * Header table, and map to the associated memory address.
1209 */
1210 if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
1211 elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
1212 phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
1213 elf_ppnt->p_vaddr;
1214 }
1215
1216 k = elf_ppnt->p_vaddr;
1217 if ((elf_ppnt->p_flags & PF_X) && k < start_code)
1218 start_code = k;
1219 if (start_data < k)
1220 start_data = k;
1221
1222 /*
1223 * Check to see if the section's size will overflow the
1224 * allowed task size. Note that p_filesz must always be
1225 * <= p_memsz so it is only necessary to check p_memsz.
1226 */
1227 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
1228 elf_ppnt->p_memsz > TASK_SIZE ||
1229 TASK_SIZE - elf_ppnt->p_memsz < k) {
1230 /* set_brk can never work. Avoid overflows. */
1231 retval = -EINVAL;
1232 goto out_free_dentry;
1233 }
1234
1235 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
1236
1237 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
1238 end_code = k;
1239 if (end_data < k)
1240 end_data = k;
1241 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1242 if (k > elf_brk)
1243 elf_brk = k;
1244 }
1245
1246 e_entry = elf_ex->e_entry + load_bias;
1247 phdr_addr += load_bias;
1248 elf_brk += load_bias;
1249 start_code += load_bias;
1250 end_code += load_bias;
1251 start_data += load_bias;
1252 end_data += load_bias;
1253
1254 if (interpreter) {
1255 elf_entry = load_elf_interp(interp_elf_ex,
1256 interpreter,
1257 load_bias, interp_elf_phdata,
1258 &arch_state);
1259 if (!IS_ERR_VALUE(elf_entry)) {
1260 /*
1261 * load_elf_interp() returns relocation
1262 * adjustment
1263 */
1264 interp_load_addr = elf_entry;
1265 elf_entry += interp_elf_ex->e_entry;
1266 }
1267 if (BAD_ADDR(elf_entry)) {
1268 retval = IS_ERR_VALUE(elf_entry) ?
1269 (int)elf_entry : -EINVAL;
1270 goto out_free_dentry;
1271 }
1272 reloc_func_desc = interp_load_addr;
1273
1274 exe_file_allow_write_access(interpreter);
1275 fput(interpreter);
1276
1277 kfree(interp_elf_ex);
1278 kfree(interp_elf_phdata);
1279 } else {
1280 elf_entry = e_entry;
1281 if (BAD_ADDR(elf_entry)) {
1282 retval = -EINVAL;
1283 goto out_free_dentry;
1284 }
1285 }
1286
1287 kfree(elf_phdata);
1288
1289 set_binfmt(&elf_format);
1290
1291 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1292 retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
1293 if (retval < 0)
1294 goto out;
1295 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1296
1297 retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
1298 e_entry, phdr_addr);
1299 if (retval < 0)
1300 goto out;
1301
1302 mm = current->mm;
1303 mm->end_code = end_code;
1304 mm->start_code = start_code;
1305 mm->start_data = start_data;
1306 mm->end_data = end_data;
1307 mm->start_stack = bprm->p;
1308
1309 elf_coredump_set_mm_eflags(mm, elf_ex->e_flags);
1310
1311 /**
1312 * DOC: "brk" handling
1313 *
1314 * For architectures with ELF randomization, when executing a
1315 * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
1316 * move the brk area out of the mmap region and into the unused
1317 * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
1318 * early with the stack growing down or other regions being put
1319 * into the mmap region by the kernel (e.g. vdso).
1320 *
1321 * In the CONFIG_COMPAT_BRK case, though, everything is turned
1322 * off because we're not allowed to move the brk at all.
1323 */
1324 if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
1325 IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
1326 elf_ex->e_type == ET_DYN && !interpreter) {
1327 elf_brk = ELF_ET_DYN_BASE;
1328 /* This counts as moving the brk, so let brk(2) know. */
1329 brk_moved = true;
1330 }
1331 mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
1332
1333 if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
1334 /*
1335 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
1336 * leave a gap between .bss and brk.
1337 */
1338 if (!brk_moved)
1339 mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
1340
1341 mm->brk = mm->start_brk = arch_randomize_brk(mm);
1342 brk_moved = true;
1343 }
1344
1345 #ifdef compat_brk_randomized
1346 if (brk_moved)
1347 current->brk_randomized = 1;
1348 #endif
1349
1350 if (current->personality & MMAP_PAGE_ZERO) {
1351 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1352 and some applications "depend" upon this behavior.
1353 Since we do not have the power to recompile these, we
1354 emulate the SVr4 behavior. Sigh. */
1355 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1356 MAP_FIXED | MAP_PRIVATE, 0);
1357
1358 retval = do_mseal(0, PAGE_SIZE, 0);
1359 if (retval)
1360 pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
1361 task_pid_nr(current), retval);
1362 }
1363
1364 regs = current_pt_regs();
1365 #ifdef ELF_PLAT_INIT
1366 /*
1367 * The ABI may specify that certain registers be set up in special
1368 * ways (on i386 %edx is the address of a DT_FINI function, for
1369 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1370 * that the e_entry field is the address of the function descriptor
1371 * for the startup routine, rather than the address of the startup
1372 * routine itself. This macro performs whatever initialization to
1373 * the regs structure is required as well as any relocations to the
1374 * function descriptor entries when executing dynamically links apps.
1375 */
1376 ELF_PLAT_INIT(regs, reloc_func_desc);
1377 #endif
1378
1379 finalize_exec(bprm);
1380 START_THREAD(elf_ex, regs, elf_entry, bprm->p);
1381 retval = 0;
1382 out:
1383 return retval;
1384
1385 /* error cleanup */
1386 out_free_dentry:
1387 kfree(interp_elf_ex);
1388 kfree(interp_elf_phdata);
1389 out_free_file:
1390 exe_file_allow_write_access(interpreter);
1391 if (interpreter)
1392 fput(interpreter);
1393 out_free_ph:
1394 kfree(elf_phdata);
1395 goto out;
1396 }
1397
1398 #ifdef CONFIG_ELF_CORE
1399 /*
1400 * ELF core dumper
1401 *
1402 * Modelled on fs/exec.c:aout_core_dump()
1403 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1404 */
1405
1406 /* An ELF note in memory */
struct memelfnote
{
	const char *name;	/* note name string, e.g. "CORE" or "LINUX" */
	int type;		/* NT_* note type */
	unsigned int datasz;	/* length of @data in bytes */
	void *data;		/* descriptor payload; emitted by writenote() */
};
1414
notesize(struct memelfnote * en)1415 static int notesize(struct memelfnote *en)
1416 {
1417 int sz;
1418
1419 sz = sizeof(struct elf_note);
1420 sz += roundup(strlen(en->name) + 1, 4);
1421 sz += roundup(en->datasz, 4);
1422
1423 return sz;
1424 }
1425
writenote(struct memelfnote * men,struct coredump_params * cprm)1426 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1427 {
1428 struct elf_note en;
1429 en.n_namesz = strlen(men->name) + 1;
1430 en.n_descsz = men->datasz;
1431 en.n_type = men->type;
1432
1433 return dump_emit(cprm, &en, sizeof(en)) &&
1434 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1435 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1436 }
1437
/*
 * Initialise the ELF file header for an ET_CORE dump with @segs program
 * headers, machine type @machine and arch flags @flags.
 */
static void fill_elf_header(struct elfhdr *elf, int segs,
			    u16 machine, u32 flags)
{
	memset(elf, 0, sizeof(*elf));

	/* Identification bytes: magic, class, byte order, version, ABI. */
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS]   = ELF_CLASS;
	elf->e_ident[EI_DATA]    = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI]   = ELF_OSABI;

	/* Fixed header fields: program headers follow immediately. */
	elf->e_type      = ET_CORE;
	elf->e_machine   = machine;
	elf->e_version   = EV_CURRENT;
	elf->e_flags     = flags;
	elf->e_phoff     = sizeof(struct elfhdr);
	elf->e_ehsize    = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum     = segs;
}
1458
/*
 * Build the PT_NOTE program header: @sz bytes of note data located at
 * file @offset.  A note segment exists only in the file, so the memory
 * image fields (vaddr/paddr/memsz/flags) stay zero.
 */
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
	*phdr = (struct elf_phdr) {
		.p_type   = PT_NOTE,
		.p_offset = offset,
		.p_filesz = sz,
		.p_align  = 4,
	};
}
1470
/*
 * Initialise an in-memory note record.  No data is copied: @data must
 * remain valid until the note has been written out by writenote().
 */
static void __fill_note(struct memelfnote *note, const char *name, int type,
		unsigned int sz, void *data)
{
	note->name = name;
	note->type = type;
	note->datasz = sz;
	note->data = data;
}

/* Derive the conventional name/type pair (NN_*/NT_*) from a single token. */
#define fill_note(note, type, sz, data) \
	__fill_note(note, NN_ ## type, NT_ ## type, sz, data)
1482
1483 /*
1484 * fill up all the fields in prstatus from the given task struct, except
1485 * registers which need to be filled up separately.
1486 */
static void fill_prstatus(struct elf_prstatus_common *prstatus,
		struct task_struct *p, long signr)
{
	/* Signal that caused the dump, plus pending/blocked signal sets. */
	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
	prstatus->pr_sigpend = p->pending.signal.sig[0];
	prstatus->pr_sighold = p->blocked.sig[0];
	rcu_read_lock();
	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	prstatus->pr_pid = task_pid_vnr(p);
	prstatus->pr_pgrp = task_pgrp_vnr(p);
	prstatus->pr_sid = task_session_vnr(p);
	if (thread_group_leader(p)) {
		struct task_cputime cputime;

		/*
		 * This is the record for the group leader.  It shows the
		 * group-wide total, not its individual thread total.
		 */
		thread_group_cputime(p, &cputime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(cputime.utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(cputime.stime);
	} else {
		u64 utime, stime;

		/* Non-leader threads report their own CPU time only. */
		task_cputime(p, &utime, &stime);
		prstatus->pr_utime = ns_to_kernel_old_timeval(utime);
		prstatus->pr_stime = ns_to_kernel_old_timeval(stime);
	}

	/* Cumulative times of reaped children. */
	prstatus->pr_cutime = ns_to_kernel_old_timeval(p->signal->cutime);
	prstatus->pr_cstime = ns_to_kernel_old_timeval(p->signal->cstime);
}
1520
/*
 * Fill the NT_PRPSINFO payload: command line (read back from the
 * process's own argv area in user memory), ids, state, nice value and
 * credentials.  Returns 0 on success or -EFAULT if the argv area
 * cannot be read.
 */
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
		struct mm_struct *mm)
{
	const struct cred *cred;
	unsigned int i, len;
	unsigned int state;

	/* first copy the parameters from user space */
	memset(psinfo, 0, sizeof(struct elf_prpsinfo));

	/* Clamp to the fixed pr_psargs buffer, leaving room for the NUL. */
	len = mm->arg_end - mm->arg_start;
	if (len >= ELF_PRARGSZ)
		len = ELF_PRARGSZ-1;
	if (copy_from_user(&psinfo->pr_psargs,
			   (const char __user *)mm->arg_start, len))
		return -EFAULT;
	/* argv strings are NUL-separated; join them with spaces. */
	for(i = 0; i < len; i++)
		if (psinfo->pr_psargs[i] == 0)
			psinfo->pr_psargs[i] = ' ';
	psinfo->pr_psargs[len] = 0;

	rcu_read_lock();
	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
	rcu_read_unlock();
	psinfo->pr_pid = task_pid_vnr(p);
	psinfo->pr_pgrp = task_pgrp_vnr(p);
	psinfo->pr_sid = task_session_vnr(p);

	/* Map lowest set task-state bit to the historical "RSDTZW" code. */
	state = READ_ONCE(p->__state);
	i = state ? ffz(~state) + 1 : 0;
	psinfo->pr_state = i;
	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
	psinfo->pr_nice = task_nice(p);
	psinfo->pr_flag = p->flags;
	rcu_read_lock();
	cred = __task_cred(p);
	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
	rcu_read_unlock();
	get_task_comm(psinfo->pr_fname, p);

	return 0;
}
1565
fill_auxv_note(struct memelfnote * note,struct mm_struct * mm)1566 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1567 {
1568 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1569 int i = 0;
1570 do
1571 i += 2;
1572 while (auxv[i - 2] != AT_NULL);
1573 fill_note(note, AUXV, i * sizeof(elf_addr_t), auxv);
1574 }
1575
/*
 * Convert the kernel-internal siginfo to the userspace ABI layout in
 * @csigdata, then wrap that buffer in an NT_SIGINFO note.  @csigdata
 * must stay valid until the note is written out.
 */
static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
		const kernel_siginfo_t *siginfo)
{
	copy_siginfo_to_external(csigdata, siginfo);
	fill_note(note, SIGINFO, sizeof(*csigdata), csigdata);
}
1582
1583 /*
1584 * Format of NT_FILE note:
1585 *
1586 * long count -- how many files are mapped
1587 * long page_size -- units for file_ofs
1588 * array of [COUNT] elements of
1589 * long start
1590 * long end
1591 * long file_ofs
1592 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1593 */
fill_files_note(struct memelfnote * note,struct coredump_params * cprm)1594 static int fill_files_note(struct memelfnote *note, struct coredump_params *cprm)
1595 {
1596 unsigned count, size, names_ofs, remaining, n;
1597 user_long_t *data;
1598 user_long_t *start_end_ofs;
1599 char *name_base, *name_curpos;
1600 int i;
1601
1602 /* *Estimated* file count and total data size needed */
1603 count = cprm->vma_count;
1604 if (count > UINT_MAX / 64)
1605 return -EINVAL;
1606 size = count * 64;
1607
1608 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1609 alloc:
1610 /* paranoia check */
1611 if (size >= core_file_note_size_limit) {
1612 pr_warn_once("coredump Note size too large: %u (does kernel.core_file_note_size_limit sysctl need adjustment?\n",
1613 size);
1614 return -EINVAL;
1615 }
1616 size = round_up(size, PAGE_SIZE);
1617 /*
1618 * "size" can be 0 here legitimately.
1619 * Let it ENOMEM and omit NT_FILE section which will be empty anyway.
1620 */
1621 data = kvmalloc(size, GFP_KERNEL);
1622 if (ZERO_OR_NULL_PTR(data))
1623 return -ENOMEM;
1624
1625 start_end_ofs = data + 2;
1626 name_base = name_curpos = ((char *)data) + names_ofs;
1627 remaining = size - names_ofs;
1628 count = 0;
1629 for (i = 0; i < cprm->vma_count; i++) {
1630 struct core_vma_metadata *m = &cprm->vma_meta[i];
1631 struct file *file;
1632 const char *filename;
1633
1634 file = m->file;
1635 if (!file)
1636 continue;
1637 filename = file_path(file, name_curpos, remaining);
1638 if (IS_ERR(filename)) {
1639 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1640 kvfree(data);
1641 size = size * 5 / 4;
1642 goto alloc;
1643 }
1644 continue;
1645 }
1646
1647 /* file_path() fills at the end, move name down */
1648 /* n = strlen(filename) + 1: */
1649 n = (name_curpos + remaining) - filename;
1650 remaining = filename - name_curpos;
1651 memmove(name_curpos, filename, n);
1652 name_curpos += n;
1653
1654 *start_end_ofs++ = m->start;
1655 *start_end_ofs++ = m->end;
1656 *start_end_ofs++ = m->pgoff;
1657 count++;
1658 }
1659
1660 /* Now we know exact count of files, can store it */
1661 data[0] = count;
1662 data[1] = PAGE_SIZE;
1663 /*
1664 * Count usually is less than mm->map_count,
1665 * we need to move filenames down.
1666 */
1667 n = cprm->vma_count - count;
1668 if (n != 0) {
1669 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1670 memmove(name_base - shift_bytes, name_base,
1671 name_curpos - name_base);
1672 name_curpos -= shift_bytes;
1673 }
1674
1675 size = name_curpos - (char *)data;
1676 fill_note(note, FILE, size, data);
1677 return 0;
1678 }
1679
1680 #include <linux/regset.h>
1681
struct elf_thread_core_info {
	struct elf_thread_core_info *next;	/* singly-linked list of threads */
	struct task_struct *task;		/* thread this entry describes */
	struct elf_prstatus prstatus;		/* backing store for notes[0] */
	struct memelfnote notes[];		/* thread_notes entries; [0] is NT_PRSTATUS */
};
1688
struct elf_note_info {
	struct elf_thread_core_info *thread;	/* list head; first entry is the dumping task */
	struct memelfnote psinfo;		/* NT_PRPSINFO */
	struct memelfnote signote;		/* NT_SIGINFO */
	struct memelfnote auxv;			/* NT_AUXV */
	struct memelfnote files;		/* NT_FILE (data NULL if omitted) */
	user_siginfo_t csigdata;		/* buffer backing signote */
	size_t size;				/* running total of all note sizes */
	int thread_notes;			/* notes[] entries per thread */
};
1699
1700 #ifdef CORE_DUMP_USE_REGSET
1701 /*
1702 * When a regset has a writeback hook, we call it on each thread before
1703 * dumping user memory. On register window machines, this makes sure the
1704 * user memory backing the register data is up to date before we read it.
1705 */
do_thread_regset_writeback(struct task_struct * task,const struct user_regset * regset)1706 static void do_thread_regset_writeback(struct task_struct *task,
1707 const struct user_regset *regset)
1708 {
1709 if (regset->writeback)
1710 regset->writeback(task, regset, 1);
1711 }
1712
1713 #ifndef PRSTATUS_SIZE
1714 #define PRSTATUS_SIZE sizeof(struct elf_prstatus)
1715 #endif
1716
1717 #ifndef SET_PR_FPVALID
1718 #define SET_PR_FPVALID(S) ((S)->pr_fpvalid = 1)
1719 #endif
1720
fill_thread_core_info(struct elf_thread_core_info * t,const struct user_regset_view * view,long signr,struct elf_note_info * info)1721 static int fill_thread_core_info(struct elf_thread_core_info *t,
1722 const struct user_regset_view *view,
1723 long signr, struct elf_note_info *info)
1724 {
1725 unsigned int note_iter, view_iter;
1726
1727 /*
1728 * NT_PRSTATUS is the one special case, because the regset data
1729 * goes into the pr_reg field inside the note contents, rather
1730 * than being the whole note contents. We fill the regset in here.
1731 * We assume that regset 0 is NT_PRSTATUS.
1732 */
1733 fill_prstatus(&t->prstatus.common, t->task, signr);
1734 regset_get(t->task, &view->regsets[0],
1735 sizeof(t->prstatus.pr_reg), &t->prstatus.pr_reg);
1736
1737 fill_note(&t->notes[0], PRSTATUS, PRSTATUS_SIZE, &t->prstatus);
1738 info->size += notesize(&t->notes[0]);
1739
1740 do_thread_regset_writeback(t->task, &view->regsets[0]);
1741
1742 /*
1743 * Each other regset might generate a note too. For each regset
1744 * that has no core_note_type or is inactive, skip it.
1745 */
1746 note_iter = 1;
1747 for (view_iter = 1; view_iter < view->n; ++view_iter) {
1748 const struct user_regset *regset = &view->regsets[view_iter];
1749 int note_type = regset->core_note_type;
1750 const char *note_name = regset->core_note_name;
1751 bool is_fpreg = note_type == NT_PRFPREG;
1752 void *data;
1753 int ret;
1754
1755 do_thread_regset_writeback(t->task, regset);
1756 if (!note_type) // not for coredumps
1757 continue;
1758 if (regset->active && regset->active(t->task, regset) <= 0)
1759 continue;
1760
1761 ret = regset_get_alloc(t->task, regset, ~0U, &data);
1762 if (ret < 0)
1763 continue;
1764
1765 if (WARN_ON_ONCE(note_iter >= info->thread_notes))
1766 break;
1767
1768 if (is_fpreg)
1769 SET_PR_FPVALID(&t->prstatus);
1770
1771 /* There should be a note name, but if not, guess: */
1772 if (WARN_ON_ONCE(!note_name))
1773 note_name = "LINUX";
1774 else
1775 /* Warn on non-legacy-compatible names, for now. */
1776 WARN_ON_ONCE(strcmp(note_name,
1777 is_fpreg ? "CORE" : "LINUX"));
1778
1779 __fill_note(&t->notes[note_iter], note_name, note_type,
1780 ret, data);
1781
1782 info->size += notesize(&t->notes[note_iter]);
1783 note_iter++;
1784 }
1785
1786 return 1;
1787 }
1788 #else
fill_thread_core_info(struct elf_thread_core_info * t,const struct user_regset_view * view,long signr,struct elf_note_info * info)1789 static int fill_thread_core_info(struct elf_thread_core_info *t,
1790 const struct user_regset_view *view,
1791 long signr, struct elf_note_info *info)
1792 {
1793 struct task_struct *p = t->task;
1794 elf_fpregset_t *fpu;
1795
1796 fill_prstatus(&t->prstatus.common, p, signr);
1797 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1798
1799 fill_note(&t->notes[0], PRSTATUS, sizeof(t->prstatus), &t->prstatus);
1800 info->size += notesize(&t->notes[0]);
1801
1802 fpu = kzalloc_obj(elf_fpregset_t);
1803 if (!fpu || !elf_core_copy_task_fpregs(p, fpu)) {
1804 kfree(fpu);
1805 return 1;
1806 }
1807
1808 t->prstatus.pr_fpvalid = 1;
1809 fill_note(&t->notes[1], PRFPREG, sizeof(*fpu), fpu);
1810 info->size += notesize(&t->notes[1]);
1811
1812 return 1;
1813 }
1814 #endif
1815
/*
 * Collect all non-memory information for the dump: initialise the ELF
 * header, allocate one elf_thread_core_info per thread, and fill in the
 * per-thread and process-wide notes, accumulating their total size in
 * info->size.  Returns 1 on success, 0 on failure; on failure the
 * caller releases any partially built state via free_note_info().
 */
static int fill_note_info(struct elfhdr *elf, int phdrs,
			  struct elf_note_info *info,
			  struct coredump_params *cprm)
{
	struct task_struct *dump_task = current;
	const struct user_regset_view *view;
	struct elf_thread_core_info *t;
	struct elf_prpsinfo *psinfo;
	struct core_thread *ct;
	u16 machine;
	u32 flags;

	/* Freed later by free_note_info() via info->psinfo.data. */
	psinfo = kmalloc_obj(*psinfo);
	if (!psinfo)
		return 0;
	fill_note(&info->psinfo, PRPSINFO, sizeof(*psinfo), psinfo);

#ifdef CORE_DUMP_USE_REGSET
	view = task_user_regset_view(dump_task);

	/*
	 * Figure out how many notes we're going to need for each thread.
	 */
	info->thread_notes = 0;
	for (int i = 0; i < view->n; ++i)
		if (view->regsets[i].core_note_type != 0)
			++info->thread_notes;

	/*
	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
	 * since it is our one special case.
	 */
	if (unlikely(info->thread_notes == 0) ||
	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
		WARN_ON(1);
		return 0;
	}

	machine = view->e_machine;
	flags = view->e_flags;
#else
	view = NULL;
	info->thread_notes = 2;		/* NT_PRSTATUS + NT_PRFPREG */
	machine = ELF_ARCH;
	flags = ELF_CORE_EFLAGS;
#endif

	/*
	 * Override ELF e_flags with value taken from process,
	 * if arch needs that.
	 */
	flags = elf_coredump_get_mm_eflags(dump_task->mm, flags);

	/*
	 * Initialize the ELF file header.
	 */
	fill_elf_header(elf, phdrs, machine, flags);

	/*
	 * Allocate a structure for each thread.  The dumping task goes
	 * first; the others are taken from the core_state dumper list.
	 */
	info->thread = kzalloc_flex(*info->thread, notes, info->thread_notes);
	if (unlikely(!info->thread))
		return 0;

	info->thread->task = dump_task;
	for (ct = dump_task->signal->core_state->dumper.next; ct; ct = ct->next) {
		t = kzalloc_flex(*t, notes, info->thread_notes);
		if (unlikely(!t))
			return 0;

		/* Insert after the list head, keeping dump_task first. */
		t->task = ct->task;
		t->next = info->thread->next;
		info->thread->next = t;
	}

	/*
	 * Now fill in each thread's information.
	 */
	for (t = info->thread; t != NULL; t = t->next)
		if (!fill_thread_core_info(t, view, cprm->siginfo->si_signo, info))
			return 0;

	/*
	 * Fill in the two process-wide notes.
	 */
	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
	info->size += notesize(&info->psinfo);

	fill_siginfo_note(&info->signote, &info->csigdata, cprm->siginfo);
	info->size += notesize(&info->signote);

	fill_auxv_note(&info->auxv, current->mm);
	info->size += notesize(&info->auxv);

	/* NT_FILE is optional: simply omitted if it can't be built. */
	if (fill_files_note(&info->files, cprm) == 0)
		info->size += notesize(&info->files);

	return 1;
}
1916
1917 /*
1918 * Write all the notes for each thread. When writing the first thread, the
1919 * process-wide notes are interleaved after the first thread-specific note.
1920 */
write_note_info(struct elf_note_info * info,struct coredump_params * cprm)1921 static int write_note_info(struct elf_note_info *info,
1922 struct coredump_params *cprm)
1923 {
1924 bool first = true;
1925 struct elf_thread_core_info *t = info->thread;
1926
1927 do {
1928 int i;
1929
1930 if (!writenote(&t->notes[0], cprm))
1931 return 0;
1932
1933 if (first && !writenote(&info->psinfo, cprm))
1934 return 0;
1935 if (first && !writenote(&info->signote, cprm))
1936 return 0;
1937 if (first && !writenote(&info->auxv, cprm))
1938 return 0;
1939 if (first && info->files.data &&
1940 !writenote(&info->files, cprm))
1941 return 0;
1942
1943 for (i = 1; i < info->thread_notes; ++i)
1944 if (t->notes[i].data &&
1945 !writenote(&t->notes[i], cprm))
1946 return 0;
1947
1948 first = false;
1949 t = t->next;
1950 } while (t);
1951
1952 return 1;
1953 }
1954
free_note_info(struct elf_note_info * info)1955 static void free_note_info(struct elf_note_info *info)
1956 {
1957 struct elf_thread_core_info *threads = info->thread;
1958 while (threads) {
1959 unsigned int i;
1960 struct elf_thread_core_info *t = threads;
1961 threads = t->next;
1962 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1963 for (i = 1; i < info->thread_notes; ++i)
1964 kvfree(t->notes[i].data);
1965 kfree(t);
1966 }
1967 kfree(info->psinfo.data);
1968 kvfree(info->files.data);
1969 }
1970
/*
 * Set up extended numbering when the segment count overflows e_phnum:
 * the ELF header points at a single SHT_NULL section header whose
 * sh_info carries the real segment count (see PN_XNUM in <linux/elf.h>).
 */
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
			     elf_addr_t e_shoff, int segs)
{
	elf->e_shoff = e_shoff;
	elf->e_shentsize = sizeof(*shdr4extnum);
	elf->e_shnum = 1;
	elf->e_shstrndx = SHN_UNDEF;

	/* All unnamed fields are implicitly zeroed. */
	*shdr4extnum = (struct elf_shdr) {
		.sh_type = SHT_NULL,
		.sh_size = elf->e_shnum,
		.sh_link = elf->e_shstrndx,
		.sh_info = segs,
	};
}
1986
1987 /*
1988 * Actual dumper
1989 *
1990 * This is a two-pass process; first we find the offsets of the bits,
1991 * and then they are actually written out. If we run out of core limit
1992 * we just truncate.
1993 */
static int elf_core_dump(struct coredump_params *cprm)
{
	int has_dumped = 0;		/* return value: non-zero once notes were collected */
	int segs, i;
	struct elfhdr elf;
	loff_t offset = 0, dataoff;
	struct elf_note_info info = { };
	struct elf_phdr *phdr4note = NULL;	/* phdr describing the PT_NOTE segment */
	struct elf_shdr *shdr4extnum = NULL;	/* only used when e_phnum == PN_XNUM */
	Elf_Half e_phnum;
	elf_addr_t e_shoff;

	/*
	 * The number of segs is recorded in the ELF header as a 16-bit value.
	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
	 */
	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);

	/* for notes section */
	segs++;

	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
	 * this, kernel supports extended numbering. Have a look at
	 * include/linux/elf.h for further information. */
	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

	/*
	 * Collect all the non-memory information about the process for the
	 * notes.  This also sets up the file header.
	 */
	if (!fill_note_info(&elf, e_phnum, &info, cprm))
		goto end_coredump;

	has_dumped = 1;

	/* Pass 1: compute file offsets only; nothing is emitted yet. */
	offset += sizeof(elf);				/* ELF header */
	offset += segs * sizeof(struct elf_phdr);	/* Program headers */

	/* Write notes phdr entry */
	{
		size_t sz = info.size;

		/* For cell spufs and x86 xstate */
		sz += elf_coredump_extra_notes_size();

		phdr4note = kmalloc_obj(*phdr4note);
		if (!phdr4note)
			goto end_coredump;

		fill_elf_note_phdr(phdr4note, sz, offset);
		offset += sz;
	}

	/* Segment data starts page-aligned after the headers and notes. */
	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

	offset += cprm->vma_data_size;
	offset += elf_core_extra_data_size(cprm);
	e_shoff = offset;	/* section headers (if any) land at the very end */

	if (e_phnum == PN_XNUM) {
		shdr4extnum = kmalloc_obj(*shdr4extnum);
		if (!shdr4extnum)
			goto end_coredump;
		fill_extnum_info(&elf, shdr4extnum, e_shoff, segs);
	}

	/*
	 * Pass 2: emit the pieces in file order.  Rewind to the start of
	 * segment data so the p_offset values assigned below are correct.
	 */
	offset = dataoff;

	if (!dump_emit(cprm, &elf, sizeof(elf)))
		goto end_coredump;

	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
		goto end_coredump;

	/* Write program headers for segments dump */
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;
		struct elf_phdr phdr;

		phdr.p_type = PT_LOAD;
		phdr.p_offset = offset;
		phdr.p_vaddr = meta->start;
		phdr.p_paddr = 0;
		/* p_filesz may be smaller than p_memsz for partially dumped VMAs */
		phdr.p_filesz = meta->dump_size;
		phdr.p_memsz = meta->end - meta->start;
		offset += phdr.p_filesz;
		phdr.p_flags = 0;
		if (meta->flags & VM_READ)
			phdr.p_flags |= PF_R;
		if (meta->flags & VM_WRITE)
			phdr.p_flags |= PF_W;
		if (meta->flags & VM_EXEC)
			phdr.p_flags |= PF_X;
		phdr.p_align = ELF_EXEC_PAGESIZE;

		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
			goto end_coredump;
	}

	if (!elf_core_write_extra_phdrs(cprm, offset))
		goto end_coredump;

	/* write out the notes section */
	if (!write_note_info(&info, cprm))
		goto end_coredump;

	/* For cell spufs and x86 xstate */
	if (elf_coredump_extra_notes_write(cprm))
		goto end_coredump;

	/* Align to page */
	dump_skip_to(cprm, dataoff);

	/* Now the segment contents themselves, in the same order as the phdrs. */
	for (i = 0; i < cprm->vma_count; i++) {
		struct core_vma_metadata *meta = cprm->vma_meta + i;

		if (!dump_user_range(cprm, meta->start, meta->dump_size))
			goto end_coredump;
	}

	if (!elf_core_write_extra_data(cprm))
		goto end_coredump;

	if (e_phnum == PN_XNUM) {
		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
			goto end_coredump;
	}

end_coredump:
	/* Single exit path frees everything; kfree(NULL) is a no-op. */
	free_note_info(&info);
	kfree(shdr4extnum);
	kfree(phdr4note);
	return has_dumped;
}
2128
2129 #endif /* CONFIG_ELF_CORE */
2130
/* Register the ELF loader; register_binfmt() reports no failure here. */
static int __init init_elf_binfmt(void)
{
	register_binfmt(&elf_format);
	return 0;
}
2136
static void __exit exit_elf_binfmt(void)
{
	/* Remove the ELF loader. */
	unregister_binfmt(&elf_format);
}
2142
/* Register at core_initcall level so ELF execution works early in boot. */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);

/* KUnit tests are compiled directly into this translation unit when enabled. */
#ifdef CONFIG_BINFMT_ELF_KUNIT_TEST
#include "tests/binfmt_elf_kunit.c"
#endif
2149