/*-
 * Copyright (c) 2000 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"
#include "opt_compat.h"
#include "opt_core.h"

#include <sys/param.h>
#include <sys/capability.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/eventhandler.h>

#include <net/zlib.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

#define OLD_EI_BRAND    8

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int interp_name_len, int32_t *osrel);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize);
static int __elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static boolean_t __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static boolean_t kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel);
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);

SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#ifdef COMPRESS_USER_CORES
static int compress_core(gzFile, char *, char *, unsigned int,
    struct thread * td);
#define CORE_BUF_SIZE   (16 * 1024)
#endif

int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RW, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");
TUNABLE_INT("kern.elf" __XSTRING(__ELF_WORD_SIZE) ".fallback_brand",
    &__elfN(fallback_brand));

static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0, "");

int __elfN(nxstack) =
#if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */
    1;
#else
    0;
#endif
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");

#if __ELF_WORD_SIZE == 32
#if defined(__amd64__) || defined(__ia64__)
int i386_read_exec = 0;
SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
    "enable execution from readable segments");
#endif
#endif

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

#define trunc_page_ps(va, ps)   ((va) & ~(ps - 1))
#define round_page_ps(va, ps)   (((va) + (ps - 1)) & ~(ps - 1))
#define aligned(a, t)   (trunc_page_ps((u_long)(a), sizeof(t)) == (u_long)(a))

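/*
 * For example, assuming ps is a power of two: with ps = 0x1000 (4KB pages),
 * trunc_page_ps(0x12345, 0x1000) yields 0x12000 and
 * round_page_ps(0x12345, 0x1000) yields 0x13000, while aligned(a, Elf_Addr)
 * simply checks that the address a is a multiple of sizeof(Elf_Addr).
 */
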
static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

Elf_Brandnote __elfN(freebsd_brandnote) = {
    .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR),
    .hdr.n_descsz = sizeof(int32_t),
    .hdr.n_type = 1,
    .vendor = FREEBSD_ABI_VENDOR,
    .flags = BN_TRANSLATE_OSREL,
    .trans_osrel = __elfN(freebsd_trans_osrel)
};

static boolean_t
__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
{
    uintptr_t p;

    p = (uintptr_t)(note + 1);
    p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
    *osrel = *(const int32_t *)(p);

    return (TRUE);
}

static const char GNU_ABI_VENDOR[] = "GNU";
static int GNU_KFREEBSD_ABI_DESC = 3;

Elf_Brandnote __elfN(kfreebsd_brandnote) = {
    .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
    .hdr.n_descsz = 16, /* XXX at least 16 */
    .hdr.n_type = 1,
    .vendor = GNU_ABI_VENDOR,
    .flags = BN_TRANSLATE_OSREL,
    .trans_osrel = kfreebsd_trans_osrel
};

static boolean_t
kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
    const Elf32_Word *desc;
    uintptr_t p;

    p = (uintptr_t)(note + 1);
    p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

    desc = (const Elf32_Word *)p;
    if (desc[0] != GNU_KFREEBSD_ABI_DESC)
        return (FALSE);

    /*
     * Debian GNU/kFreeBSD embeds the earliest compatible kernel version
     * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
     */
    *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];

    return (TRUE);
}
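/*
 * For example, a GNU note whose descriptor words are { 3, 9, 0, 1 }
 * (i.e. desc[0] == GNU_KFREEBSD_ABI_DESC) translates to
 * *osrel = 9 * 100000 + 0 * 1000 + 1 = 900001.
 */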

int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
    int i;

    for (i = 0; i < MAX_BRANDS; i++) {
        if (elf_brand_list[i] == NULL) {
            elf_brand_list[i] = entry;
            break;
        }
    }
    if (i == MAX_BRANDS) {
        printf("WARNING: %s: could not insert brandinfo entry: %p\n",
            __func__, entry);
        return (-1);
    }
    return (0);
}

int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
    int i;

    for (i = 0; i < MAX_BRANDS; i++) {
        if (elf_brand_list[i] == entry) {
            elf_brand_list[i] = NULL;
            break;
        }
    }
    if (i == MAX_BRANDS)
        return (-1);
    return (0);
}

int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
    struct proc *p;
    int rval = FALSE;

    sx_slock(&allproc_lock);
    FOREACH_PROC_IN_SYSTEM(p) {
        if (p->p_sysent == entry->sysvec) {
            rval = TRUE;
            break;
        }
    }
    sx_sunlock(&allproc_lock);

    return (rval);
}

static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int interp_name_len, int32_t *osrel)
{
    const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
    Elf_Brandinfo *bi;
    boolean_t ret;
    int i;

    /*
     * We support four types of branding -- (1) the ELF EI_OSABI field
     * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
     * branding w/in the ELF header, (3) path of the `interp_path'
     * field, and (4) the ".note.ABI-tag" ELF section.
     */

    /* Look for an ".note.ABI-tag" ELF section */
    for (i = 0; i < MAX_BRANDS; i++) {
        bi = elf_brand_list[i];
        if (bi == NULL)
            continue;
        if (hdr->e_machine == bi->machine && (bi->flags &
            (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
            ret = __elfN(check_note)(imgp, bi->brand_note, osrel);
            if (ret)
                return (bi);
        }
    }

    /* If the executable has a brand, search for it in the brand list. */
    for (i = 0; i < MAX_BRANDS; i++) {
        bi = elf_brand_list[i];
        if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
            continue;
        if (hdr->e_machine == bi->machine &&
            (hdr->e_ident[EI_OSABI] == bi->brand ||
            strncmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
            bi->compat_3_brand, strlen(bi->compat_3_brand)) == 0))
            return (bi);
    }

    /* Lacking a known brand, search for a recognized interpreter. */
    if (interp != NULL) {
        for (i = 0; i < MAX_BRANDS; i++) {
            bi = elf_brand_list[i];
            if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
                continue;
            if (hdr->e_machine == bi->machine &&
                /* ELF image p_filesz includes terminating zero */
                strlen(bi->interp_path) + 1 == interp_name_len &&
                strncmp(interp, bi->interp_path, interp_name_len)
                == 0)
                return (bi);
        }
    }

    /* Lacking a recognized interpreter, try the default brand */
    for (i = 0; i < MAX_BRANDS; i++) {
        bi = elf_brand_list[i];
        if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY)
            continue;
        if (hdr->e_machine == bi->machine &&
            __elfN(fallback_brand) == bi->brand)
            return (bi);
    }
    return (NULL);
}

static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
    Elf_Brandinfo *bi;
    int i;

    if (!IS_ELF(*hdr) ||
        hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
        hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
        hdr->e_ident[EI_VERSION] != EV_CURRENT ||
        hdr->e_phentsize != sizeof(Elf_Phdr) ||
        hdr->e_version != ELF_TARG_VER)
        return (ENOEXEC);

    /*
     * Make sure we have at least one brand for this machine.
     */

    for (i = 0; i < MAX_BRANDS; i++) {
        bi = elf_brand_list[i];
        if (bi != NULL && bi->machine == hdr->e_machine)
            break;
    }
    if (i == MAX_BRANDS)
        return (ENOEXEC);

    return (0);
}

static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
    struct sf_buf *sf;
    int error;
    vm_offset_t off;

    /*
     * Create the page if it doesn't exist yet. Ignore errors.
     */
    vm_map_lock(map);
    vm_map_insert(map, NULL, 0, trunc_page(start), round_page(end),
        VM_PROT_ALL, VM_PROT_ALL, 0);
    vm_map_unlock(map);

    /*
     * Find the page from the underlying object.
     */
    if (object) {
        sf = vm_imgact_map_page(object, offset);
        if (sf == NULL)
            return (KERN_FAILURE);
        off = offset - trunc_page(offset);
        error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
            end - start);
        vm_imgact_unmap_page(sf);
        if (error) {
            return (KERN_FAILURE);
        }
    }

    return (KERN_SUCCESS);
}

static int
__elfN(map_insert)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot, int cow)
{
    struct sf_buf *sf;
    vm_offset_t off;
    vm_size_t sz;
    int error, rv;

    if (start != trunc_page(start)) {
        rv = __elfN(map_partial)(map, object, offset, start,
            round_page(start), prot);
        if (rv)
            return (rv);
        offset += round_page(start) - start;
        start = round_page(start);
    }
    if (end != round_page(end)) {
        rv = __elfN(map_partial)(map, object, offset +
            trunc_page(end) - start, trunc_page(end), end, prot);
        if (rv)
            return (rv);
        end = trunc_page(end);
    }
    if (end > start) {
        if (offset & PAGE_MASK) {
            /*
             * The mapping is not page aligned. This means we have
             * to copy the data. Sigh.
             */
            rv = vm_map_find(map, NULL, 0, &start, end - start,
                FALSE, prot | VM_PROT_WRITE, VM_PROT_ALL, 0);
            if (rv)
                return (rv);
            if (object == NULL)
                return (KERN_SUCCESS);
            for (; start < end; start += sz) {
                sf = vm_imgact_map_page(object, offset);
                if (sf == NULL)
                    return (KERN_FAILURE);
                off = offset - trunc_page(offset);
                sz = end - start;
                if (sz > PAGE_SIZE - off)
                    sz = PAGE_SIZE - off;
                error = copyout((caddr_t)sf_buf_kva(sf) + off,
                    (caddr_t)start, sz);
                vm_imgact_unmap_page(sf);
                if (error) {
                    return (KERN_FAILURE);
                }
                offset += sz;
            }
            rv = KERN_SUCCESS;
        } else {
            vm_object_reference(object);
            vm_map_lock(map);
            rv = vm_map_insert(map, object, offset, start, end,
                prot, VM_PROT_ALL, cow);
            vm_map_unlock(map);
            if (rv != KERN_SUCCESS)
                vm_object_deallocate(object);
        }
        return (rv);
    } else {
        return (KERN_SUCCESS);
    }
}

static int
__elfN(load_section)(struct image_params *imgp, vm_offset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot,
    size_t pagesize)
{
    struct sf_buf *sf;
    size_t map_len;
    vm_map_t map;
    vm_object_t object;
    vm_offset_t map_addr;
    int error, rv, cow;
    size_t copy_len;
    vm_offset_t file_addr;

    /*
     * It's necessary to fail if the filsz + offset taken from the
     * header is greater than the actual file pager object's size.
     * If we were to allow this, then the vm_map_find() below would
     * walk right off the end of the file object and into the ether.
     *
     * While I'm here, might as well check for something else that
     * is invalid: filsz cannot be greater than memsz.
     */
    if ((off_t)filsz + offset > imgp->attr->va_size || filsz > memsz) {
        uprintf("elf_load_section: truncated ELF file\n");
        return (ENOEXEC);
    }

    object = imgp->object;
    map = &imgp->proc->p_vmspace->vm_map;
    map_addr = trunc_page_ps((vm_offset_t)vmaddr, pagesize);
    file_addr = trunc_page_ps(offset, pagesize);

    /*
     * We have two choices.  We can either clear the data in the last page
     * of an oversized mapping, or we can start the anon mapping a page
     * early and copy the initialized data into that first page.  We
     * choose the second.
     */
    if (memsz > filsz)
        map_len = trunc_page_ps(offset + filsz, pagesize) - file_addr;
    else
        map_len = round_page_ps(offset + filsz, pagesize) - file_addr;

    if (map_len != 0) {
        /* cow flags: don't dump readonly sections in core */
        cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
            (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

        rv = __elfN(map_insert)(map,
            object,
            file_addr,          /* file offset */
            map_addr,           /* virtual start */
            map_addr + map_len, /* virtual end */
            prot,
            cow);
        if (rv != KERN_SUCCESS)
            return (EINVAL);

        /* we can stop now if we've covered it all */
        if (memsz == filsz) {
            return (0);
        }
    }

    /*
     * We have to get the remaining bit of the file into the first part
     * of the oversized map segment. This is normally because the .data
     * segment in the file is extended to provide bss. It's a neat idea
     * to try and save a page, but it's a pain in the behind to implement.
     */
    copy_len = (offset + filsz) - trunc_page_ps(offset + filsz, pagesize);
    map_addr = trunc_page_ps((vm_offset_t)vmaddr + filsz, pagesize);
    map_len = round_page_ps((vm_offset_t)vmaddr + memsz, pagesize) -
        map_addr;

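    /*
     * For example, with pagesize 0x1000, offset 0x2000, filsz 0x1800 and
     * memsz 0x3000 for a segment at vmaddr 0x400000: copy_len is 0x800,
     * map_addr becomes 0x401000 and map_len becomes 0x2000, so the
     * anonymous (bss) mapping below covers two pages and the final 0x800
     * bytes of file data are copied into the first of them.
     */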
    /* This had damn well better be true! */
    if (map_len != 0) {
        rv = __elfN(map_insert)(map, NULL, 0, map_addr, map_addr +
            map_len, VM_PROT_ALL, 0);
        if (rv != KERN_SUCCESS) {
            return (EINVAL);
        }
    }

    if (copy_len != 0) {
        vm_offset_t off;

        sf = vm_imgact_map_page(object, offset + filsz);
        if (sf == NULL)
            return (EIO);

        /* send the page fragment to user space */
        off = trunc_page_ps(offset + filsz, pagesize) -
            trunc_page(offset + filsz);
        error = copyout((caddr_t)sf_buf_kva(sf) + off,
            (caddr_t)map_addr, copy_len);
        vm_imgact_unmap_page(sf);
        if (error) {
            return (error);
        }
    }

    /*
     * set it to the specified protection.
     * XXX had better undo the damage from pasting over the cracks here!
     */
    vm_map_protect(map, trunc_page(map_addr), round_page(map_addr +
        map_len), prot, FALSE);

    return (0);
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry, size_t pagesize)
{
    struct {
        struct nameidata nd;
        struct vattr attr;
        struct image_params image_params;
    } *tempdata;
    const Elf_Ehdr *hdr = NULL;
    const Elf_Phdr *phdr = NULL;
    struct nameidata *nd;
    struct vattr *attr;
    struct image_params *imgp;
    vm_prot_t prot;
    u_long rbase;
    u_long base_addr = 0;
    int error, i, numsegs;

#ifdef CAPABILITY_MODE
    /*
     * XXXJA: This check can go away once we are sufficiently confident
     * that the checks in namei() are correct.
     */
    if (IN_CAPABILITY_MODE(curthread))
        return (ECAPMODE);
#endif

    tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK);
    nd = &tempdata->nd;
    attr = &tempdata->attr;
    imgp = &tempdata->image_params;

    /*
     * Initialize part of the common data
     */
    imgp->proc = p;
    imgp->attr = attr;
    imgp->firstpage = NULL;
    imgp->image_header = NULL;
    imgp->object = NULL;
    imgp->execlabel = NULL;

    NDINIT(nd, LOOKUP, LOCKLEAF | FOLLOW, UIO_SYSSPACE, file, curthread);
    if ((error = namei(nd)) != 0) {
        nd->ni_vp = NULL;
        goto fail;
    }
    NDFREE(nd, NDF_ONLY_PNBUF);
    imgp->vp = nd->ni_vp;

    /*
     * Check permissions, modes, uid, etc on the file, and "open" it.
     */
    error = exec_check_permissions(imgp);
    if (error)
        goto fail;

    error = exec_map_first_page(imgp);
    if (error)
        goto fail;

    /*
     * Also make certain that the interpreter stays the same, so set
     * its VV_TEXT flag, too.
     */
    VOP_SET_TEXT(nd->ni_vp);

    imgp->object = nd->ni_vp->v_object;

    hdr = (const Elf_Ehdr *)imgp->image_header;
    if ((error = __elfN(check_header)(hdr)) != 0)
        goto fail;
    if (hdr->e_type == ET_DYN)
        rbase = *addr;
    else if (hdr->e_type == ET_EXEC)
        rbase = 0;
    else {
        error = ENOEXEC;
        goto fail;
    }

    /* Only support headers that fit within first page for now */
    /* (multiplication of two Elf_Half fields will not overflow) */
    if ((hdr->e_phoff > PAGE_SIZE) ||
        (hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE - hdr->e_phoff) {
        error = ENOEXEC;
        goto fail;
    }

    phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
    if (!aligned(phdr, Elf_Addr)) {
        error = ENOEXEC;
        goto fail;
    }

    for (i = 0, numsegs = 0; i < hdr->e_phnum; i++) {
        if (phdr[i].p_type == PT_LOAD && phdr[i].p_memsz != 0) {
            /* Loadable segment */
            prot = __elfN(trans_prot)(phdr[i].p_flags);
            error = __elfN(load_section)(imgp, phdr[i].p_offset,
                (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
                phdr[i].p_memsz, phdr[i].p_filesz, prot, pagesize);
            if (error != 0)
                goto fail;
            /*
             * Establish the base address if this is the
             * first segment.
             */
            if (numsegs == 0)
                base_addr = trunc_page(phdr[i].p_vaddr +
                    rbase);
            numsegs++;
        }
    }
    *addr = base_addr;
    *entry = (unsigned long)hdr->e_entry + rbase;

fail:
    if (imgp->firstpage)
        exec_unmap_first_page(imgp);

    if (nd->ni_vp)
        vput(nd->ni_vp);

    free(tempdata, M_TEMP);

    return (error);
}

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
    const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
    const Elf_Phdr *phdr;
    Elf_Auxargs *elf_auxargs;
    struct vmspace *vmspace;
    vm_prot_t prot;
    u_long text_size = 0, data_size = 0, total_size = 0;
    u_long text_addr = 0, data_addr = 0;
    u_long seg_size, seg_addr;
    u_long addr, baddr, et_dyn_addr, entry = 0, proghdr = 0;
    int32_t osrel = 0;
    int error = 0, i, n, interp_name_len = 0;
    const char *interp = NULL, *newinterp = NULL;
    Elf_Brandinfo *brand_info;
    char *path;
    struct sysentvec *sv;

    /*
     * Do we have a valid ELF header ?
     *
     * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
     * if particular brand doesn't support it.
     */
    if (__elfN(check_header)(hdr) != 0 ||
        (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
        return (-1);

    /*
     * From here on down, we return an errno, not -1, as we've
     * detected an ELF file.
     */

    if ((hdr->e_phoff > PAGE_SIZE) ||
        (hdr->e_phoff + hdr->e_phentsize * hdr->e_phnum) > PAGE_SIZE) {
        /* Only support headers in first page for now */
        return (ENOEXEC);
    }
    phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
    if (!aligned(phdr, Elf_Addr))
        return (ENOEXEC);
    n = 0;
    baddr = 0;
    for (i = 0; i < hdr->e_phnum; i++) {
        switch (phdr[i].p_type) {
        case PT_LOAD:
            if (n == 0)
                baddr = phdr[i].p_vaddr;
            n++;
            break;
        case PT_INTERP:
            /* Path to interpreter */
            if (phdr[i].p_filesz > MAXPATHLEN ||
                phdr[i].p_offset >= PAGE_SIZE ||
                phdr[i].p_offset + phdr[i].p_filesz >= PAGE_SIZE)
                return (ENOEXEC);
            interp = imgp->image_header + phdr[i].p_offset;
            interp_name_len = phdr[i].p_filesz;
            break;
        case PT_GNU_STACK:
            if (__elfN(nxstack))
                imgp->stack_prot =
                    __elfN(trans_prot)(phdr[i].p_flags);
            break;
        }
    }

    brand_info = __elfN(get_brandinfo)(imgp, interp, interp_name_len,
        &osrel);
    if (brand_info == NULL) {
        uprintf("ELF binary type \"%u\" not known.\n",
            hdr->e_ident[EI_OSABI]);
        return (ENOEXEC);
    }
    if (hdr->e_type == ET_DYN) {
        if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0)
            return (ENOEXEC);
        /*
         * Honour the base load address from the dso if it is
         * non-zero for some reason.
         */
        if (baddr == 0)
            et_dyn_addr = ET_DYN_LOAD_ADDR;
        else
            et_dyn_addr = 0;
    } else
        et_dyn_addr = 0;
    sv = brand_info->sysvec;
    if (interp != NULL && brand_info->interp_newpath != NULL)
        newinterp = brand_info->interp_newpath;

    /*
     * Avoid a possible deadlock if the current address space is destroyed
     * and that address space maps the locked vnode.  In the common case,
     * the locked vnode's v_usecount is decremented but remains greater
     * than zero.  Consequently, the vnode lock is not needed by vrele().
     * However, in cases where the vnode lock is external, such as nullfs,
     * v_usecount may become zero.
     *
     * The VV_TEXT flag prevents modifications to the executable while
     * the vnode is unlocked.
     */
    VOP_UNLOCK(imgp->vp, 0);

    error = exec_new_vmspace(imgp, sv);
    imgp->proc->p_sysent = sv;

    vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
    if (error)
        return (error);

    for (i = 0; i < hdr->e_phnum; i++) {
        switch (phdr[i].p_type) {
        case PT_LOAD:   /* Loadable segment */
            if (phdr[i].p_memsz == 0)
                break;
            prot = __elfN(trans_prot)(phdr[i].p_flags);
            error = __elfN(load_section)(imgp, phdr[i].p_offset,
                (caddr_t)(uintptr_t)phdr[i].p_vaddr + et_dyn_addr,
                phdr[i].p_memsz, phdr[i].p_filesz, prot,
                sv->sv_pagesize);
            if (error != 0)
                return (error);

            /*
             * If this segment contains the program headers,
             * remember their virtual address for the AT_PHDR
             * aux entry. Static binaries don't usually include
             * a PT_PHDR entry.
             */
            if (phdr[i].p_offset == 0 &&
                hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
                <= phdr[i].p_filesz)
                proghdr = phdr[i].p_vaddr + hdr->e_phoff +
                    et_dyn_addr;

            seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
            seg_size = round_page(phdr[i].p_memsz +
                phdr[i].p_vaddr + et_dyn_addr - seg_addr);

            /*
             * Make the largest executable segment the official
             * text segment and all others data.
             *
             * Note that obreak() assumes that data_addr +
             * data_size == end of data load area, and the ELF
             * file format expects segments to be sorted by
             * address.  If multiple data segments exist, the
             * last one will be used.
             */

            if (phdr[i].p_flags & PF_X && text_size < seg_size) {
                text_size = seg_size;
                text_addr = seg_addr;
            } else {
                data_size = seg_size;
                data_addr = seg_addr;
            }
            total_size += seg_size;
            break;
        case PT_PHDR:   /* Program header table info */
            proghdr = phdr[i].p_vaddr + et_dyn_addr;
            break;
        default:
            break;
        }
    }

    if (data_addr == 0 && data_size == 0) {
        data_addr = text_addr;
        data_size = text_size;
    }

    entry = (u_long)hdr->e_entry + et_dyn_addr;

    /*
     * Check limits.  It should be safe to check the
     * limits after loading the segments since we do
     * not actually fault in all the segments pages.
     */
    PROC_LOCK(imgp->proc);
    if (data_size > lim_cur(imgp->proc, RLIMIT_DATA) ||
        text_size > maxtsiz ||
        total_size > lim_cur(imgp->proc, RLIMIT_VMEM) ||
        racct_set(imgp->proc, RACCT_DATA, data_size) != 0 ||
        racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) {
        PROC_UNLOCK(imgp->proc);
        return (ENOMEM);
    }

    vmspace = imgp->proc->p_vmspace;
    vmspace->vm_tsize = text_size >> PAGE_SHIFT;
    vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
    vmspace->vm_dsize = data_size >> PAGE_SHIFT;
    vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

    /*
     * We load the dynamic linker where a userland call
     * to mmap(0, ...) would put it.  The rationale behind this
     * calculation is that it leaves room for the heap to grow to
     * its maximum allowed size.
     */
    addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(imgp->proc,
        RLIMIT_DATA));
    PROC_UNLOCK(imgp->proc);

    imgp->entry_addr = entry;

    if (interp != NULL) {
        int have_interp = FALSE;
        VOP_UNLOCK(imgp->vp, 0);
        if (brand_info->emul_path != NULL &&
            brand_info->emul_path[0] != '\0') {
            path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
            snprintf(path, MAXPATHLEN, "%s%s",
                brand_info->emul_path, interp);
            error = __elfN(load_file)(imgp->proc, path, &addr,
                &imgp->entry_addr, sv->sv_pagesize);
            free(path, M_TEMP);
            if (error == 0)
                have_interp = TRUE;
        }
        if (!have_interp && newinterp != NULL) {
            error = __elfN(load_file)(imgp->proc, newinterp, &addr,
                &imgp->entry_addr, sv->sv_pagesize);
            if (error == 0)
                have_interp = TRUE;
        }
        if (!have_interp) {
            error = __elfN(load_file)(imgp->proc, interp, &addr,
                &imgp->entry_addr, sv->sv_pagesize);
        }
        vn_lock(imgp->vp, LK_EXCLUSIVE | LK_RETRY);
        if (error != 0) {
            uprintf("ELF interpreter %s not found\n", interp);
            return (error);
        }
    } else
        addr = et_dyn_addr;

    /*
     * Construct auxargs table (used by the fixup routine)
     */
    elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
    elf_auxargs->execfd = -1;
    elf_auxargs->phdr = proghdr;
    elf_auxargs->phent = hdr->e_phentsize;
    elf_auxargs->phnum = hdr->e_phnum;
    elf_auxargs->pagesz = PAGE_SIZE;
    elf_auxargs->base = addr;
    elf_auxargs->flags = 0;
    elf_auxargs->entry = entry;

    imgp->auxargs = elf_auxargs;
    imgp->interpreted = 0;
    imgp->reloc_base = addr;
    imgp->proc->p_osrel = osrel;

    return (error);
}

#define suword __CONCAT(suword, __ELF_WORD_SIZE)

int
__elfN(freebsd_fixup)(register_t **stack_base, struct image_params *imgp)
{
    Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
    Elf_Addr *base;
    Elf_Addr *pos;

    base = (Elf_Addr *)*stack_base;
    pos = base + (imgp->args->argc + imgp->args->envc + 2);

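    /*
     * base points at the argv[] pointer array on the new stack; skipping
     * argc + envc pointer slots plus the two NULL terminators places pos
     * at the start of the ELF auxiliary vector, which is filled in below.
     * argc itself is stored just below base at the end of this function.
     */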
    if (args->execfd != -1)
        AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
    AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
    AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
    AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
    AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
    AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
    AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
    AUXARGS_ENTRY(pos, AT_BASE, args->base);
    if (imgp->execpathp != 0)
        AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
    AUXARGS_ENTRY(pos, AT_OSRELDATE, osreldate);
    if (imgp->canary != 0) {
        AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
        AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
    }
    AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
    if (imgp->pagesizes != 0) {
        AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
        AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
    }
    if (imgp->sysent->sv_timekeep_base != 0) {
        AUXARGS_ENTRY(pos, AT_TIMEKEEP,
            imgp->sysent->sv_timekeep_base);
    }
    AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
        != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
        imgp->sysent->sv_stackprot);
    AUXARGS_ENTRY(pos, AT_NULL, 0);

    free(imgp->auxargs, M_TEMP);
    imgp->auxargs = NULL;

    base--;
    suword(base, (long)imgp->args->argc);
    *stack_base = (register_t *)base;
    return (0);
}

/*
 * Code for generating ELF core dumps.
 */

typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
    Elf_Phdr *phdr;     /* Program header to fill in */
    Elf_Off offset;     /* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
    int count;          /* Count of writable segments. */
    size_t size;        /* Total size of all writable segments. */
};

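/*
 * For illustration: the dump logic below makes two passes over the writable
 * map entries.  A sizing pass (cb_size_segment() via each_writable_segment(),
 * plus a dry run of __elfN(puthdr)()) computes the segment count and header
 * size; a second pass (cb_put_phdr()) then fills in one PT_LOAD program
 * header per segment before the segment contents themselves are written out.
 */
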
static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static void each_writable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct thread *, struct vnode *, struct ucred *,
    int, void *, size_t, gzFile);
static void __elfN(puthdr)(struct thread *, void *, size_t *, int);
static void __elfN(putnote)(void *, size_t *, const char *, int,
    const void *, size_t);

#ifdef COMPRESS_USER_CORES
extern int compress_user_cores;
extern int compress_user_cores_gzlevel;
#endif

static int
core_output(struct vnode *vp, void *base, size_t len, off_t offset,
    struct ucred *active_cred, struct ucred *file_cred,
    struct thread *td, char *core_buf, gzFile gzfile) {

    int error;
    if (gzfile) {
#ifdef COMPRESS_USER_CORES
        error = compress_core(gzfile, base, core_buf, len, td);
#else
        panic("shouldn't be here");
#endif
    } else {
        error = vn_rdwr_inchunks(UIO_WRITE, vp, base, len, offset,
            UIO_USERSPACE, IO_UNIT | IO_DIRECT, active_cred, file_cred,
            NULL, td);
    }
    return (error);
}

int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
{
    struct ucred *cred = td->td_ucred;
    int error = 0;
    struct sseg_closure seginfo;
    void *hdr;
    size_t hdrsize;

    gzFile gzfile = Z_NULL;
    char *core_buf = NULL;
#ifdef COMPRESS_USER_CORES
    char gzopen_flags[8];
    char *p;
    int doing_compress = flags & IMGACT_CORE_COMPRESS;
#endif

    hdr = NULL;

#ifdef COMPRESS_USER_CORES
    if (doing_compress) {
        p = gzopen_flags;
        *p++ = 'w';
        if (compress_user_cores_gzlevel >= 0 &&
            compress_user_cores_gzlevel <= 9)
            *p++ = '0' + compress_user_cores_gzlevel;
        *p = 0;
        gzfile = gz_open("", gzopen_flags, vp);

int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	void *hdr;
	size_t hdrsize;

	gzFile gzfile = Z_NULL;
	char *core_buf = NULL;
#ifdef COMPRESS_USER_CORES
	char gzopen_flags[8];
	char *p;
	int doing_compress = flags & IMGACT_CORE_COMPRESS;
#endif

	hdr = NULL;

#ifdef COMPRESS_USER_CORES
	if (doing_compress) {
		p = gzopen_flags;
		*p++ = 'w';
		if (compress_user_cores_gzlevel >= 0 &&
		    compress_user_cores_gzlevel <= 9)
			*p++ = '0' + compress_user_cores_gzlevel;
		*p = 0;
		gzfile = gz_open("", gzopen_flags, vp);
		if (gzfile == Z_NULL) {
			error = EFAULT;
			goto done;
		}
		core_buf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
		if (!core_buf) {
			error = ENOMEM;
			goto done;
		}
	}
#endif

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_writable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Calculate the size of the core file header area by making
	 * a dry run of generating it.  Nothing is written, but the
	 * size is calculated.
	 */
	hdrsize = 0;
	__elfN(puthdr)(td, (void *)NULL, &hdrsize, seginfo.count);

#ifdef RACCT
	PROC_LOCK(td->td_proc);
	error = racct_add(td->td_proc, RACCT_CORE, hdrsize + seginfo.size);
	PROC_UNLOCK(td->td_proc);
	if (error != 0) {
		error = EFAULT;
		goto done;
	}
#endif
	if (hdrsize + seginfo.size >= limit) {
		error = EFAULT;
		goto done;
	}

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	if (hdr == NULL) {
		error = EINVAL;
		goto done;
	}
	error = __elfN(corehdr)(td, vp, cred, seginfo.count, hdr, hdrsize,
	    gzfile);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = hdrsize;
		for (i = 0; i < seginfo.count; i++) {
			error = core_output(vp,
			    (caddr_t)(uintptr_t)php->p_vaddr, php->p_filesz,
			    offset, cred, NOCRED, curthread, core_buf, gzfile);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
	}
	if (error) {
		log(LOG_WARNING,
		    "Failed to write core file for process %s (error %d)\n",
		    curproc->p_comm, error);
	}

done:
#ifdef COMPRESS_USER_CORES
	if (core_buf)
		free(core_buf, M_TEMP);
	if (gzfile)
		gzclose(gzfile);
#endif

	free(hdr, M_TEMP);

	return (error);
}

/*
 * A callback for each_writable_segment() to write out the segment's
 * program header entry.
 */
static void
cb_put_phdr(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = __elfN(untrans_prot)(entry->protection);

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}

/*
 * A callback for each_writable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(entry, closure)
	vm_map_entry_t entry;
	void *closure;
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}
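
/*
 * each_writable_segment() below is the single point that decides which
 * mappings end up in the core file.  It is run twice per dump: once with
 * cb_size_segment() to count and size the segments, and again (from
 * __elfN(puthdr)() via cb_put_phdr()) to emit one PT_LOAD header per
 * segment.  The dump runs with the process stopped, so the address map
 * is not expected to change between the two passes.
 */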

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_writable_segment(td, func, closure)
	struct thread *td;
	segment_callback func;
	void *closure;
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	vm_map_lock_read(map);
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include a memory segment in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_WLOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_WLOCK(backing_object);
			VM_OBJECT_WUNLOCK(object);
			object = backing_object;
		}
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE;
		VM_OBJECT_WUNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(td, vp, cred, numsegs, hdr, hdrsize, gzfile)
	struct thread *td;
	struct vnode *vp;
	struct ucred *cred;
	int numsegs;
	size_t hdrsize;
	void *hdr;
	gzFile gzfile;
{
	size_t off;

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	off = 0;
	__elfN(puthdr)(td, hdr, &off, numsegs);

	if (!gzfile) {
		/* Write it to the core file. */
		return (vn_rdwr_inchunks(UIO_WRITE, vp, hdr, hdrsize, (off_t)0,
		    UIO_SYSSPACE, IO_UNIT | IO_DIRECT, cred, NOCRED, NULL,
		    td));
	} else {
#ifdef COMPRESS_USER_CORES
		if (gzwrite(gzfile, hdr, hdrsize) != hdrsize) {
			log(LOG_WARNING,
			    "Failed to compress core file header for process"
			    " %s.\n", curproc->p_comm);
			return (EFAULT);
		} else {
			return (0);
		}
#else
		panic("shouldn't be here");
#endif
	}
}

#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
#include <compat/freebsd32/freebsd32.h>

typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
typedef struct thrmisc32 elf_thrmisc_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
typedef thrmisc_t elf_thrmisc_t;
#endif
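
/*
 * __elfN(puthdr)() below serves two callers.  With dst == NULL it only
 * advances *off, which is how __elfN(coredump)() sizes the header area
 * without allocating it; with a real buffer it lays down the ELF header,
 * program headers and notes at the same offsets, so the two passes have
 * to stay in step.  The calling pattern, roughly (a sketch of the calls
 * already made above, not a new API):
 *
 *	size_t sz = 0;
 *	__elfN(puthdr)(td, NULL, &sz, numsegs);		dry run, sz = size
 *	hdr = malloc(sz, M_TEMP, M_WAITOK);
 *	bzero(hdr, sz);
 *	off = 0;
 *	__elfN(puthdr)(td, hdr, &off, numsegs);		fills hdr in place
 */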

static void
__elfN(puthdr)(struct thread *td, void *dst, size_t *off, int numsegs)
{
	struct {
		elf_prstatus_t status;
		elf_prfpregset_t fpregset;
		elf_prpsinfo_t psinfo;
		elf_thrmisc_t thrmisc;
	} *tempdata;
	elf_prstatus_t *status;
	elf_prfpregset_t *fpregset;
	elf_prpsinfo_t *psinfo;
	elf_thrmisc_t *thrmisc;
	struct proc *p;
	struct thread *thr;
	size_t ehoff, noteoff, notesz, phoff;

	p = td->td_proc;

	ehoff = *off;
	*off += sizeof(Elf_Ehdr);

	phoff = *off;
	*off += (numsegs + 1) * sizeof(Elf_Phdr);

	noteoff = *off;
	/*
	 * Don't allocate space for the notes if we're just calculating
	 * the size of the header.  We also don't collect the data.
	 */
	if (dst != NULL) {
		tempdata = malloc(sizeof(*tempdata), M_TEMP, M_ZERO|M_WAITOK);
		status = &tempdata->status;
		fpregset = &tempdata->fpregset;
		psinfo = &tempdata->psinfo;
		thrmisc = &tempdata->thrmisc;
	} else {
		tempdata = NULL;
		status = NULL;
		fpregset = NULL;
		psinfo = NULL;
		thrmisc = NULL;
	}

	if (dst != NULL) {
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		/*
		 * XXX - We don't fill in the command line arguments properly
		 * yet.
		 */
		strlcpy(psinfo->pr_psargs, p->p_comm,
		    sizeof(psinfo->pr_psargs));
	}
	__elfN(putnote)(dst, off, "FreeBSD", NT_PRPSINFO, psinfo,
	    sizeof *psinfo);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and is thus likely to
	 * be the thread one wants selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		if (dst != NULL) {
			status->pr_version = PRSTATUS_VERSION;
			status->pr_statussz = sizeof(elf_prstatus_t);
			status->pr_gregsetsz = sizeof(elf_gregset_t);
			status->pr_fpregsetsz = sizeof(elf_fpregset_t);
			status->pr_osreldate = osreldate;
			status->pr_cursig = p->p_sig;
			status->pr_pid = thr->td_tid;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
			fill_regs32(thr, &status->pr_reg);
			fill_fpregs32(thr, fpregset);
#else
			fill_regs(thr, &status->pr_reg);
			fill_fpregs(thr, fpregset);
#endif
			memset(&thrmisc->_pad, 0, sizeof(thrmisc->_pad));
			strcpy(thrmisc->pr_tname, thr->td_name);
		}
		__elfN(putnote)(dst, off, "FreeBSD", NT_PRSTATUS, status,
		    sizeof *status);
		__elfN(putnote)(dst, off, "FreeBSD", NT_FPREGSET, fpregset,
		    sizeof *fpregset);
		__elfN(putnote)(dst, off, "FreeBSD", NT_THRMISC, thrmisc,
		    sizeof *thrmisc);
		/*
		 * Allow for MD specific notes, as well as any MD
		 * specific preparations for writing MI notes.
		 */
		__elfN(dump_thread)(thr, dst, off);

		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	notesz = *off - noteoff;

	if (dst != NULL)
		free(tempdata, M_TEMP);

	/* Align up to a page boundary for the program segments. */
	*off = round_page(*off);

	if (dst != NULL) {
		Elf_Ehdr *ehdr;
		Elf_Phdr *phdr;
		struct phdr_closure phc;

		/*
		 * Fill in the ELF header.
		 */
		ehdr = (Elf_Ehdr *)((char *)dst + ehoff);
		ehdr->e_ident[EI_MAG0] = ELFMAG0;
		ehdr->e_ident[EI_MAG1] = ELFMAG1;
		ehdr->e_ident[EI_MAG2] = ELFMAG2;
		ehdr->e_ident[EI_MAG3] = ELFMAG3;
		ehdr->e_ident[EI_CLASS] = ELF_CLASS;
		ehdr->e_ident[EI_DATA] = ELF_DATA;
		ehdr->e_ident[EI_VERSION] = EV_CURRENT;
		ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
		ehdr->e_ident[EI_ABIVERSION] = 0;
		ehdr->e_ident[EI_PAD] = 0;
		ehdr->e_type = ET_CORE;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
		ehdr->e_machine = ELF_ARCH32;
#else
		ehdr->e_machine = ELF_ARCH;
#endif
		ehdr->e_version = EV_CURRENT;
		ehdr->e_entry = 0;
		ehdr->e_phoff = phoff;
		ehdr->e_flags = 0;
		ehdr->e_ehsize = sizeof(Elf_Ehdr);
		ehdr->e_phentsize = sizeof(Elf_Phdr);
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shentsize = sizeof(Elf_Shdr);
		ehdr->e_shnum = 0;
		ehdr->e_shstrndx = SHN_UNDEF;

		/*
		 * Fill in the program header entries.
		 */
		phdr = (Elf_Phdr *)((char *)dst + phoff);

		/* The note segment. */
		phdr->p_type = PT_NOTE;
		phdr->p_offset = noteoff;
		phdr->p_vaddr = 0;
		phdr->p_paddr = 0;
		phdr->p_filesz = notesz;
		phdr->p_memsz = 0;
		phdr->p_flags = 0;
		phdr->p_align = 0;
		phdr++;

		/* All the writable segments from the program. */
		phc.phdr = phdr;
		phc.offset = *off;
		each_writable_segment(td, cb_put_phdr, &phc);
	}
}

static void
__elfN(putnote)(void *dst, size_t *off, const char *name, int type,
    const void *desc, size_t descsz)
{
	Elf_Note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = descsz;
	note.n_type = type;
	if (dst != NULL)
		bcopy(&note, (char *)dst + *off, sizeof note);
	*off += sizeof note;
	if (dst != NULL)
		bcopy(name, (char *)dst + *off, note.n_namesz);
	*off += roundup2(note.n_namesz, sizeof(Elf_Size));
	if (dst != NULL)
		bcopy(desc, (char *)dst + *off, note.n_descsz);
	*off += roundup2(note.n_descsz, sizeof(Elf_Size));
}
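
/*
 * Both the writer above and the parser below rely on the standard ELF
 * note record layout: an Elf_Note header (n_namesz, n_descsz, n_type)
 * followed by the name string and then the descriptor, each rounded up
 * with roundup2() to the note alignment.  For the core notes written
 * above, the name is "FreeBSD\0" and the descriptor is one of the
 * elf_prstatus_t, elf_prpsinfo_t, etc. structures defined earlier.
 */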

static boolean_t
__elfN(parse_notes)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel, const Elf_Phdr *pnote)
{
	const Elf_Note *note, *note0, *note_end;
	const char *note_name;
	int i;

	if (pnote == NULL || pnote->p_offset >= PAGE_SIZE ||
	    pnote->p_filesz > PAGE_SIZE ||
	    pnote->p_offset + pnote->p_filesz >= PAGE_SIZE)
		return (FALSE);

	note = note0 = (const Elf_Note *)(imgp->image_header + pnote->p_offset);
	note_end = (const Elf_Note *)(imgp->image_header +
	    pnote->p_offset + pnote->p_filesz);
	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
		    (const char *)note < sizeof(Elf_Note))
			return (FALSE);
		if (note->n_namesz != checknote->hdr.n_namesz ||
		    note->n_descsz != checknote->hdr.n_descsz ||
		    note->n_type != checknote->hdr.n_type)
			goto nextnote;
		note_name = (const char *)(note + 1);
		if (note_name + checknote->hdr.n_namesz >=
		    (const char *)note_end || strncmp(checknote->vendor,
		    note_name, checknote->hdr.n_namesz) != 0)
			goto nextnote;

		/*
		 * Fetch the osreldate for the binary
		 * from the ELF OSABI-note if necessary.
		 */
		if ((checknote->flags & BN_TRANSLATE_OSREL) != 0 &&
		    checknote->trans_osrel != NULL)
			return (checknote->trans_osrel(note, osrel));
		return (TRUE);

nextnote:
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, sizeof(Elf32_Addr)) +
		    roundup2(note->n_descsz, sizeof(Elf32_Addr)));
	}

	return (FALSE);
}

/*
 * Try to find the appropriate ABI-note section for checknote, and fetch
 * the osreldate for the binary from the ELF OSABI-note.  Only the first
 * page of the image is searched, the same as for headers.
 */
static boolean_t
__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *checknote,
    int32_t *osrel)
{
	const Elf_Phdr *phdr;
	const Elf_Ehdr *hdr;
	int i;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_NOTE &&
		    __elfN(parse_notes)(imgp, checknote, osrel, &phdr[i]))
			return (TRUE);
	}
	return (FALSE);
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
static struct execsw __elfN(execsw) = {
	__CONCAT(exec_, __elfN(imgact)),
	__XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));
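
/*
 * For reference: on a FreeBSD binary, what __elfN(check_note)() normally
 * matches is the vendor "FreeBSD" ABI tag note that the base toolchain
 * emits (typically in a .note.tag or .note.ABI-tag section); its 4-byte
 * descriptor holds the __FreeBSD_version the binary was built for, which
 * the brand's trans_osrel callback copies into *osrel.  "readelf -n" on
 * such a binary shows the same note.
 */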

#ifdef COMPRESS_USER_CORES
/*
 * Compress and write out a core segment for a user process.
 *
 * 'inbuf' is the starting address of a VM segment in the process' address
 * space that is to be compressed and written out to the core file.  'dest_buf'
 * is a buffer in the kernel's address space.  The segment is copied from
 * 'inbuf' to 'dest_buf' first before being processed by the compression
 * routine gzwrite().  This copying is necessary because the content of the VM
 * segment may change between the compression pass and the crc-computation pass
 * in gzwrite().  This is because realtime threads may preempt the UNIX kernel.
 */
static int
compress_core(gzFile file, char *inbuf, char *dest_buf, unsigned int len,
    struct thread *td)
{
	int len_compressed;
	int error = 0;
	unsigned int chunk_len;

	while (len) {
		chunk_len = (len > CORE_BUF_SIZE) ? CORE_BUF_SIZE : len;
		copyin(inbuf, dest_buf, chunk_len);
		len_compressed = gzwrite(file, dest_buf, chunk_len);

		EVENTHANDLER_INVOKE(app_coredump_progress, td, len_compressed);

		if ((unsigned int)len_compressed != chunk_len) {
			log(LOG_WARNING,
			    "compress_core: length mismatch (0x%x returned, "
			    "0x%x expected)\n", len_compressed, chunk_len);
			EVENTHANDLER_INVOKE(app_coredump_error, td,
			    "compress_core: length mismatch %x -> %x",
			    chunk_len, len_compressed);
			error = EFAULT;
			break;
		}
		inbuf += chunk_len;
		len -= chunk_len;
		maybe_yield();
	}

	return (error);
}
#endif /* COMPRESS_USER_CORES */

static vm_prot_t
__elfN(trans_prot)(Elf_Word flags)
{
	vm_prot_t prot;

	prot = 0;
	if (flags & PF_X)
		prot |= VM_PROT_EXECUTE;
	if (flags & PF_W)
		prot |= VM_PROT_WRITE;
	if (flags & PF_R)
		prot |= VM_PROT_READ;
#if __ELF_WORD_SIZE == 32
#if defined(__amd64__) || defined(__ia64__)
	if (i386_read_exec && (flags & PF_R))
		prot |= VM_PROT_EXECUTE;
#endif
#endif
	return (prot);
}

static Elf_Word
__elfN(untrans_prot)(vm_prot_t prot)
{
	Elf_Word flags;

	flags = 0;
	if (prot & VM_PROT_EXECUTE)
		flags |= PF_X;
	if (prot & VM_PROT_READ)
		flags |= PF_R;
	if (prot & VM_PROT_WRITE)
		flags |= PF_W;
	return (flags);
}