1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2017 Dell EMC 5 * Copyright (c) 2000-2001, 2003 David O'Brien 6 * Copyright (c) 1995-1996 Søren Schmidt 7 * Copyright (c) 1996 Peter Wemm 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer 15 * in this position and unchanged. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The name of the author may not be used to endorse or promote products 20 * derived from this software without specific prior written permission 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 27 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 31 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_capsicum.h" 38 39 #include <sys/param.h> 40 #include <sys/capsicum.h> 41 #include <sys/compressor.h> 42 #include <sys/exec.h> 43 #include <sys/fcntl.h> 44 #include <sys/imgact.h> 45 #include <sys/imgact_elf.h> 46 #include <sys/jail.h> 47 #include <sys/kernel.h> 48 #include <sys/lock.h> 49 #include <sys/malloc.h> 50 #include <sys/mount.h> 51 #include <sys/mman.h> 52 #include <sys/namei.h> 53 #include <sys/proc.h> 54 #include <sys/procfs.h> 55 #include <sys/ptrace.h> 56 #include <sys/racct.h> 57 #include <sys/resourcevar.h> 58 #include <sys/rwlock.h> 59 #include <sys/sbuf.h> 60 #include <sys/sf_buf.h> 61 #include <sys/smp.h> 62 #include <sys/systm.h> 63 #include <sys/signalvar.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/syscall.h> 67 #include <sys/sysctl.h> 68 #include <sys/sysent.h> 69 #include <sys/vnode.h> 70 #include <sys/syslog.h> 71 #include <sys/eventhandler.h> 72 #include <sys/user.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_kern.h> 76 #include <vm/vm_param.h> 77 #include <vm/pmap.h> 78 #include <vm/vm_map.h> 79 #include <vm/vm_object.h> 80 #include <vm/vm_extern.h> 81 82 #include <machine/elf.h> 83 #include <machine/md_var.h> 84 85 #define ELF_NOTE_ROUNDSIZE 4 86 #define OLD_EI_BRAND 8 87 88 static int __elfN(check_header)(const Elf_Ehdr *hdr); 89 static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp, 90 const char *interp, int32_t *osrel, uint32_t *fctl0); 91 static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr, 92 u_long *entry); 93 static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, 94 caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot); 95 static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp); 96 static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note, 97 int32_t *osrel); 98 static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel); 99 
static boolean_t __elfN(check_note)(struct image_params *imgp, 100 Elf_Brandnote *checknote, int32_t *osrel, boolean_t *has_fctl0, 101 uint32_t *fctl0); 102 static vm_prot_t __elfN(trans_prot)(Elf_Word); 103 static Elf_Word __elfN(untrans_prot)(vm_prot_t); 104 105 SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), 106 CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 107 ""); 108 109 int __elfN(fallback_brand) = -1; 110 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, 111 fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0, 112 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort"); 113 114 static int elf_legacy_coredump = 0; 115 SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW, 116 &elf_legacy_coredump, 0, 117 "include all and only RW pages in core dumps"); 118 119 int __elfN(nxstack) = 120 #if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \ 121 (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__) || \ 122 defined(__riscv) 123 1; 124 #else 125 0; 126 #endif 127 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, 128 nxstack, CTLFLAG_RW, &__elfN(nxstack), 0, 129 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack"); 130 131 #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) 132 int i386_read_exec = 0; 133 SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, 134 "enable execution from readable segments"); 135 #endif 136 137 static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR; 138 static int 139 sysctl_pie_base(SYSCTL_HANDLER_ARGS) 140 { 141 u_long val; 142 int error; 143 144 val = __elfN(pie_base); 145 error = sysctl_handle_long(oidp, &val, 0, req); 146 if (error != 0 || req->newptr == NULL) 147 return (error); 148 if ((val & PAGE_MASK) != 0) 149 return (EINVAL); 150 __elfN(pie_base) = val; 151 return (0); 152 } 153 SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base, 154 CTLTYPE_ULONG | CTLFLAG_MPSAFE | 
CTLFLAG_RW, NULL, 0, 155 sysctl_pie_base, "LU", 156 "PIE load base without randomization"); 157 158 SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, 159 CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 160 ""); 161 #define ASLR_NODE_OID __CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr) 162 163 static int __elfN(aslr_enabled) = 0; 164 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN, 165 &__elfN(aslr_enabled), 0, 166 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) 167 ": enable address map randomization"); 168 169 static int __elfN(pie_aslr_enabled) = 0; 170 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN, 171 &__elfN(pie_aslr_enabled), 0, 172 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) 173 ": enable address map randomization for PIE binaries"); 174 175 static int __elfN(aslr_honor_sbrk) = 1; 176 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW, 177 &__elfN(aslr_honor_sbrk), 0, 178 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used"); 179 180 static int __elfN(aslr_stack_gap) = 3; 181 SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW, 182 &__elfN(aslr_stack_gap), 0, 183 __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) 184 ": maximum percentage of main stack to waste on a random gap"); 185 186 static int __elfN(sigfastblock) = 1; 187 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock, 188 CTLFLAG_RWTUN, &__elfN(sigfastblock), 0, 189 "enable sigfastblock for new processes"); 190 191 static bool __elfN(allow_wx) = true; 192 SYSCTL_BOOL(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, allow_wx, 193 CTLFLAG_RWTUN, &__elfN(allow_wx), 0, 194 "Allow pages to be mapped simultaneously writable and executable"); 195 196 static Elf_Brandinfo *elf_brand_list[MAX_BRANDS]; 197 198 #define aligned(a, t) (rounddown2((u_long)(a), sizeof(t)) == (u_long)(a)) 199 200 Elf_Brandnote __elfN(freebsd_brandnote) = { 201 .hdr.n_namesz = sizeof(FREEBSD_ABI_VENDOR), 202 .hdr.n_descsz = sizeof(int32_t), 203 .hdr.n_type = 
NT_FREEBSD_ABI_TAG, 204 .vendor = FREEBSD_ABI_VENDOR, 205 .flags = BN_TRANSLATE_OSREL, 206 .trans_osrel = __elfN(freebsd_trans_osrel) 207 }; 208 209 static bool 210 __elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel) 211 { 212 uintptr_t p; 213 214 p = (uintptr_t)(note + 1); 215 p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); 216 *osrel = *(const int32_t *)(p); 217 218 return (true); 219 } 220 221 static const char GNU_ABI_VENDOR[] = "GNU"; 222 static int GNU_KFREEBSD_ABI_DESC = 3; 223 224 Elf_Brandnote __elfN(kfreebsd_brandnote) = { 225 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 226 .hdr.n_descsz = 16, /* XXX at least 16 */ 227 .hdr.n_type = 1, 228 .vendor = GNU_ABI_VENDOR, 229 .flags = BN_TRANSLATE_OSREL, 230 .trans_osrel = kfreebsd_trans_osrel 231 }; 232 233 static bool 234 kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel) 235 { 236 const Elf32_Word *desc; 237 uintptr_t p; 238 239 p = (uintptr_t)(note + 1); 240 p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); 241 242 desc = (const Elf32_Word *)p; 243 if (desc[0] != GNU_KFREEBSD_ABI_DESC) 244 return (false); 245 246 /* 247 * Debian GNU/kFreeBSD embed the earliest compatible kernel version 248 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way. 
249 */ 250 *osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3]; 251 252 return (true); 253 } 254 255 int 256 __elfN(insert_brand_entry)(Elf_Brandinfo *entry) 257 { 258 int i; 259 260 for (i = 0; i < MAX_BRANDS; i++) { 261 if (elf_brand_list[i] == NULL) { 262 elf_brand_list[i] = entry; 263 break; 264 } 265 } 266 if (i == MAX_BRANDS) { 267 printf("WARNING: %s: could not insert brandinfo entry: %p\n", 268 __func__, entry); 269 return (-1); 270 } 271 return (0); 272 } 273 274 int 275 __elfN(remove_brand_entry)(Elf_Brandinfo *entry) 276 { 277 int i; 278 279 for (i = 0; i < MAX_BRANDS; i++) { 280 if (elf_brand_list[i] == entry) { 281 elf_brand_list[i] = NULL; 282 break; 283 } 284 } 285 if (i == MAX_BRANDS) 286 return (-1); 287 return (0); 288 } 289 290 int 291 __elfN(brand_inuse)(Elf_Brandinfo *entry) 292 { 293 struct proc *p; 294 int rval = FALSE; 295 296 sx_slock(&allproc_lock); 297 FOREACH_PROC_IN_SYSTEM(p) { 298 if (p->p_sysent == entry->sysvec) { 299 rval = TRUE; 300 break; 301 } 302 } 303 sx_sunlock(&allproc_lock); 304 305 return (rval); 306 } 307 308 static Elf_Brandinfo * 309 __elfN(get_brandinfo)(struct image_params *imgp, const char *interp, 310 int32_t *osrel, uint32_t *fctl0) 311 { 312 const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header; 313 Elf_Brandinfo *bi, *bi_m; 314 boolean_t ret, has_fctl0; 315 int i, interp_name_len; 316 317 interp_name_len = interp != NULL ? strlen(interp) + 1 : 0; 318 319 /* 320 * We support four types of branding -- (1) the ELF EI_OSABI field 321 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string 322 * branding w/in the ELF header, (3) path of the `interp_path' 323 * field, and (4) the ".note.ABI-tag" ELF section. 
324 */ 325 326 /* Look for an ".note.ABI-tag" ELF section */ 327 bi_m = NULL; 328 for (i = 0; i < MAX_BRANDS; i++) { 329 bi = elf_brand_list[i]; 330 if (bi == NULL) 331 continue; 332 if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0) 333 continue; 334 if (hdr->e_machine == bi->machine && (bi->flags & 335 (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) { 336 has_fctl0 = false; 337 *fctl0 = 0; 338 *osrel = 0; 339 ret = __elfN(check_note)(imgp, bi->brand_note, osrel, 340 &has_fctl0, fctl0); 341 /* Give brand a chance to veto check_note's guess */ 342 if (ret && bi->header_supported) { 343 ret = bi->header_supported(imgp, osrel, 344 has_fctl0 ? fctl0 : NULL); 345 } 346 /* 347 * If note checker claimed the binary, but the 348 * interpreter path in the image does not 349 * match default one for the brand, try to 350 * search for other brands with the same 351 * interpreter. Either there is better brand 352 * with the right interpreter, or, failing 353 * this, we return first brand which accepted 354 * our note and, optionally, header. 355 */ 356 if (ret && bi_m == NULL && interp != NULL && 357 (bi->interp_path == NULL || 358 (strlen(bi->interp_path) + 1 != interp_name_len || 359 strncmp(interp, bi->interp_path, interp_name_len) 360 != 0))) { 361 bi_m = bi; 362 ret = 0; 363 } 364 if (ret) 365 return (bi); 366 } 367 } 368 if (bi_m != NULL) 369 return (bi_m); 370 371 /* If the executable has a brand, search for it in the brand list. 
*/ 372 for (i = 0; i < MAX_BRANDS; i++) { 373 bi = elf_brand_list[i]; 374 if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || 375 (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) 376 continue; 377 if (hdr->e_machine == bi->machine && 378 (hdr->e_ident[EI_OSABI] == bi->brand || 379 (bi->compat_3_brand != NULL && 380 strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND], 381 bi->compat_3_brand) == 0))) { 382 /* Looks good, but give brand a chance to veto */ 383 if (bi->header_supported == NULL || 384 bi->header_supported(imgp, NULL, NULL)) { 385 /* 386 * Again, prefer strictly matching 387 * interpreter path. 388 */ 389 if (interp_name_len == 0 && 390 bi->interp_path == NULL) 391 return (bi); 392 if (bi->interp_path != NULL && 393 strlen(bi->interp_path) + 1 == 394 interp_name_len && strncmp(interp, 395 bi->interp_path, interp_name_len) == 0) 396 return (bi); 397 if (bi_m == NULL) 398 bi_m = bi; 399 } 400 } 401 } 402 if (bi_m != NULL) 403 return (bi_m); 404 405 /* No known brand, see if the header is recognized by any brand */ 406 for (i = 0; i < MAX_BRANDS; i++) { 407 bi = elf_brand_list[i]; 408 if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY || 409 bi->header_supported == NULL) 410 continue; 411 if (hdr->e_machine == bi->machine) { 412 ret = bi->header_supported(imgp, NULL, NULL); 413 if (ret) 414 return (bi); 415 } 416 } 417 418 /* Lacking a known brand, search for a recognized interpreter. 
*/ 419 if (interp != NULL) { 420 for (i = 0; i < MAX_BRANDS; i++) { 421 bi = elf_brand_list[i]; 422 if (bi == NULL || (bi->flags & 423 (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC)) 424 != 0) 425 continue; 426 if (hdr->e_machine == bi->machine && 427 bi->interp_path != NULL && 428 /* ELF image p_filesz includes terminating zero */ 429 strlen(bi->interp_path) + 1 == interp_name_len && 430 strncmp(interp, bi->interp_path, interp_name_len) 431 == 0 && (bi->header_supported == NULL || 432 bi->header_supported(imgp, NULL, NULL))) 433 return (bi); 434 } 435 } 436 437 /* Lacking a recognized interpreter, try the default brand */ 438 for (i = 0; i < MAX_BRANDS; i++) { 439 bi = elf_brand_list[i]; 440 if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 || 441 (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)) 442 continue; 443 if (hdr->e_machine == bi->machine && 444 __elfN(fallback_brand) == bi->brand && 445 (bi->header_supported == NULL || 446 bi->header_supported(imgp, NULL, NULL))) 447 return (bi); 448 } 449 return (NULL); 450 } 451 452 static bool 453 __elfN(phdr_in_zero_page)(const Elf_Ehdr *hdr) 454 { 455 return (hdr->e_phoff <= PAGE_SIZE && 456 (u_int)hdr->e_phentsize * hdr->e_phnum <= PAGE_SIZE - hdr->e_phoff); 457 } 458 459 static int 460 __elfN(check_header)(const Elf_Ehdr *hdr) 461 { 462 Elf_Brandinfo *bi; 463 int i; 464 465 if (!IS_ELF(*hdr) || 466 hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS || 467 hdr->e_ident[EI_DATA] != ELF_TARG_DATA || 468 hdr->e_ident[EI_VERSION] != EV_CURRENT || 469 hdr->e_phentsize != sizeof(Elf_Phdr) || 470 hdr->e_version != ELF_TARG_VER) 471 return (ENOEXEC); 472 473 /* 474 * Make sure we have at least one brand for this machine. 
475 */ 476 477 for (i = 0; i < MAX_BRANDS; i++) { 478 bi = elf_brand_list[i]; 479 if (bi != NULL && bi->machine == hdr->e_machine) 480 break; 481 } 482 if (i == MAX_BRANDS) 483 return (ENOEXEC); 484 485 return (0); 486 } 487 488 static int 489 __elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 490 vm_offset_t start, vm_offset_t end, vm_prot_t prot) 491 { 492 struct sf_buf *sf; 493 int error; 494 vm_offset_t off; 495 496 /* 497 * Create the page if it doesn't exist yet. Ignore errors. 498 */ 499 vm_map_fixed(map, NULL, 0, trunc_page(start), round_page(end) - 500 trunc_page(start), VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL); 501 502 /* 503 * Find the page from the underlying object. 504 */ 505 if (object != NULL) { 506 sf = vm_imgact_map_page(object, offset); 507 if (sf == NULL) 508 return (KERN_FAILURE); 509 off = offset - trunc_page(offset); 510 error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start, 511 end - start); 512 vm_imgact_unmap_page(sf); 513 if (error != 0) 514 return (KERN_FAILURE); 515 } 516 517 return (KERN_SUCCESS); 518 } 519 520 static int 521 __elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object, 522 vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot, 523 int cow) 524 { 525 struct sf_buf *sf; 526 vm_offset_t off; 527 vm_size_t sz; 528 int error, locked, rv; 529 530 if (start != trunc_page(start)) { 531 rv = __elfN(map_partial)(map, object, offset, start, 532 round_page(start), prot); 533 if (rv != KERN_SUCCESS) 534 return (rv); 535 offset += round_page(start) - start; 536 start = round_page(start); 537 } 538 if (end != round_page(end)) { 539 rv = __elfN(map_partial)(map, object, offset + 540 trunc_page(end) - start, trunc_page(end), end, prot); 541 if (rv != KERN_SUCCESS) 542 return (rv); 543 end = trunc_page(end); 544 } 545 if (start >= end) 546 return (KERN_SUCCESS); 547 if ((offset & PAGE_MASK) != 0) { 548 /* 549 * The mapping is not page aligned. 
This means that we have 550 * to copy the data. 551 */ 552 rv = vm_map_fixed(map, NULL, 0, start, end - start, 553 prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL); 554 if (rv != KERN_SUCCESS) 555 return (rv); 556 if (object == NULL) 557 return (KERN_SUCCESS); 558 for (; start < end; start += sz) { 559 sf = vm_imgact_map_page(object, offset); 560 if (sf == NULL) 561 return (KERN_FAILURE); 562 off = offset - trunc_page(offset); 563 sz = end - start; 564 if (sz > PAGE_SIZE - off) 565 sz = PAGE_SIZE - off; 566 error = copyout((caddr_t)sf_buf_kva(sf) + off, 567 (caddr_t)start, sz); 568 vm_imgact_unmap_page(sf); 569 if (error != 0) 570 return (KERN_FAILURE); 571 offset += sz; 572 } 573 } else { 574 vm_object_reference(object); 575 rv = vm_map_fixed(map, object, offset, start, end - start, 576 prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL | 577 (object != NULL ? MAP_VN_EXEC : 0)); 578 if (rv != KERN_SUCCESS) { 579 locked = VOP_ISLOCKED(imgp->vp); 580 VOP_UNLOCK(imgp->vp); 581 vm_object_deallocate(object); 582 vn_lock(imgp->vp, locked | LK_RETRY); 583 return (rv); 584 } else if (object != NULL) { 585 MPASS(imgp->vp->v_object == object); 586 VOP_SET_TEXT_CHECKED(imgp->vp); 587 } 588 } 589 return (KERN_SUCCESS); 590 } 591 592 static int 593 __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset, 594 caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot) 595 { 596 struct sf_buf *sf; 597 size_t map_len; 598 vm_map_t map; 599 vm_object_t object; 600 vm_offset_t map_addr; 601 int error, rv, cow; 602 size_t copy_len; 603 vm_ooffset_t file_addr; 604 605 /* 606 * It's necessary to fail if the filsz + offset taken from the 607 * header is greater than the actual file pager object's size. 608 * If we were to allow this, then the vm_map_find() below would 609 * walk right off the end of the file object and into the ether. 610 * 611 * While I'm here, might as well check for something else that 612 * is invalid: filsz cannot be greater than memsz. 
613 */ 614 if ((filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size) || 615 filsz > memsz) { 616 uprintf("elf_load_section: truncated ELF file\n"); 617 return (ENOEXEC); 618 } 619 620 object = imgp->object; 621 map = &imgp->proc->p_vmspace->vm_map; 622 map_addr = trunc_page((vm_offset_t)vmaddr); 623 file_addr = trunc_page(offset); 624 625 /* 626 * We have two choices. We can either clear the data in the last page 627 * of an oversized mapping, or we can start the anon mapping a page 628 * early and copy the initialized data into that first page. We 629 * choose the second. 630 */ 631 if (filsz == 0) 632 map_len = 0; 633 else if (memsz > filsz) 634 map_len = trunc_page(offset + filsz) - file_addr; 635 else 636 map_len = round_page(offset + filsz) - file_addr; 637 638 if (map_len != 0) { 639 /* cow flags: don't dump readonly sections in core */ 640 cow = MAP_COPY_ON_WRITE | MAP_PREFAULT | 641 (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP); 642 643 rv = __elfN(map_insert)(imgp, map, object, file_addr, 644 map_addr, map_addr + map_len, prot, cow); 645 if (rv != KERN_SUCCESS) 646 return (EINVAL); 647 648 /* we can stop now if we've covered it all */ 649 if (memsz == filsz) 650 return (0); 651 } 652 653 /* 654 * We have to get the remaining bit of the file into the first part 655 * of the oversized map segment. This is normally because the .data 656 * segment in the file is extended to provide bss. It's a neat idea 657 * to try and save a page, but it's a pain in the behind to implement. 658 */ 659 copy_len = filsz == 0 ? 0 : (offset + filsz) - trunc_page(offset + 660 filsz); 661 map_addr = trunc_page((vm_offset_t)vmaddr + filsz); 662 map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr; 663 664 /* This had damn well better be true! 
*/ 665 if (map_len != 0) { 666 rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr, 667 map_addr + map_len, prot, 0); 668 if (rv != KERN_SUCCESS) 669 return (EINVAL); 670 } 671 672 if (copy_len != 0) { 673 sf = vm_imgact_map_page(object, offset + filsz); 674 if (sf == NULL) 675 return (EIO); 676 677 /* send the page fragment to user space */ 678 error = copyout((caddr_t)sf_buf_kva(sf), (caddr_t)map_addr, 679 copy_len); 680 vm_imgact_unmap_page(sf); 681 if (error != 0) 682 return (error); 683 } 684 685 /* 686 * Remove write access to the page if it was only granted by map_insert 687 * to allow copyout. 688 */ 689 if ((prot & VM_PROT_WRITE) == 0) 690 vm_map_protect(map, trunc_page(map_addr), round_page(map_addr + 691 map_len), prot, 0, VM_MAP_PROTECT_SET_PROT); 692 693 return (0); 694 } 695 696 static int 697 __elfN(load_sections)(struct image_params *imgp, const Elf_Ehdr *hdr, 698 const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp) 699 { 700 vm_prot_t prot; 701 u_long base_addr; 702 bool first; 703 int error, i; 704 705 ASSERT_VOP_LOCKED(imgp->vp, __func__); 706 707 base_addr = 0; 708 first = true; 709 710 for (i = 0; i < hdr->e_phnum; i++) { 711 if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) 712 continue; 713 714 /* Loadable segment */ 715 prot = __elfN(trans_prot)(phdr[i].p_flags); 716 error = __elfN(load_section)(imgp, phdr[i].p_offset, 717 (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase, 718 phdr[i].p_memsz, phdr[i].p_filesz, prot); 719 if (error != 0) 720 return (error); 721 722 /* 723 * Establish the base address if this is the first segment. 724 */ 725 if (first) { 726 base_addr = trunc_page(phdr[i].p_vaddr + rbase); 727 first = false; 728 } 729 } 730 731 if (base_addrp != NULL) 732 *base_addrp = base_addr; 733 734 return (0); 735 } 736 737 /* 738 * Load the file "file" into memory. It may be either a shared object 739 * or an executable. 740 * 741 * The "addr" reference parameter is in/out. 
On entry, it specifies 742 * the address where a shared object should be loaded. If the file is 743 * an executable, this value is ignored. On exit, "addr" specifies 744 * where the file was actually loaded. 745 * 746 * The "entry" reference parameter is out only. On exit, it specifies 747 * the entry point for the loaded file. 748 */ 749 static int 750 __elfN(load_file)(struct proc *p, const char *file, u_long *addr, 751 u_long *entry) 752 { 753 struct { 754 struct nameidata nd; 755 struct vattr attr; 756 struct image_params image_params; 757 } *tempdata; 758 const Elf_Ehdr *hdr = NULL; 759 const Elf_Phdr *phdr = NULL; 760 struct nameidata *nd; 761 struct vattr *attr; 762 struct image_params *imgp; 763 u_long rbase; 764 u_long base_addr = 0; 765 int error; 766 767 #ifdef CAPABILITY_MODE 768 /* 769 * XXXJA: This check can go away once we are sufficiently confident 770 * that the checks in namei() are correct. 771 */ 772 if (IN_CAPABILITY_MODE(curthread)) 773 return (ECAPMODE); 774 #endif 775 776 tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK | M_ZERO); 777 nd = &tempdata->nd; 778 attr = &tempdata->attr; 779 imgp = &tempdata->image_params; 780 781 /* 782 * Initialize part of the common data 783 */ 784 imgp->proc = p; 785 imgp->attr = attr; 786 787 NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF, 788 UIO_SYSSPACE, file, curthread); 789 if ((error = namei(nd)) != 0) { 790 nd->ni_vp = NULL; 791 goto fail; 792 } 793 NDFREE(nd, NDF_ONLY_PNBUF); 794 imgp->vp = nd->ni_vp; 795 796 /* 797 * Check permissions, modes, uid, etc on the file, and "open" it. 
798 */ 799 error = exec_check_permissions(imgp); 800 if (error) 801 goto fail; 802 803 error = exec_map_first_page(imgp); 804 if (error) 805 goto fail; 806 807 imgp->object = nd->ni_vp->v_object; 808 809 hdr = (const Elf_Ehdr *)imgp->image_header; 810 if ((error = __elfN(check_header)(hdr)) != 0) 811 goto fail; 812 if (hdr->e_type == ET_DYN) 813 rbase = *addr; 814 else if (hdr->e_type == ET_EXEC) 815 rbase = 0; 816 else { 817 error = ENOEXEC; 818 goto fail; 819 } 820 821 /* Only support headers that fit within first page for now */ 822 if (!__elfN(phdr_in_zero_page)(hdr)) { 823 error = ENOEXEC; 824 goto fail; 825 } 826 827 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 828 if (!aligned(phdr, Elf_Addr)) { 829 error = ENOEXEC; 830 goto fail; 831 } 832 833 error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr); 834 if (error != 0) 835 goto fail; 836 837 *addr = base_addr; 838 *entry = (unsigned long)hdr->e_entry + rbase; 839 840 fail: 841 if (imgp->firstpage) 842 exec_unmap_first_page(imgp); 843 844 if (nd->ni_vp) { 845 if (imgp->textset) 846 VOP_UNSET_TEXT_CHECKED(nd->ni_vp); 847 vput(nd->ni_vp); 848 } 849 free(tempdata, M_TEMP); 850 851 return (error); 852 } 853 854 static u_long 855 __CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv, 856 u_int align) 857 { 858 u_long rbase, res; 859 860 MPASS(vm_map_min(map) <= minv); 861 MPASS(maxv <= vm_map_max(map)); 862 MPASS(minv < maxv); 863 MPASS(minv + align < maxv); 864 arc4rand(&rbase, sizeof(rbase), 0); 865 res = roundup(minv, (u_long)align) + rbase % (maxv - minv); 866 res &= ~((u_long)align - 1); 867 if (res >= maxv) 868 res -= align; 869 KASSERT(res >= minv, 870 ("res %#lx < minv %#lx, maxv %#lx rbase %#lx", 871 res, minv, maxv, rbase)); 872 KASSERT(res < maxv, 873 ("res %#lx > maxv %#lx, minv %#lx rbase %#lx", 874 res, maxv, minv, rbase)); 875 return (res); 876 } 877 878 static int 879 __elfN(enforce_limits)(struct image_params *imgp, const Elf_Ehdr *hdr, 880 const 
Elf_Phdr *phdr, u_long et_dyn_addr) 881 { 882 struct vmspace *vmspace; 883 const char *err_str; 884 u_long text_size, data_size, total_size, text_addr, data_addr; 885 u_long seg_size, seg_addr; 886 int i; 887 888 err_str = NULL; 889 text_size = data_size = total_size = text_addr = data_addr = 0; 890 891 for (i = 0; i < hdr->e_phnum; i++) { 892 if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0) 893 continue; 894 895 seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr); 896 seg_size = round_page(phdr[i].p_memsz + 897 phdr[i].p_vaddr + et_dyn_addr - seg_addr); 898 899 /* 900 * Make the largest executable segment the official 901 * text segment and all others data. 902 * 903 * Note that obreak() assumes that data_addr + data_size == end 904 * of data load area, and the ELF file format expects segments 905 * to be sorted by address. If multiple data segments exist, 906 * the last one will be used. 907 */ 908 909 if ((phdr[i].p_flags & PF_X) != 0 && text_size < seg_size) { 910 text_size = seg_size; 911 text_addr = seg_addr; 912 } else { 913 data_size = seg_size; 914 data_addr = seg_addr; 915 } 916 total_size += seg_size; 917 } 918 919 if (data_addr == 0 && data_size == 0) { 920 data_addr = text_addr; 921 data_size = text_size; 922 } 923 924 /* 925 * Check limits. It should be safe to check the 926 * limits after loading the segments since we do 927 * not actually fault in all the segments pages. 
928 */ 929 PROC_LOCK(imgp->proc); 930 if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA)) 931 err_str = "Data segment size exceeds process limit"; 932 else if (text_size > maxtsiz) 933 err_str = "Text segment size exceeds system limit"; 934 else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM)) 935 err_str = "Total segment size exceeds process limit"; 936 else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0) 937 err_str = "Data segment size exceeds resource limit"; 938 else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0) 939 err_str = "Total segment size exceeds resource limit"; 940 PROC_UNLOCK(imgp->proc); 941 if (err_str != NULL) { 942 uprintf("%s\n", err_str); 943 return (ENOMEM); 944 } 945 946 vmspace = imgp->proc->p_vmspace; 947 vmspace->vm_tsize = text_size >> PAGE_SHIFT; 948 vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr; 949 vmspace->vm_dsize = data_size >> PAGE_SHIFT; 950 vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr; 951 952 return (0); 953 } 954 955 static int 956 __elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr, 957 char **interpp, bool *free_interpp) 958 { 959 struct thread *td; 960 char *interp; 961 int error, interp_name_len; 962 963 KASSERT(phdr->p_type == PT_INTERP, 964 ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type)); 965 ASSERT_VOP_LOCKED(imgp->vp, __func__); 966 967 td = curthread; 968 969 /* Path to interpreter */ 970 if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) { 971 uprintf("Invalid PT_INTERP\n"); 972 return (ENOEXEC); 973 } 974 975 interp_name_len = phdr->p_filesz; 976 if (phdr->p_offset > PAGE_SIZE || 977 interp_name_len > PAGE_SIZE - phdr->p_offset) { 978 /* 979 * The vnode lock might be needed by the pagedaemon to 980 * clean pages owned by the vnode. Do not allow sleep 981 * waiting for memory with the vnode locked, instead 982 * try non-sleepable allocation first, and if it 983 * fails, go to the slow path were we drop the lock 984 * and do M_WAITOK. 
A text reference prevents 985 * modifications to the vnode content. 986 */ 987 interp = malloc(interp_name_len + 1, M_TEMP, M_NOWAIT); 988 if (interp == NULL) { 989 VOP_UNLOCK(imgp->vp); 990 interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK); 991 vn_lock(imgp->vp, LK_SHARED | LK_RETRY); 992 } 993 994 error = vn_rdwr(UIO_READ, imgp->vp, interp, 995 interp_name_len, phdr->p_offset, 996 UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred, 997 NOCRED, NULL, td); 998 if (error != 0) { 999 free(interp, M_TEMP); 1000 uprintf("i/o error PT_INTERP %d\n", error); 1001 return (error); 1002 } 1003 interp[interp_name_len] = '\0'; 1004 1005 *interpp = interp; 1006 *free_interpp = true; 1007 return (0); 1008 } 1009 1010 interp = __DECONST(char *, imgp->image_header) + phdr->p_offset; 1011 if (interp[interp_name_len - 1] != '\0') { 1012 uprintf("Invalid PT_INTERP\n"); 1013 return (ENOEXEC); 1014 } 1015 1016 *interpp = interp; 1017 *free_interpp = false; 1018 return (0); 1019 } 1020 1021 static int 1022 __elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info, 1023 const char *interp, u_long *addr, u_long *entry) 1024 { 1025 char *path; 1026 int error; 1027 1028 if (brand_info->emul_path != NULL && 1029 brand_info->emul_path[0] != '\0') { 1030 path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1031 snprintf(path, MAXPATHLEN, "%s%s", 1032 brand_info->emul_path, interp); 1033 error = __elfN(load_file)(imgp->proc, path, addr, entry); 1034 free(path, M_TEMP); 1035 if (error == 0) 1036 return (0); 1037 } 1038 1039 if (brand_info->interp_newpath != NULL && 1040 (brand_info->interp_path == NULL || 1041 strcmp(interp, brand_info->interp_path) == 0)) { 1042 error = __elfN(load_file)(imgp->proc, 1043 brand_info->interp_newpath, addr, entry); 1044 if (error == 0) 1045 return (0); 1046 } 1047 1048 error = __elfN(load_file)(imgp->proc, interp, addr, entry); 1049 if (error == 0) 1050 return (0); 1051 1052 uprintf("ELF interpreter %s not found, error %d\n", interp, error); 1053 return 
(error); 1054 } 1055 1056 /* 1057 * Impossible et_dyn_addr initial value indicating that the real base 1058 * must be calculated later with some randomization applied. 1059 */ 1060 #define ET_DYN_ADDR_RAND 1 1061 1062 static int 1063 __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) 1064 { 1065 struct thread *td; 1066 const Elf_Ehdr *hdr; 1067 const Elf_Phdr *phdr; 1068 Elf_Auxargs *elf_auxargs; 1069 struct vmspace *vmspace; 1070 vm_map_t map; 1071 char *interp; 1072 Elf_Brandinfo *brand_info; 1073 struct sysentvec *sv; 1074 u_long addr, baddr, et_dyn_addr, entry, proghdr; 1075 u_long maxalign, mapsz, maxv, maxv1; 1076 uint32_t fctl0; 1077 int32_t osrel; 1078 bool free_interp; 1079 int error, i, n; 1080 1081 hdr = (const Elf_Ehdr *)imgp->image_header; 1082 1083 /* 1084 * Do we have a valid ELF header ? 1085 * 1086 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later 1087 * if particular brand doesn't support it. 1088 */ 1089 if (__elfN(check_header)(hdr) != 0 || 1090 (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN)) 1091 return (-1); 1092 1093 /* 1094 * From here on down, we return an errno, not -1, as we've 1095 * detected an ELF file. 
1096 */ 1097 1098 if (!__elfN(phdr_in_zero_page)(hdr)) { 1099 uprintf("Program headers not in the first page\n"); 1100 return (ENOEXEC); 1101 } 1102 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 1103 if (!aligned(phdr, Elf_Addr)) { 1104 uprintf("Unaligned program headers\n"); 1105 return (ENOEXEC); 1106 } 1107 1108 n = error = 0; 1109 baddr = 0; 1110 osrel = 0; 1111 fctl0 = 0; 1112 entry = proghdr = 0; 1113 interp = NULL; 1114 free_interp = false; 1115 td = curthread; 1116 maxalign = PAGE_SIZE; 1117 mapsz = 0; 1118 1119 for (i = 0; i < hdr->e_phnum; i++) { 1120 switch (phdr[i].p_type) { 1121 case PT_LOAD: 1122 if (n == 0) 1123 baddr = phdr[i].p_vaddr; 1124 if (phdr[i].p_align > maxalign) 1125 maxalign = phdr[i].p_align; 1126 mapsz += phdr[i].p_memsz; 1127 n++; 1128 1129 /* 1130 * If this segment contains the program headers, 1131 * remember their virtual address for the AT_PHDR 1132 * aux entry. Static binaries don't usually include 1133 * a PT_PHDR entry. 1134 */ 1135 if (phdr[i].p_offset == 0 && 1136 hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize 1137 <= phdr[i].p_filesz) 1138 proghdr = phdr[i].p_vaddr + hdr->e_phoff; 1139 break; 1140 case PT_INTERP: 1141 /* Path to interpreter */ 1142 if (interp != NULL) { 1143 uprintf("Multiple PT_INTERP headers\n"); 1144 error = ENOEXEC; 1145 goto ret; 1146 } 1147 error = __elfN(get_interp)(imgp, &phdr[i], &interp, 1148 &free_interp); 1149 if (error != 0) 1150 goto ret; 1151 break; 1152 case PT_GNU_STACK: 1153 if (__elfN(nxstack)) 1154 imgp->stack_prot = 1155 __elfN(trans_prot)(phdr[i].p_flags); 1156 imgp->stack_sz = phdr[i].p_memsz; 1157 break; 1158 case PT_PHDR: /* Program header table info */ 1159 proghdr = phdr[i].p_vaddr; 1160 break; 1161 } 1162 } 1163 1164 brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0); 1165 if (brand_info == NULL) { 1166 uprintf("ELF binary type \"%u\" not known.\n", 1167 hdr->e_ident[EI_OSABI]); 1168 error = ENOEXEC; 1169 goto ret; 1170 } 1171 sv = brand_info->sysvec; 
1172 et_dyn_addr = 0; 1173 if (hdr->e_type == ET_DYN) { 1174 if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) { 1175 uprintf("Cannot execute shared object\n"); 1176 error = ENOEXEC; 1177 goto ret; 1178 } 1179 /* 1180 * Honour the base load address from the dso if it is 1181 * non-zero for some reason. 1182 */ 1183 if (baddr == 0) { 1184 if ((sv->sv_flags & SV_ASLR) == 0 || 1185 (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) 1186 et_dyn_addr = __elfN(pie_base); 1187 else if ((__elfN(pie_aslr_enabled) && 1188 (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) || 1189 (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0) 1190 et_dyn_addr = ET_DYN_ADDR_RAND; 1191 else 1192 et_dyn_addr = __elfN(pie_base); 1193 } 1194 } 1195 1196 /* 1197 * Avoid a possible deadlock if the current address space is destroyed 1198 * and that address space maps the locked vnode. In the common case, 1199 * the locked vnode's v_usecount is decremented but remains greater 1200 * than zero. Consequently, the vnode lock is not needed by vrele(). 1201 * However, in cases where the vnode lock is external, such as nullfs, 1202 * v_usecount may become zero. 1203 * 1204 * The VV_TEXT flag prevents modifications to the executable while 1205 * the vnode is unlocked. 1206 */ 1207 VOP_UNLOCK(imgp->vp); 1208 1209 /* 1210 * Decide whether to enable randomization of user mappings. 1211 * First, reset user preferences for the setid binaries. 1212 * Then, account for the support of the randomization by the 1213 * ABI, by user preferences, and make special treatment for 1214 * PIE binaries. 
1215 */ 1216 if (imgp->credential_setid) { 1217 PROC_LOCK(imgp->proc); 1218 imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE); 1219 PROC_UNLOCK(imgp->proc); 1220 } 1221 if ((sv->sv_flags & SV_ASLR) == 0 || 1222 (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 || 1223 (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) { 1224 KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND, 1225 ("et_dyn_addr == RAND and !ASLR")); 1226 } else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 || 1227 (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) || 1228 et_dyn_addr == ET_DYN_ADDR_RAND) { 1229 imgp->map_flags |= MAP_ASLR; 1230 /* 1231 * If user does not care about sbrk, utilize the bss 1232 * grow region for mappings as well. We can select 1233 * the base for the image anywere and still not suffer 1234 * from the fragmentation. 1235 */ 1236 if (!__elfN(aslr_honor_sbrk) || 1237 (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0) 1238 imgp->map_flags |= MAP_ASLR_IGNSTART; 1239 } 1240 1241 if (!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0) 1242 imgp->map_flags |= MAP_WXORX; 1243 1244 error = exec_new_vmspace(imgp, sv); 1245 vmspace = imgp->proc->p_vmspace; 1246 map = &vmspace->vm_map; 1247 1248 imgp->proc->p_sysent = sv; 1249 imgp->proc->p_elf_brandinfo = brand_info; 1250 1251 maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK); 1252 if (et_dyn_addr == ET_DYN_ADDR_RAND) { 1253 KASSERT((map->flags & MAP_ASLR) != 0, 1254 ("ET_DYN_ADDR_RAND but !MAP_ASLR")); 1255 et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map, 1256 vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA), 1257 /* reserve half of the address space to interpreter */ 1258 maxv / 2, 1UL << flsl(maxalign)); 1259 } 1260 1261 vn_lock(imgp->vp, LK_SHARED | LK_RETRY); 1262 if (error != 0) 1263 goto ret; 1264 1265 error = __elfN(load_sections)(imgp, hdr, phdr, et_dyn_addr, NULL); 1266 if (error != 0) 1267 goto ret; 1268 1269 error = __elfN(enforce_limits)(imgp, hdr, phdr, et_dyn_addr); 1270 if (error != 0) 1271 goto ret; 1272 1273 entry = 
(u_long)hdr->e_entry + et_dyn_addr; 1274 1275 /* 1276 * We load the dynamic linker where a userland call 1277 * to mmap(0, ...) would put it. The rationale behind this 1278 * calculation is that it leaves room for the heap to grow to 1279 * its maximum allowed size. 1280 */ 1281 addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td, 1282 RLIMIT_DATA)); 1283 if ((map->flags & MAP_ASLR) != 0) { 1284 maxv1 = maxv / 2 + addr / 2; 1285 MPASS(maxv1 >= addr); /* No overflow */ 1286 map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1, 1287 (MAXPAGESIZES > 1 && pagesizes[1] != 0) ? 1288 pagesizes[1] : pagesizes[0]); 1289 } else { 1290 map->anon_loc = addr; 1291 } 1292 1293 imgp->entry_addr = entry; 1294 1295 if (interp != NULL) { 1296 VOP_UNLOCK(imgp->vp); 1297 if ((map->flags & MAP_ASLR) != 0) { 1298 /* Assume that interpreter fits into 1/4 of AS */ 1299 maxv1 = maxv / 2 + addr / 2; 1300 MPASS(maxv1 >= addr); /* No overflow */ 1301 addr = __CONCAT(rnd_, __elfN(base))(map, addr, 1302 maxv1, PAGE_SIZE); 1303 } 1304 error = __elfN(load_interp)(imgp, brand_info, interp, &addr, 1305 &imgp->entry_addr); 1306 vn_lock(imgp->vp, LK_SHARED | LK_RETRY); 1307 if (error != 0) 1308 goto ret; 1309 } else 1310 addr = et_dyn_addr; 1311 1312 /* 1313 * Construct auxargs table (used by the copyout_auxargs routine) 1314 */ 1315 elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_NOWAIT); 1316 if (elf_auxargs == NULL) { 1317 VOP_UNLOCK(imgp->vp); 1318 elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK); 1319 vn_lock(imgp->vp, LK_SHARED | LK_RETRY); 1320 } 1321 elf_auxargs->execfd = -1; 1322 elf_auxargs->phdr = proghdr + et_dyn_addr; 1323 elf_auxargs->phent = hdr->e_phentsize; 1324 elf_auxargs->phnum = hdr->e_phnum; 1325 elf_auxargs->pagesz = PAGE_SIZE; 1326 elf_auxargs->base = addr; 1327 elf_auxargs->flags = 0; 1328 elf_auxargs->entry = entry; 1329 elf_auxargs->hdr_eflags = hdr->e_flags; 1330 1331 imgp->auxargs = elf_auxargs; 1332 imgp->interpreted = 0; 1333 
imgp->reloc_base = addr;
	imgp->proc->p_osrel = osrel;
	imgp->proc->p_fctl0 = fctl0;
	imgp->proc->p_elf_flags = hdr->e_flags;

ret:
	/* Release the interpreter path only if get_interp() allocated it. */
	if (free_interp)
		free(interp, M_TEMP);
	return (error);
}

/* Select suword32 or suword64 according to the ELF word size. */
#define	suword __CONCAT(suword, __ELF_WORD_SIZE)

/*
 * Build the ELF auxiliary vector for the new image and copy it out to
 * the user address "base".
 *
 * The entries are taken from imgp->auxargs (filled in by the image
 * activator) and from other fields of imgp, its process and its sysent.
 * imgp->auxargs is freed and reset to NULL here.  A zeroed array of
 * AT_COUNT entries is always copied out, regardless of how many entries
 * were actually emitted.
 *
 * Returns the result of copyout().
 */
int
__elfN(freebsd_copyout_auxargs)(struct image_params *imgp, uintptr_t base)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Auxinfo *argarray, *pos;
	int error;

	argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP,
	    M_WAITOK | M_ZERO);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags);
	if (imgp->execpathp != 0)
		AUXARGS_ENTRY_PTR(pos, AT_EXECPATH, imgp->execpathp);
	AUXARGS_ENTRY(pos, AT_OSRELDATE,
	    imgp->proc->p_ucred->cr_prison->pr_osreldate);
	if (imgp->canary != 0) {
		AUXARGS_ENTRY_PTR(pos, AT_CANARY, imgp->canary);
		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
	}
	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
	if (imgp->pagesizes != 0) {
		AUXARGS_ENTRY_PTR(pos, AT_PAGESIZES, imgp->pagesizes);
		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
	}
	if (imgp->sysent->sv_timekeep_base != 0) {
		AUXARGS_ENTRY(pos, AT_TIMEKEEP,
		    imgp->sysent->sv_timekeep_base);
	}
	/*
	 * Report the image's own stack protection only when the sysent has
	 * a shared page object and the image set a non-zero stack_prot;
	 * otherwise fall back to the sysent default.
	 */
	AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    imgp->sysent->sv_stackprot);
	if (imgp->sysent->sv_hwcap != NULL)
		AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap);
	if (imgp->sysent->sv_hwcap2 != NULL)
		AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2);
	AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(sigfastblock) ?
	    ELF_BSDF_SIGFASTBLK : 0);
	AUXARGS_ENTRY(pos, AT_ARGC, imgp->args->argc);
	AUXARGS_ENTRY_PTR(pos, AT_ARGV, imgp->argv);
	AUXARGS_ENTRY(pos, AT_ENVC, imgp->args->envc);
	AUXARGS_ENTRY_PTR(pos, AT_ENVV, imgp->envv);
	AUXARGS_ENTRY_PTR(pos, AT_PS_STRINGS, imgp->ps_strings);
	if (imgp->sysent->sv_fxrng_gen_base != 0)
		AUXARGS_ENTRY(pos, AT_FXRNG, imgp->sysent->sv_fxrng_gen_base);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	/* The auxargs scratch structure is consumed by this routine. */
	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;
	KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs"));

	error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_COUNT);
	free(argarray, M_TEMP);
	return (error);
}

/*
 * Push argc onto the user stack: move *stack_base down by one Elf_Addr
 * and store imgp->args->argc there.  Returns EFAULT if the store to user
 * memory fails, 0 otherwise; *stack_base is only updated on success.
 */
int
__elfN(freebsd_fixup)(uintptr_t *stack_base, struct image_params *imgp)
{
	Elf_Addr *base;

	base = (Elf_Addr *)*stack_base;
	base--;
	if (suword(base, imgp->args->argc) == -1)
		return (EFAULT);
	*stack_base = (uintptr_t)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
 */

/* Callback invoked by each_dumpable_segment() for every dumpable entry. */
typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* One pending core-file note, queued by __elfN(register_note)(). */
struct note_info {
	int		type;		/* Note type. */
	outfunc_t	outfunc;	/* Output function. */
	void		*outarg;	/* Argument for the output function. */
	size_t		outsize;	/* Output size. */
	TAILQ_ENTRY(note_info) link;	/* Link to the next note info.
*/ 1441 }; 1442 1443 TAILQ_HEAD(note_info_list, note_info); 1444 1445 extern int compress_user_cores; 1446 extern int compress_user_cores_level; 1447 1448 static void cb_put_phdr(vm_map_entry_t, void *); 1449 static void cb_size_segment(vm_map_entry_t, void *); 1450 static void each_dumpable_segment(struct thread *, segment_callback, void *, 1451 int); 1452 static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t, 1453 struct note_info_list *, size_t, int); 1454 static void __elfN(putnote)(struct thread *td, struct note_info *, struct sbuf *); 1455 1456 static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *); 1457 static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *); 1458 static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *); 1459 static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *); 1460 static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *); 1461 static void __elfN(note_ptlwpinfo)(void *, struct sbuf *, size_t *); 1462 static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *); 1463 static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *); 1464 static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *); 1465 static void note_procstat_files(void *, struct sbuf *, size_t *); 1466 static void note_procstat_groups(void *, struct sbuf *, size_t *); 1467 static void note_procstat_osrel(void *, struct sbuf *, size_t *); 1468 static void note_procstat_rlimit(void *, struct sbuf *, size_t *); 1469 static void note_procstat_umask(void *, struct sbuf *, size_t *); 1470 static void note_procstat_vmmap(void *, struct sbuf *, size_t *); 1471 1472 static int 1473 core_compressed_write(void *base, size_t len, off_t offset, void *arg) 1474 { 1475 1476 return (core_write((struct coredump_params *)arg, base, len, offset, 1477 UIO_SYSSPACE, NULL)); 1478 } 1479 1480 int 1481 __elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags) 1482 { 
1483 struct ucred *cred = td->td_ucred; 1484 int compm, error = 0; 1485 struct sseg_closure seginfo; 1486 struct note_info_list notelst; 1487 struct coredump_params params; 1488 struct note_info *ninfo; 1489 void *hdr, *tmpbuf; 1490 size_t hdrsize, notesz, coresize; 1491 1492 hdr = NULL; 1493 tmpbuf = NULL; 1494 TAILQ_INIT(¬elst); 1495 1496 /* Size the program segments. */ 1497 __elfN(size_segments)(td, &seginfo, flags); 1498 1499 /* 1500 * Collect info about the core file header area. 1501 */ 1502 hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count); 1503 if (seginfo.count + 1 >= PN_XNUM) 1504 hdrsize += sizeof(Elf_Shdr); 1505 td->td_proc->p_sysent->sv_elf_core_prepare_notes(td, ¬elst, ¬esz); 1506 coresize = round_page(hdrsize + notesz) + seginfo.size; 1507 1508 /* Set up core dump parameters. */ 1509 params.offset = 0; 1510 params.active_cred = cred; 1511 params.file_cred = NOCRED; 1512 params.td = td; 1513 params.vp = vp; 1514 params.comp = NULL; 1515 1516 #ifdef RACCT 1517 if (racct_enable) { 1518 PROC_LOCK(td->td_proc); 1519 error = racct_add(td->td_proc, RACCT_CORE, coresize); 1520 PROC_UNLOCK(td->td_proc); 1521 if (error != 0) { 1522 error = EFAULT; 1523 goto done; 1524 } 1525 } 1526 #endif 1527 if (coresize >= limit) { 1528 error = EFAULT; 1529 goto done; 1530 } 1531 1532 /* Create a compression stream if necessary. */ 1533 compm = compress_user_cores; 1534 if ((flags & (SVC_PT_COREDUMP | SVC_NOCOMPRESS)) == SVC_PT_COREDUMP && 1535 compm == 0) 1536 compm = COMPRESS_GZIP; 1537 if (compm != 0) { 1538 params.comp = compressor_init(core_compressed_write, 1539 compm, CORE_BUF_SIZE, 1540 compress_user_cores_level, ¶ms); 1541 if (params.comp == NULL) { 1542 error = EFAULT; 1543 goto done; 1544 } 1545 tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO); 1546 } 1547 1548 /* 1549 * Allocate memory for building the header, fill it up, 1550 * and write it out following the notes. 
1551 */ 1552 hdr = malloc(hdrsize, M_TEMP, M_WAITOK); 1553 error = __elfN(corehdr)(¶ms, seginfo.count, hdr, hdrsize, ¬elst, 1554 notesz, flags); 1555 1556 /* Write the contents of all of the writable segments. */ 1557 if (error == 0) { 1558 Elf_Phdr *php; 1559 off_t offset; 1560 int i; 1561 1562 php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1; 1563 offset = round_page(hdrsize + notesz); 1564 for (i = 0; i < seginfo.count; i++) { 1565 error = core_output((char *)(uintptr_t)php->p_vaddr, 1566 php->p_filesz, offset, ¶ms, tmpbuf); 1567 if (error != 0) 1568 break; 1569 offset += php->p_filesz; 1570 php++; 1571 } 1572 if (error == 0 && params.comp != NULL) 1573 error = compressor_flush(params.comp); 1574 } 1575 if (error) { 1576 log(LOG_WARNING, 1577 "Failed to write core file for process %s (error %d)\n", 1578 curproc->p_comm, error); 1579 } 1580 1581 done: 1582 free(tmpbuf, M_TEMP); 1583 if (params.comp != NULL) 1584 compressor_fini(params.comp); 1585 while ((ninfo = TAILQ_FIRST(¬elst)) != NULL) { 1586 TAILQ_REMOVE(¬elst, ninfo, link); 1587 free(ninfo, M_TEMP); 1588 } 1589 if (hdr != NULL) 1590 free(hdr, M_TEMP); 1591 1592 return (error); 1593 } 1594 1595 /* 1596 * A callback for each_dumpable_segment() to write out the segment's 1597 * program header entry. 
1598 */ 1599 static void 1600 cb_put_phdr(vm_map_entry_t entry, void *closure) 1601 { 1602 struct phdr_closure *phc = (struct phdr_closure *)closure; 1603 Elf_Phdr *phdr = phc->phdr; 1604 1605 phc->offset = round_page(phc->offset); 1606 1607 phdr->p_type = PT_LOAD; 1608 phdr->p_offset = phc->offset; 1609 phdr->p_vaddr = entry->start; 1610 phdr->p_paddr = 0; 1611 phdr->p_filesz = phdr->p_memsz = entry->end - entry->start; 1612 phdr->p_align = PAGE_SIZE; 1613 phdr->p_flags = __elfN(untrans_prot)(entry->protection); 1614 1615 phc->offset += phdr->p_filesz; 1616 phc->phdr++; 1617 } 1618 1619 /* 1620 * A callback for each_dumpable_segment() to gather information about 1621 * the number of segments and their total size. 1622 */ 1623 static void 1624 cb_size_segment(vm_map_entry_t entry, void *closure) 1625 { 1626 struct sseg_closure *ssc = (struct sseg_closure *)closure; 1627 1628 ssc->count++; 1629 ssc->size += entry->end - entry->start; 1630 } 1631 1632 void 1633 __elfN(size_segments)(struct thread *td, struct sseg_closure *seginfo, 1634 int flags) 1635 { 1636 seginfo->count = 0; 1637 seginfo->size = 0; 1638 1639 each_dumpable_segment(td, cb_size_segment, seginfo, flags); 1640 } 1641 1642 /* 1643 * For each writable segment in the process's memory map, call the given 1644 * function with a pointer to the map entry and some arbitrary 1645 * caller-supplied data. 1646 */ 1647 static void 1648 each_dumpable_segment(struct thread *td, segment_callback func, void *closure, 1649 int flags) 1650 { 1651 struct proc *p = td->td_proc; 1652 vm_map_t map = &p->p_vmspace->vm_map; 1653 vm_map_entry_t entry; 1654 vm_object_t backing_object, object; 1655 bool ignore_entry; 1656 1657 vm_map_lock_read(map); 1658 VM_MAP_ENTRY_FOREACH(entry, map) { 1659 /* 1660 * Don't dump inaccessible mappings, deal with legacy 1661 * coredump mode. 
1662 * 1663 * Note that read-only segments related to the elf binary 1664 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer 1665 * need to arbitrarily ignore such segments. 1666 */ 1667 if ((flags & SVC_ALL) == 0) { 1668 if (elf_legacy_coredump) { 1669 if ((entry->protection & VM_PROT_RW) != 1670 VM_PROT_RW) 1671 continue; 1672 } else { 1673 if ((entry->protection & VM_PROT_ALL) == 0) 1674 continue; 1675 } 1676 } 1677 1678 /* 1679 * Dont include memory segment in the coredump if 1680 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in 1681 * madvise(2). Do not dump submaps (i.e. parts of the 1682 * kernel map). 1683 */ 1684 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) 1685 continue; 1686 if ((entry->eflags & MAP_ENTRY_NOCOREDUMP) != 0 && 1687 (flags & SVC_ALL) == 0) 1688 continue; 1689 if ((object = entry->object.vm_object) == NULL) 1690 continue; 1691 1692 /* Ignore memory-mapped devices and such things. */ 1693 VM_OBJECT_RLOCK(object); 1694 while ((backing_object = object->backing_object) != NULL) { 1695 VM_OBJECT_RLOCK(backing_object); 1696 VM_OBJECT_RUNLOCK(object); 1697 object = backing_object; 1698 } 1699 ignore_entry = (object->flags & OBJ_FICTITIOUS) != 0; 1700 VM_OBJECT_RUNLOCK(object); 1701 if (ignore_entry) 1702 continue; 1703 1704 (*func)(entry, closure); 1705 } 1706 vm_map_unlock_read(map); 1707 } 1708 1709 /* 1710 * Write the core file header to the file, including padding up to 1711 * the page boundary. 1712 */ 1713 static int 1714 __elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr, 1715 size_t hdrsize, struct note_info_list *notelst, size_t notesz, 1716 int flags) 1717 { 1718 struct note_info *ninfo; 1719 struct sbuf *sb; 1720 int error; 1721 1722 /* Fill in the header. 
*/
	bzero(hdr, hdrsize);
	__elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz, flags);

	/* Stream header and notes through a draining, fixed-size sbuf. */
	sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
	sbuf_set_drain(sb, sbuf_drain_core_output, p);
	sbuf_start_section(sb, NULL);
	sbuf_bcat(sb, hdr, hdrsize);
	TAILQ_FOREACH(ninfo, notelst, link)
	    __elfN(putnote)(p->td, ninfo, sb);
	/* Align up to a page boundary for the program segments. */
	sbuf_end_section(sb, -1, PAGE_SIZE, 0);
	error = sbuf_finish(sb);
	sbuf_delete(sb);

	return (error);
}

/*
 * Queue every note that goes into the core file on "list" and store the
 * sum of the sizes reported by __elfN(register_note)() in *sizep.
 * The notes are registered in the order they will be written.
 */
void
__elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
    size_t *sizep)
{
	struct proc *p;
	struct thread *thr;
	size_t size;

	p = td->td_proc;
	size = 0;

	size += __elfN(register_note)(td, list, NT_PRPSINFO, __elfN(note_prpsinfo), p);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		size += __elfN(register_note)(td, list, NT_PRSTATUS,
		    __elfN(note_prstatus), thr);
		size += __elfN(register_note)(td, list, NT_FPREGSET,
		    __elfN(note_fpregset), thr);
		size += __elfN(register_note)(td, list, NT_THRMISC,
		    __elfN(note_thrmisc), thr);
		size += __elfN(register_note)(td, list, NT_PTLWPINFO,
		    __elfN(note_ptlwpinfo), thr);
		size += __elfN(register_note)(td, list, -1,
		    __elfN(note_threadmd), thr);

		/*
		 * After td itself, walk the process thread list from its
		 * head, stepping over td so it is not dumped twice.
		 */
		thr = thr == td ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	size += __elfN(register_note)(td, list, NT_PROCSTAT_PROC,
	    __elfN(note_procstat_proc), p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_FILES,
	    note_procstat_files, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_VMMAP,
	    note_procstat_vmmap, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_GROUPS,
	    note_procstat_groups, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_UMASK,
	    note_procstat_umask, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_RLIMIT,
	    note_procstat_rlimit, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_OSREL,
	    note_procstat_osrel, p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_PSSTRINGS,
	    __elfN(note_procstat_psstrings), p);
	size += __elfN(register_note)(td, list, NT_PROCSTAT_AUXV,
	    __elfN(note_procstat_auxv), p);

	*sizep = size;
}

/*
 * Fill the buffer "hdr" (hdrsize bytes) with the ELF header of a core
 * file, the PT_NOTE program header describing notesz bytes of notes, and
 * one PT_LOAD program header per dumpable segment.  When numsegs + 1
 * does not fit below PN_XNUM, a single section header carrying the real
 * program header count is appended as well.
 */
void
__elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
    size_t notesz, int flags)
{
	Elf_Ehdr *ehdr;
	Elf_Phdr *phdr;
	Elf_Shdr *shdr;
	struct phdr_closure phc;
	Elf_Brandinfo *bi;

	ehdr = (Elf_Ehdr *)hdr;
	bi = td->td_proc->p_elf_brandinfo;

	ehdr->e_ident[EI_MAG0] = ELFMAG0;
	ehdr->e_ident[EI_MAG1] = ELFMAG1;
	ehdr->e_ident[EI_MAG2] = ELFMAG2;
	ehdr->e_ident[EI_MAG3] = ELFMAG3;
	ehdr->e_ident[EI_CLASS] = ELF_CLASS;
	ehdr->e_ident[EI_DATA] = ELF_DATA;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = td->td_proc->p_sysent->sv_elf_core_osabi;
	ehdr->e_ident[EI_ABIVERSION] = 0;
	ehdr->e_ident[EI_PAD] = 0;
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = bi->machine;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_entry = 0;
	ehdr->e_phoff = sizeof(Elf_Ehdr);
	ehdr->e_flags = td->td_proc->p_elf_flags;
	ehdr->e_ehsize = sizeof(Elf_Ehdr);
	ehdr->e_phentsize = sizeof(Elf_Phdr);
	ehdr->e_shentsize = sizeof(Elf_Shdr);
	ehdr->e_shstrndx = SHN_UNDEF;
	/* The "+ 1" accounts for the PT_NOTE header preceding the segments. */
	if (numsegs + 1 < PN_XNUM) {
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shnum = 0;
	} else {
		ehdr->e_phnum = PN_XNUM;
		ehdr->e_shnum = 1;

		/* The section header sits right after the program headers. */
		ehdr->e_shoff = ehdr->e_phoff +
		    (numsegs + 1) * ehdr->e_phentsize;
		KASSERT(ehdr->e_shoff == hdrsize - sizeof(Elf_Shdr),
		    ("e_shoff: %zu, hdrsize - shdr: %zu",
		    (size_t)ehdr->e_shoff, hdrsize - sizeof(Elf_Shdr)));

		shdr = (Elf_Shdr *)((char *)hdr + ehdr->e_shoff);
		memset(shdr, 0, sizeof(*shdr));
		/*
		 * A special first section is used to hold large segment and
		 * section counts.  This was proposed by Sun Microsystems in
		 * Solaris and has been adopted by Linux; the standard ELF
		 * tools are already familiar with the technique.
		 *
		 * See table 7-7 of the Solaris "Linker and Libraries Guide"
		 * (or 12-7 depending on the version of the document) for more
		 * details.
		 */
		shdr->sh_type = SHT_NULL;
		shdr->sh_size = ehdr->e_shnum;
		shdr->sh_link = ehdr->e_shstrndx;
		shdr->sh_info = numsegs + 1;
	}

	/*
	 * Fill in the program header entries.
	 */
	phdr = (Elf_Phdr *)((char *)hdr + ehdr->e_phoff);

	/* The note segment. */
	phdr->p_type = PT_NOTE;
	phdr->p_offset = hdrsize;
	phdr->p_vaddr = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = notesz;
	phdr->p_memsz = 0;
	phdr->p_flags = PF_R;
	phdr->p_align = ELF_NOTE_ROUNDSIZE;
	phdr++;

	/* All the writable segments from the program. */
	phc.phdr = phdr;
	phc.offset = round_page(hdrsize + notesz);
	each_dumpable_segment(td, cb_put_phdr, &phc, flags);
}

/*
 * Queue a note of the given type on "list" and return the number of
 * bytes it will occupy in the core file.
 *
 * The output function is first called with a NULL sbuf so that it only
 * reports its payload size.  For type -1 that raw size is returned (such
 * notes are written without a note header by __elfN(putnote)());
 * otherwise the size of the note header plus the padded vendor name and
 * padded payload is returned.
 */
size_t
__elfN(register_note)(struct thread *td, struct note_info_list *list,
    int type, outfunc_t out, void *arg)
{
	const struct sysentvec *sv;
	struct note_info *ninfo;
	size_t size, notesize;

	sv = td->td_proc->p_sysent;
	size = 0;
	out(arg, NULL, &size);
	ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK);
	ninfo->type = type;
	ninfo->outfunc = out;
	ninfo->outarg = arg;
	ninfo->outsize = size;
	TAILQ_INSERT_TAIL(list, ninfo, link);

	if (type == -1)
		return (size);

	notesize = sizeof(Elf_Note) +		/* note header */
	    roundup2(strlen(sv->sv_elf_core_abi_vendor) + 1, ELF_NOTE_ROUNDSIZE) +
						/* note name */
	    roundup2(size, ELF_NOTE_ROUNDSIZE);	/* note description */

	return (notesize);
}

/*
 * Copy "len" bytes from src to dst and zero-fill up to the next
 * ELF_NOTE_ROUNDSIZE boundary.  With a NULL dst nothing is written.
 * Returns the padded length in either case.
 */
static size_t
append_note_data(const void *src, void *dst, size_t len)
{
	size_t padded_len;

	padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE);
	if (dst != NULL) {
		bcopy(src, dst, len);
		bzero((char *)dst + len, padded_len - len);
	}
	return (padded_len);
}

/*
 * Lay out a complete note (header, FREEBSD_ABI_VENDOR name, payload
 * copied from src) in the buffer "dst" and return the note's total size.
 * With a NULL dst only the size is computed.  When descp is not NULL it
 * receives the address of the payload inside dst.
 */
size_t
__elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp)
{
	Elf_Note *note;
	char *buf;
	size_t notesize;

	buf = dst;
	if (buf != NULL) {
		note = (Elf_Note *)buf;
		note->n_namesz = sizeof(FREEBSD_ABI_VENDOR);
		note->n_descsz = size;
		note->n_type = type;
		buf += sizeof(*note);
		buf += append_note_data(FREEBSD_ABI_VENDOR, buf,
		    sizeof(FREEBSD_ABI_VENDOR));
		append_note_data(src, buf, size);
		if (descp != NULL)
			*descp = buf;
	}

	notesize = sizeof(Elf_Note) +		/* note header */
	    roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) +
						/* note name */
	    roundup2(size,
ELF_NOTE_ROUNDSIZE); /* note description */ 1953 1954 return (notesize); 1955 } 1956 1957 static void 1958 __elfN(putnote)(struct thread *td, struct note_info *ninfo, struct sbuf *sb) 1959 { 1960 Elf_Note note; 1961 const struct sysentvec *sv; 1962 ssize_t old_len, sect_len; 1963 size_t new_len, descsz, i; 1964 1965 if (ninfo->type == -1) { 1966 ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); 1967 return; 1968 } 1969 1970 sv = td->td_proc->p_sysent; 1971 1972 note.n_namesz = strlen(sv->sv_elf_core_abi_vendor) + 1; 1973 note.n_descsz = ninfo->outsize; 1974 note.n_type = ninfo->type; 1975 1976 sbuf_bcat(sb, ¬e, sizeof(note)); 1977 sbuf_start_section(sb, &old_len); 1978 sbuf_bcat(sb, sv->sv_elf_core_abi_vendor, 1979 strlen(sv->sv_elf_core_abi_vendor) + 1); 1980 sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); 1981 if (note.n_descsz == 0) 1982 return; 1983 sbuf_start_section(sb, &old_len); 1984 ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); 1985 sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); 1986 if (sect_len < 0) 1987 return; 1988 1989 new_len = (size_t)sect_len; 1990 descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); 1991 if (new_len < descsz) { 1992 /* 1993 * It is expected that individual note emitters will correctly 1994 * predict their expected output size and fill up to that size 1995 * themselves, padding in a format-specific way if needed. 1996 * However, in case they don't, just do it here with zeros. 1997 */ 1998 for (i = 0; i < descsz - new_len; i++) 1999 sbuf_putc(sb, 0); 2000 } else if (new_len > descsz) { 2001 /* 2002 * We can't always truncate sb -- we may have drained some 2003 * of it already. 2004 */ 2005 KASSERT(new_len == descsz, ("%s: Note type %u changed as we " 2006 "read it (%zu > %zu). Since it is longer than " 2007 "expected, this coredump's notes are corrupt. 
THIS " 2008 "IS A BUG in the note_procstat routine for type %u.\n", 2009 __func__, (unsigned)note.n_type, new_len, descsz, 2010 (unsigned)note.n_type)); 2011 } 2012 } 2013 2014 /* 2015 * Miscellaneous note out functions. 2016 */ 2017 2018 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2019 #include <compat/freebsd32/freebsd32.h> 2020 #include <compat/freebsd32/freebsd32_signal.h> 2021 2022 typedef struct prstatus32 elf_prstatus_t; 2023 typedef struct prpsinfo32 elf_prpsinfo_t; 2024 typedef struct fpreg32 elf_prfpregset_t; 2025 typedef struct fpreg32 elf_fpregset_t; 2026 typedef struct reg32 elf_gregset_t; 2027 typedef struct thrmisc32 elf_thrmisc_t; 2028 #define ELF_KERN_PROC_MASK KERN_PROC_MASK32 2029 typedef struct kinfo_proc32 elf_kinfo_proc_t; 2030 typedef uint32_t elf_ps_strings_t; 2031 #else 2032 typedef prstatus_t elf_prstatus_t; 2033 typedef prpsinfo_t elf_prpsinfo_t; 2034 typedef prfpregset_t elf_prfpregset_t; 2035 typedef prfpregset_t elf_fpregset_t; 2036 typedef gregset_t elf_gregset_t; 2037 typedef thrmisc_t elf_thrmisc_t; 2038 #define ELF_KERN_PROC_MASK 0 2039 typedef struct kinfo_proc elf_kinfo_proc_t; 2040 typedef vm_offset_t elf_ps_strings_t; 2041 #endif 2042 2043 static void 2044 __elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep) 2045 { 2046 struct sbuf sbarg; 2047 size_t len; 2048 char *cp, *end; 2049 struct proc *p; 2050 elf_prpsinfo_t *psinfo; 2051 int error; 2052 2053 p = arg; 2054 if (sb != NULL) { 2055 KASSERT(*sizep == sizeof(*psinfo), ("invalid size")); 2056 psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK); 2057 psinfo->pr_version = PRPSINFO_VERSION; 2058 psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t); 2059 strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname)); 2060 PROC_LOCK(p); 2061 if (p->p_args != NULL) { 2062 len = sizeof(psinfo->pr_psargs) - 1; 2063 if (len > p->p_args->ar_length) 2064 len = p->p_args->ar_length; 2065 memcpy(psinfo->pr_psargs, p->p_args->ar_args, len); 2066 PROC_UNLOCK(p); 2067 
error = 0; 2068 } else { 2069 _PHOLD(p); 2070 PROC_UNLOCK(p); 2071 sbuf_new(&sbarg, psinfo->pr_psargs, 2072 sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN); 2073 error = proc_getargv(curthread, p, &sbarg); 2074 PRELE(p); 2075 if (sbuf_finish(&sbarg) == 0) 2076 len = sbuf_len(&sbarg) - 1; 2077 else 2078 len = sizeof(psinfo->pr_psargs) - 1; 2079 sbuf_delete(&sbarg); 2080 } 2081 if (error || len == 0) 2082 strlcpy(psinfo->pr_psargs, p->p_comm, 2083 sizeof(psinfo->pr_psargs)); 2084 else { 2085 KASSERT(len < sizeof(psinfo->pr_psargs), 2086 ("len is too long: %zu vs %zu", len, 2087 sizeof(psinfo->pr_psargs))); 2088 cp = psinfo->pr_psargs; 2089 end = cp + len - 1; 2090 for (;;) { 2091 cp = memchr(cp, '\0', end - cp); 2092 if (cp == NULL) 2093 break; 2094 *cp = ' '; 2095 } 2096 } 2097 psinfo->pr_pid = p->p_pid; 2098 sbuf_bcat(sb, psinfo, sizeof(*psinfo)); 2099 free(psinfo, M_TEMP); 2100 } 2101 *sizep = sizeof(*psinfo); 2102 } 2103 2104 static void 2105 __elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep) 2106 { 2107 struct thread *td; 2108 elf_prstatus_t *status; 2109 2110 td = arg; 2111 if (sb != NULL) { 2112 KASSERT(*sizep == sizeof(*status), ("invalid size")); 2113 status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK); 2114 status->pr_version = PRSTATUS_VERSION; 2115 status->pr_statussz = sizeof(elf_prstatus_t); 2116 status->pr_gregsetsz = sizeof(elf_gregset_t); 2117 status->pr_fpregsetsz = sizeof(elf_fpregset_t); 2118 status->pr_osreldate = osreldate; 2119 status->pr_cursig = td->td_proc->p_sig; 2120 status->pr_pid = td->td_tid; 2121 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2122 fill_regs32(td, &status->pr_reg); 2123 #else 2124 fill_regs(td, &status->pr_reg); 2125 #endif 2126 sbuf_bcat(sb, status, sizeof(*status)); 2127 free(status, M_TEMP); 2128 } 2129 *sizep = sizeof(*status); 2130 } 2131 2132 static void 2133 __elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep) 2134 { 2135 struct thread *td; 2136 elf_prfpregset_t *fpregset; 2137 
2138 td = arg; 2139 if (sb != NULL) { 2140 KASSERT(*sizep == sizeof(*fpregset), ("invalid size")); 2141 fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK); 2142 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2143 fill_fpregs32(td, fpregset); 2144 #else 2145 fill_fpregs(td, fpregset); 2146 #endif 2147 sbuf_bcat(sb, fpregset, sizeof(*fpregset)); 2148 free(fpregset, M_TEMP); 2149 } 2150 *sizep = sizeof(*fpregset); 2151 } 2152 2153 static void 2154 __elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep) 2155 { 2156 struct thread *td; 2157 elf_thrmisc_t thrmisc; 2158 2159 td = arg; 2160 if (sb != NULL) { 2161 KASSERT(*sizep == sizeof(thrmisc), ("invalid size")); 2162 bzero(&thrmisc, sizeof(thrmisc)); 2163 strcpy(thrmisc.pr_tname, td->td_name); 2164 sbuf_bcat(sb, &thrmisc, sizeof(thrmisc)); 2165 } 2166 *sizep = sizeof(thrmisc); 2167 } 2168 2169 static void 2170 __elfN(note_ptlwpinfo)(void *arg, struct sbuf *sb, size_t *sizep) 2171 { 2172 struct thread *td; 2173 size_t size; 2174 int structsize; 2175 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2176 struct ptrace_lwpinfo32 pl; 2177 #else 2178 struct ptrace_lwpinfo pl; 2179 #endif 2180 2181 td = arg; 2182 size = sizeof(structsize) + sizeof(pl); 2183 if (sb != NULL) { 2184 KASSERT(*sizep == size, ("invalid size")); 2185 structsize = sizeof(pl); 2186 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2187 bzero(&pl, sizeof(pl)); 2188 pl.pl_lwpid = td->td_tid; 2189 pl.pl_event = PL_EVENT_NONE; 2190 pl.pl_sigmask = td->td_sigmask; 2191 pl.pl_siglist = td->td_siglist; 2192 if (td->td_si.si_signo != 0) { 2193 pl.pl_event = PL_EVENT_SIGNAL; 2194 pl.pl_flags |= PL_FLAG_SI; 2195 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2196 siginfo_to_siginfo32(&td->td_si, &pl.pl_siginfo); 2197 #else 2198 pl.pl_siginfo = td->td_si; 2199 #endif 2200 } 2201 strcpy(pl.pl_tdname, td->td_name); 2202 /* XXX TODO: supply more information in struct ptrace_lwpinfo*/ 2203 sbuf_bcat(sb, &pl, sizeof(pl)); 2204 
} 2205 *sizep = size; 2206 } 2207 2208 /* 2209 * Allow for MD specific notes, as well as any MD 2210 * specific preparations for writing MI notes. 2211 */ 2212 static void 2213 __elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep) 2214 { 2215 struct thread *td; 2216 void *buf; 2217 size_t size; 2218 2219 td = (struct thread *)arg; 2220 size = *sizep; 2221 if (size != 0 && sb != NULL) 2222 buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK); 2223 else 2224 buf = NULL; 2225 size = 0; 2226 __elfN(dump_thread)(td, buf, &size); 2227 KASSERT(sb == NULL || *sizep == size, ("invalid size")); 2228 if (size != 0 && sb != NULL) 2229 sbuf_bcat(sb, buf, size); 2230 free(buf, M_TEMP); 2231 *sizep = size; 2232 } 2233 2234 #ifdef KINFO_PROC_SIZE 2235 CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); 2236 #endif 2237 2238 static void 2239 __elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep) 2240 { 2241 struct proc *p; 2242 size_t size; 2243 int structsize; 2244 2245 p = arg; 2246 size = sizeof(structsize) + p->p_numthreads * 2247 sizeof(elf_kinfo_proc_t); 2248 2249 if (sb != NULL) { 2250 KASSERT(*sizep == size, ("invalid size")); 2251 structsize = sizeof(elf_kinfo_proc_t); 2252 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2253 sx_slock(&proctree_lock); 2254 PROC_LOCK(p); 2255 kern_proc_out(p, sb, ELF_KERN_PROC_MASK); 2256 sx_sunlock(&proctree_lock); 2257 } 2258 *sizep = size; 2259 } 2260 2261 #ifdef KINFO_FILE_SIZE 2262 CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE); 2263 #endif 2264 2265 static void 2266 note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) 2267 { 2268 struct proc *p; 2269 size_t size, sect_sz, i; 2270 ssize_t start_len, sect_len; 2271 int structsize, filedesc_flags; 2272 2273 if (coredump_pack_fileinfo) 2274 filedesc_flags = KERN_FILEDESC_PACK_KINFO; 2275 else 2276 filedesc_flags = 0; 2277 2278 p = arg; 2279 structsize = sizeof(struct kinfo_file); 2280 if (sb == NULL) { 2281 size = 0; 2282 sb = sbuf_new(NULL, NULL, 
128, SBUF_FIXEDLEN); 2283 sbuf_set_drain(sb, sbuf_count_drain, &size); 2284 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2285 PROC_LOCK(p); 2286 kern_proc_filedesc_out(p, sb, -1, filedesc_flags); 2287 sbuf_finish(sb); 2288 sbuf_delete(sb); 2289 *sizep = size; 2290 } else { 2291 sbuf_start_section(sb, &start_len); 2292 2293 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2294 PROC_LOCK(p); 2295 kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), 2296 filedesc_flags); 2297 2298 sect_len = sbuf_end_section(sb, start_len, 0, 0); 2299 if (sect_len < 0) 2300 return; 2301 sect_sz = sect_len; 2302 2303 KASSERT(sect_sz <= *sizep, 2304 ("kern_proc_filedesc_out did not respect maxlen; " 2305 "requested %zu, got %zu", *sizep - sizeof(structsize), 2306 sect_sz - sizeof(structsize))); 2307 2308 for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) 2309 sbuf_putc(sb, 0); 2310 } 2311 } 2312 2313 #ifdef KINFO_VMENTRY_SIZE 2314 CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE); 2315 #endif 2316 2317 static void 2318 note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) 2319 { 2320 struct proc *p; 2321 size_t size; 2322 int structsize, vmmap_flags; 2323 2324 if (coredump_pack_vmmapinfo) 2325 vmmap_flags = KERN_VMMAP_PACK_KINFO; 2326 else 2327 vmmap_flags = 0; 2328 2329 p = arg; 2330 structsize = sizeof(struct kinfo_vmentry); 2331 if (sb == NULL) { 2332 size = 0; 2333 sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); 2334 sbuf_set_drain(sb, sbuf_count_drain, &size); 2335 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2336 PROC_LOCK(p); 2337 kern_proc_vmmap_out(p, sb, -1, vmmap_flags); 2338 sbuf_finish(sb); 2339 sbuf_delete(sb); 2340 *sizep = size; 2341 } else { 2342 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2343 PROC_LOCK(p); 2344 kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), 2345 vmmap_flags); 2346 } 2347 } 2348 2349 static void 2350 note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep) 2351 { 2352 struct proc *p; 2353 size_t 
size; 2354 int structsize; 2355 2356 p = arg; 2357 size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t); 2358 if (sb != NULL) { 2359 KASSERT(*sizep == size, ("invalid size")); 2360 structsize = sizeof(gid_t); 2361 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2362 sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups * 2363 sizeof(gid_t)); 2364 } 2365 *sizep = size; 2366 } 2367 2368 static void 2369 note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep) 2370 { 2371 struct proc *p; 2372 size_t size; 2373 int structsize; 2374 2375 p = arg; 2376 size = sizeof(structsize) + sizeof(p->p_pd->pd_cmask); 2377 if (sb != NULL) { 2378 KASSERT(*sizep == size, ("invalid size")); 2379 structsize = sizeof(p->p_pd->pd_cmask); 2380 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2381 sbuf_bcat(sb, &p->p_pd->pd_cmask, sizeof(p->p_pd->pd_cmask)); 2382 } 2383 *sizep = size; 2384 } 2385 2386 static void 2387 note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep) 2388 { 2389 struct proc *p; 2390 struct rlimit rlim[RLIM_NLIMITS]; 2391 size_t size; 2392 int structsize, i; 2393 2394 p = arg; 2395 size = sizeof(structsize) + sizeof(rlim); 2396 if (sb != NULL) { 2397 KASSERT(*sizep == size, ("invalid size")); 2398 structsize = sizeof(rlim); 2399 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2400 PROC_LOCK(p); 2401 for (i = 0; i < RLIM_NLIMITS; i++) 2402 lim_rlimit_proc(p, i, &rlim[i]); 2403 PROC_UNLOCK(p); 2404 sbuf_bcat(sb, rlim, sizeof(rlim)); 2405 } 2406 *sizep = size; 2407 } 2408 2409 static void 2410 note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep) 2411 { 2412 struct proc *p; 2413 size_t size; 2414 int structsize; 2415 2416 p = arg; 2417 size = sizeof(structsize) + sizeof(p->p_osrel); 2418 if (sb != NULL) { 2419 KASSERT(*sizep == size, ("invalid size")); 2420 structsize = sizeof(p->p_osrel); 2421 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2422 sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel)); 2423 } 2424 *sizep = size; 2425 } 
2426 2427 static void 2428 __elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep) 2429 { 2430 struct proc *p; 2431 elf_ps_strings_t ps_strings; 2432 size_t size; 2433 int structsize; 2434 2435 p = arg; 2436 size = sizeof(structsize) + sizeof(ps_strings); 2437 if (sb != NULL) { 2438 KASSERT(*sizep == size, ("invalid size")); 2439 structsize = sizeof(ps_strings); 2440 #if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32 2441 ps_strings = PTROUT(p->p_sysent->sv_psstrings); 2442 #else 2443 ps_strings = p->p_sysent->sv_psstrings; 2444 #endif 2445 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2446 sbuf_bcat(sb, &ps_strings, sizeof(ps_strings)); 2447 } 2448 *sizep = size; 2449 } 2450 2451 static void 2452 __elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep) 2453 { 2454 struct proc *p; 2455 size_t size; 2456 int structsize; 2457 2458 p = arg; 2459 if (sb == NULL) { 2460 size = 0; 2461 sb = sbuf_new(NULL, NULL, AT_COUNT * sizeof(Elf_Auxinfo), 2462 SBUF_FIXEDLEN); 2463 sbuf_set_drain(sb, sbuf_count_drain, &size); 2464 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2465 PHOLD(p); 2466 proc_getauxv(curthread, p, sb); 2467 PRELE(p); 2468 sbuf_finish(sb); 2469 sbuf_delete(sb); 2470 *sizep = size; 2471 } else { 2472 structsize = sizeof(Elf_Auxinfo); 2473 sbuf_bcat(sb, &structsize, sizeof(structsize)); 2474 PHOLD(p); 2475 proc_getauxv(curthread, p, sb); 2476 PRELE(p); 2477 } 2478 } 2479 2480 static boolean_t 2481 __elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote, 2482 const char *note_vendor, const Elf_Phdr *pnote, 2483 boolean_t (*cb)(const Elf_Note *, void *, boolean_t *), void *cb_arg) 2484 { 2485 const Elf_Note *note, *note0, *note_end; 2486 const char *note_name; 2487 char *buf; 2488 int i, error; 2489 boolean_t res; 2490 2491 /* We need some limit, might as well use PAGE_SIZE. 
*/ 2492 if (pnote == NULL || pnote->p_filesz > PAGE_SIZE) 2493 return (FALSE); 2494 ASSERT_VOP_LOCKED(imgp->vp, "parse_notes"); 2495 if (pnote->p_offset > PAGE_SIZE || 2496 pnote->p_filesz > PAGE_SIZE - pnote->p_offset) { 2497 buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT); 2498 if (buf == NULL) { 2499 VOP_UNLOCK(imgp->vp); 2500 buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK); 2501 vn_lock(imgp->vp, LK_SHARED | LK_RETRY); 2502 } 2503 error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz, 2504 pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED, 2505 curthread->td_ucred, NOCRED, NULL, curthread); 2506 if (error != 0) { 2507 uprintf("i/o error PT_NOTE\n"); 2508 goto retf; 2509 } 2510 note = note0 = (const Elf_Note *)buf; 2511 note_end = (const Elf_Note *)(buf + pnote->p_filesz); 2512 } else { 2513 note = note0 = (const Elf_Note *)(imgp->image_header + 2514 pnote->p_offset); 2515 note_end = (const Elf_Note *)(imgp->image_header + 2516 pnote->p_offset + pnote->p_filesz); 2517 buf = NULL; 2518 } 2519 for (i = 0; i < 100 && note >= note0 && note < note_end; i++) { 2520 if (!aligned(note, Elf32_Addr) || (const char *)note_end - 2521 (const char *)note < sizeof(Elf_Note)) { 2522 goto retf; 2523 } 2524 if (note->n_namesz != checknote->n_namesz || 2525 note->n_descsz != checknote->n_descsz || 2526 note->n_type != checknote->n_type) 2527 goto nextnote; 2528 note_name = (const char *)(note + 1); 2529 if (note_name + checknote->n_namesz >= 2530 (const char *)note_end || strncmp(note_vendor, 2531 note_name, checknote->n_namesz) != 0) 2532 goto nextnote; 2533 2534 if (cb(note, cb_arg, &res)) 2535 goto ret; 2536 nextnote: 2537 note = (const Elf_Note *)((const char *)(note + 1) + 2538 roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) + 2539 roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE)); 2540 } 2541 retf: 2542 res = FALSE; 2543 ret: 2544 free(buf, M_TEMP); 2545 return (res); 2546 } 2547 2548 struct brandnote_cb_arg { 2549 Elf_Brandnote *brandnote; 2550 int32_t *osrel; 2551 }; 2552 2553 
static boolean_t 2554 brandnote_cb(const Elf_Note *note, void *arg0, boolean_t *res) 2555 { 2556 struct brandnote_cb_arg *arg; 2557 2558 arg = arg0; 2559 2560 /* 2561 * Fetch the osreldate for binary from the ELF OSABI-note if 2562 * necessary. 2563 */ 2564 *res = (arg->brandnote->flags & BN_TRANSLATE_OSREL) != 0 && 2565 arg->brandnote->trans_osrel != NULL ? 2566 arg->brandnote->trans_osrel(note, arg->osrel) : TRUE; 2567 2568 return (TRUE); 2569 } 2570 2571 static Elf_Note fctl_note = { 2572 .n_namesz = sizeof(FREEBSD_ABI_VENDOR), 2573 .n_descsz = sizeof(uint32_t), 2574 .n_type = NT_FREEBSD_FEATURE_CTL, 2575 }; 2576 2577 struct fctl_cb_arg { 2578 boolean_t *has_fctl0; 2579 uint32_t *fctl0; 2580 }; 2581 2582 static boolean_t 2583 note_fctl_cb(const Elf_Note *note, void *arg0, boolean_t *res) 2584 { 2585 struct fctl_cb_arg *arg; 2586 const Elf32_Word *desc; 2587 uintptr_t p; 2588 2589 arg = arg0; 2590 p = (uintptr_t)(note + 1); 2591 p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE); 2592 desc = (const Elf32_Word *)p; 2593 *arg->has_fctl0 = TRUE; 2594 *arg->fctl0 = desc[0]; 2595 *res = TRUE; 2596 return (TRUE); 2597 } 2598 2599 /* 2600 * Try to find the appropriate ABI-note section for checknote, fetch 2601 * the osreldate and feature control flags for binary from the ELF 2602 * OSABI-note. Only the first page of the image is searched, the same 2603 * as for headers. 
2604 */ 2605 static boolean_t 2606 __elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote, 2607 int32_t *osrel, boolean_t *has_fctl0, uint32_t *fctl0) 2608 { 2609 const Elf_Phdr *phdr; 2610 const Elf_Ehdr *hdr; 2611 struct brandnote_cb_arg b_arg; 2612 struct fctl_cb_arg f_arg; 2613 int i, j; 2614 2615 hdr = (const Elf_Ehdr *)imgp->image_header; 2616 phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff); 2617 b_arg.brandnote = brandnote; 2618 b_arg.osrel = osrel; 2619 f_arg.has_fctl0 = has_fctl0; 2620 f_arg.fctl0 = fctl0; 2621 2622 for (i = 0; i < hdr->e_phnum; i++) { 2623 if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp, 2624 &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb, 2625 &b_arg)) { 2626 for (j = 0; j < hdr->e_phnum; j++) { 2627 if (phdr[j].p_type == PT_NOTE && 2628 __elfN(parse_notes)(imgp, &fctl_note, 2629 FREEBSD_ABI_VENDOR, &phdr[j], 2630 note_fctl_cb, &f_arg)) 2631 break; 2632 } 2633 return (TRUE); 2634 } 2635 } 2636 return (FALSE); 2637 2638 } 2639 2640 /* 2641 * Tell kern_execve.c about it, with a little help from the linker. 
2642 */ 2643 static struct execsw __elfN(execsw) = { 2644 .ex_imgact = __CONCAT(exec_, __elfN(imgact)), 2645 .ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) 2646 }; 2647 EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw)); 2648 2649 static vm_prot_t 2650 __elfN(trans_prot)(Elf_Word flags) 2651 { 2652 vm_prot_t prot; 2653 2654 prot = 0; 2655 if (flags & PF_X) 2656 prot |= VM_PROT_EXECUTE; 2657 if (flags & PF_W) 2658 prot |= VM_PROT_WRITE; 2659 if (flags & PF_R) 2660 prot |= VM_PROT_READ; 2661 #if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__)) 2662 if (i386_read_exec && (flags & PF_R)) 2663 prot |= VM_PROT_EXECUTE; 2664 #endif 2665 return (prot); 2666 } 2667 2668 static Elf_Word 2669 __elfN(untrans_prot)(vm_prot_t prot) 2670 { 2671 Elf_Word flags; 2672 2673 flags = 0; 2674 if (prot & VM_PROT_EXECUTE) 2675 flags |= PF_X; 2676 if (prot & VM_PROT_READ) 2677 flags |= PF_R; 2678 if (prot & VM_PROT_WRITE) 2679 flags |= PF_W; 2680 return (flags); 2681 } 2682 2683 void 2684 __elfN(stackgap)(struct image_params *imgp, uintptr_t *stack_base) 2685 { 2686 uintptr_t range, rbase, gap; 2687 int pct; 2688 2689 pct = __elfN(aslr_stack_gap); 2690 if (pct == 0) 2691 return; 2692 if (pct > 50) 2693 pct = 50; 2694 range = imgp->eff_stack_sz * pct / 100; 2695 arc4rand(&rbase, sizeof(rbase), 0); 2696 gap = rbase % range; 2697 gap &= ~(sizeof(u_long) - 1); 2698 *stack_base -= gap; 2699 } 2700