/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2017 Dell EMC
 * Copyright (c) 2000-2001, 2003 David O'Brien
 * Copyright (c) 1995-1996 Søren Schmidt
 * Copyright (c) 1996 Peter Wemm
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_capsicum.h"

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/compressor.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/pioctl.h>
#include <sys/proc.h>
#include <sys/procfs.h>
#include <sys/ptrace.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sf_buf.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/eventhandler.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>

#include <machine/elf.h>
#include <machine/md_var.h>

#define	ELF_NOTE_ROUNDSIZE	4
#define	OLD_EI_BRAND	8

static int __elfN(check_header)(const Elf_Ehdr *hdr);
static Elf_Brandinfo *__elfN(get_brandinfo)(struct image_params *imgp,
    const char *interp, int32_t *osrel, uint32_t *fctl0);
static int __elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry);
static int __elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot);
static int __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp);
static bool __elfN(freebsd_trans_osrel)(const Elf_Note *note,
    int32_t *osrel);
static bool kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel);
static boolean_t __elfN(check_note)(struct image_params *imgp,
    Elf_Brandnote *checknote, int32_t *osrel, uint32_t *fctl0);
static vm_prot_t __elfN(trans_prot)(Elf_Word);
static Elf_Word __elfN(untrans_prot)(vm_prot_t);

SYSCTL_NODE(_kern, OID_AUTO, __CONCAT(elf, __ELF_WORD_SIZE), CTLFLAG_RW, 0,
    "");

#define	CORE_BUF_SIZE	(16 * 1024)

int __elfN(fallback_brand) = -1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    fallback_brand, CTLFLAG_RWTUN, &__elfN(fallback_brand), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) " brand of last resort");

static int elf_legacy_coredump = 0;
SYSCTL_INT(_debug, OID_AUTO, __elfN(legacy_coredump), CTLFLAG_RW,
    &elf_legacy_coredump, 0,
    "include all and only RW pages in core dumps");

int __elfN(nxstack) =
#if defined(__amd64__) || defined(__powerpc64__) /* both 64 and 32 bit */ || \
    (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__) || \
    defined(__riscv)
	1;
#else
	0;
#endif
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO,
    nxstack, CTLFLAG_RW, &__elfN(nxstack), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": enable non-executable stack");

#if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
int i386_read_exec = 0;
SYSCTL_INT(_kern_elf32, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0,
    "enable execution from readable segments");
#endif

static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR;

static int
sysctl_pie_base(SYSCTL_HANDLER_ARGS)
{
	u_long val;
	int error;

	val = __elfN(pie_base);
	error = sysctl_handle_long(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if ((val & PAGE_MASK) != 0)
		return (EINVAL);
	__elfN(pie_base) = val;
	return (0);
}
SYSCTL_PROC(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, pie_base,
    CTLTYPE_ULONG | CTLFLAG_MPSAFE | CTLFLAG_RW, NULL, 0,
    sysctl_pie_base, "LU",
    "PIE load base without randomization");

SYSCTL_NODE(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, aslr, CTLFLAG_RW, 0,
    "");
#define	ASLR_NODE_OID	__CONCAT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), _aslr)

static int __elfN(aslr_enabled) = 0;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, enable, CTLFLAG_RWTUN,
    &__elfN(aslr_enabled), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
    ": enable address map randomization");

static int __elfN(pie_aslr_enabled) = 0;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, pie_enable, CTLFLAG_RWTUN,
    &__elfN(pie_aslr_enabled), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
    ": enable address map randomization for PIE binaries");

static int __elfN(aslr_honor_sbrk) = 1;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, honor_sbrk, CTLFLAG_RW,
    &__elfN(aslr_honor_sbrk), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE)) ": assume sbrk is used");

static int __elfN(aslr_stack_gap) = 3;
SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack_gap, CTLFLAG_RW,
    &__elfN(aslr_stack_gap), 0,
    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
    ": maximum percentage of main stack to waste on a random gap");

static int __elfN(sigfastblock) = 1;
SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
    CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
    "enable sigfastblock for new processes");
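
/*
 * For reference, __elfN() is defined in <sys/imgact_elf.h> roughly as
 *
 *	#define __elfN(x) \
 *	    __CONCAT(__CONCAT(__CONCAT(elf, __ELF_WORD_SIZE), _), x)
 *
 * so with __ELF_WORD_SIZE == 64, __elfN(sigfastblock) above expands to
 * elf64_sigfastblock and the knob is reachable as
 * kern.elf64.sigfastblock.
 */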

static Elf_Brandinfo *elf_brand_list[MAX_BRANDS];

#define	aligned(a, t)	(rounddown2((u_long)(a), sizeof(t)) == (u_long)(a))

static const char FREEBSD_ABI_VENDOR[] = "FreeBSD";

Elf_Brandnote __elfN(freebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(FREEBSD_ABI_VENDOR),
	.hdr.n_descsz	= sizeof(int32_t),
	.hdr.n_type	= NT_FREEBSD_ABI_TAG,
	.vendor		= FREEBSD_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= __elfN(freebsd_trans_osrel)
};

static bool
__elfN(freebsd_trans_osrel)(const Elf_Note *note, int32_t *osrel)
{
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
	*osrel = *(const int32_t *)(p);

	return (true);
}

static const char GNU_ABI_VENDOR[] = "GNU";
static int GNU_KFREEBSD_ABI_DESC = 3;

Elf_Brandnote __elfN(kfreebsd_brandnote) = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= kfreebsd_trans_osrel
};

static bool
kfreebsd_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNU_KFREEBSD_ABI_DESC)
		return (false);

	/*
	 * Debian GNU/kFreeBSD embeds the earliest compatible kernel version
	 * (__FreeBSD_version: <major><two digit minor>Rxx) in the LSB way.
	 */
	*osrel = desc[1] * 100000 + desc[2] * 1000 + desc[3];

	return (true);
}

int
__elfN(insert_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == NULL) {
			elf_brand_list[i] = entry;
			break;
		}
	}
	if (i == MAX_BRANDS) {
		printf("WARNING: %s: could not insert brandinfo entry: %p\n",
		    __func__, entry);
		return (-1);
	}
	return (0);
}

int
__elfN(remove_brand_entry)(Elf_Brandinfo *entry)
{
	int i;

	for (i = 0; i < MAX_BRANDS; i++) {
		if (elf_brand_list[i] == entry) {
			elf_brand_list[i] = NULL;
			break;
		}
	}
	if (i == MAX_BRANDS)
		return (-1);
	return (0);
}

int
__elfN(brand_inuse)(Elf_Brandinfo *entry)
{
	struct proc *p;
	int rval = FALSE;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_sysent == entry->sysvec) {
			rval = TRUE;
			break;
		}
	}
	sx_sunlock(&allproc_lock);

	return (rval);
}

static Elf_Brandinfo *
__elfN(get_brandinfo)(struct image_params *imgp, const char *interp,
    int32_t *osrel, uint32_t *fctl0)
{
	const Elf_Ehdr *hdr = (const Elf_Ehdr *)imgp->image_header;
	Elf_Brandinfo *bi, *bi_m;
	boolean_t ret;
	int i, interp_name_len;

	interp_name_len = interp != NULL ? strlen(interp) + 1 : 0;

	/*
	 * We support four types of branding -- (1) the ELF EI_OSABI field
	 * that SCO added to the ELF spec, (2) FreeBSD 3.x's traditional string
	 * branding w/in the ELF header, (3) path of the `interp_path'
	 * field, and (4) the ".note.ABI-tag" ELF section.
	 */
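	/*
	 * For illustration, assuming a standard layout, the FreeBSD
	 * ".note.ABI-tag" note matched below looks like this on disk
	 * (words are 32-bit, the name padded to an ELF_NOTE_ROUNDSIZE
	 * boundary):
	 *
	 *	n_namesz = 8	followed by "FreeBSD\0"
	 *	n_descsz = 4	desc is the int32 osrel, e.g. 1300139
	 *	n_type = NT_FREEBSD_ABI_TAG
	 */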
	/* Look for an ".note.ABI-tag" ELF section */
	bi_m = NULL;
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL)
			continue;
		if (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0)
			continue;
		if (hdr->e_machine == bi->machine && (bi->flags &
		    (BI_BRAND_NOTE|BI_BRAND_NOTE_MANDATORY)) != 0) {
			ret = __elfN(check_note)(imgp, bi->brand_note, osrel,
			    fctl0);
			/* Give brand a chance to veto check_note's guess */
			if (ret && bi->header_supported)
				ret = bi->header_supported(imgp);
			/*
			 * If the note checker claimed the binary, but the
			 * interpreter path in the image does not
			 * match the default one for the brand, try to
			 * search for other brands with the same
			 * interpreter.  Either there is a better brand
			 * with the right interpreter, or, failing
			 * this, we return the first brand which accepted
			 * our note and, optionally, header.
			 */
			if (ret && bi_m == NULL && interp != NULL &&
			    (bi->interp_path == NULL ||
			    (strlen(bi->interp_path) + 1 != interp_name_len ||
			    strncmp(interp, bi->interp_path, interp_name_len)
			    != 0))) {
				bi_m = bi;
				ret = 0;
			}
			if (ret)
				return (bi);
		}
	}
	if (bi_m != NULL)
		return (bi_m);

	/* If the executable has a brand, search for it in the brand list. */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 ||
		    (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0))
			continue;
		if (hdr->e_machine == bi->machine &&
		    (hdr->e_ident[EI_OSABI] == bi->brand ||
		    (bi->compat_3_brand != NULL &&
		    strcmp((const char *)&hdr->e_ident[OLD_EI_BRAND],
		    bi->compat_3_brand) == 0))) {
			/* Looks good, but give brand a chance to veto */
			if (bi->header_supported == NULL ||
			    bi->header_supported(imgp)) {
				/*
				 * Again, prefer strictly matching
				 * interpreter path.
				 */
				if (interp_name_len == 0 &&
				    bi->interp_path == NULL)
					return (bi);
				if (bi->interp_path != NULL &&
				    strlen(bi->interp_path) + 1 ==
				    interp_name_len && strncmp(interp,
				    bi->interp_path, interp_name_len) == 0)
					return (bi);
				if (bi_m == NULL)
					bi_m = bi;
			}
		}
	}
	if (bi_m != NULL)
		return (bi_m);

	/* No known brand, see if the header is recognized by any brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || bi->flags & BI_BRAND_NOTE_MANDATORY ||
		    bi->header_supported == NULL)
			continue;
		if (hdr->e_machine == bi->machine) {
			ret = bi->header_supported(imgp);
			if (ret)
				return (bi);
		}
	}

	/* Lacking a known brand, search for a recognized interpreter. */
	if (interp != NULL) {
		for (i = 0; i < MAX_BRANDS; i++) {
			bi = elf_brand_list[i];
			if (bi == NULL || (bi->flags &
			    (BI_BRAND_NOTE_MANDATORY | BI_BRAND_ONLY_STATIC))
			    != 0)
				continue;
			if (hdr->e_machine == bi->machine &&
			    bi->interp_path != NULL &&
			    /* ELF image p_filesz includes terminating zero */
			    strlen(bi->interp_path) + 1 == interp_name_len &&
			    strncmp(interp, bi->interp_path, interp_name_len)
			    == 0 && (bi->header_supported == NULL ||
			    bi->header_supported(imgp)))
				return (bi);
		}
	}

	/* Lacking a recognized interpreter, try the default brand */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi == NULL || (bi->flags & BI_BRAND_NOTE_MANDATORY) != 0 ||
		    (interp != NULL && (bi->flags & BI_BRAND_ONLY_STATIC) != 0))
			continue;
		if (hdr->e_machine == bi->machine &&
		    __elfN(fallback_brand) == bi->brand &&
		    (bi->header_supported == NULL ||
		    bi->header_supported(imgp)))
			return (bi);
	}
	return (NULL);
}

static int
__elfN(check_header)(const Elf_Ehdr *hdr)
{
	Elf_Brandinfo *bi;
	int i;

	if (!IS_ELF(*hdr) ||
	    hdr->e_ident[EI_CLASS] != ELF_TARG_CLASS ||
	    hdr->e_ident[EI_DATA] != ELF_TARG_DATA ||
	    hdr->e_ident[EI_VERSION] != EV_CURRENT ||
	    hdr->e_phentsize != sizeof(Elf_Phdr) ||
	    hdr->e_version != ELF_TARG_VER)
		return (ENOEXEC);

	/*
	 * Make sure we have at least one brand for this machine.
	 */
	for (i = 0; i < MAX_BRANDS; i++) {
		bi = elf_brand_list[i];
		if (bi != NULL && bi->machine == hdr->e_machine)
			break;
	}
	if (i == MAX_BRANDS)
		return (ENOEXEC);

	return (0);
}

static int
__elfN(map_partial)(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
    vm_offset_t start, vm_offset_t end, vm_prot_t prot)
{
	struct sf_buf *sf;
	int error;
	vm_offset_t off;

	/*
	 * Create the page if it doesn't exist yet.  Ignore errors.
	 */
	vm_map_fixed(map, NULL, 0, trunc_page(start), round_page(end) -
	    trunc_page(start), VM_PROT_ALL, VM_PROT_ALL, MAP_CHECK_EXCL);

	/*
	 * Find the page from the underlying object.
	 */
	if (object != NULL) {
		sf = vm_imgact_map_page(object, offset);
		if (sf == NULL)
			return (KERN_FAILURE);
		off = offset - trunc_page(offset);
		error = copyout((caddr_t)sf_buf_kva(sf) + off, (caddr_t)start,
		    end - start);
		vm_imgact_unmap_page(sf);
		if (error != 0)
			return (KERN_FAILURE);
	}

	return (KERN_SUCCESS);
}

static int
__elfN(map_insert)(struct image_params *imgp, vm_map_t map, vm_object_t object,
    vm_ooffset_t offset, vm_offset_t start, vm_offset_t end, vm_prot_t prot,
    int cow)
{
	struct sf_buf *sf;
	vm_offset_t off;
	vm_size_t sz;
	int error, locked, rv;

	if (start != trunc_page(start)) {
		rv = __elfN(map_partial)(map, object, offset, start,
		    round_page(start), prot);
		if (rv != KERN_SUCCESS)
			return (rv);
		offset += round_page(start) - start;
		start = round_page(start);
	}
	if (end != round_page(end)) {
		rv = __elfN(map_partial)(map, object, offset +
		    trunc_page(end) - start, trunc_page(end), end, prot);
		if (rv != KERN_SUCCESS)
			return (rv);
		end = trunc_page(end);
	}
	if (start >= end)
		return (KERN_SUCCESS);
	if ((offset & PAGE_MASK) != 0) {
		/*
		 * The mapping is not page aligned.  This means that we have
		 * to copy the data.
		 */
		rv = vm_map_fixed(map, NULL, 0, start, end - start,
		    prot | VM_PROT_WRITE, VM_PROT_ALL, MAP_CHECK_EXCL);
		if (rv != KERN_SUCCESS)
			return (rv);
		if (object == NULL)
			return (KERN_SUCCESS);
		for (; start < end; start += sz) {
			sf = vm_imgact_map_page(object, offset);
			if (sf == NULL)
				return (KERN_FAILURE);
			off = offset - trunc_page(offset);
			sz = end - start;
			if (sz > PAGE_SIZE - off)
				sz = PAGE_SIZE - off;
			error = copyout((caddr_t)sf_buf_kva(sf) + off,
			    (caddr_t)start, sz);
			vm_imgact_unmap_page(sf);
			if (error != 0)
				return (KERN_FAILURE);
			offset += sz;
		}
	} else {
		vm_object_reference(object);
		rv = vm_map_fixed(map, object, offset, start, end - start,
		    prot, VM_PROT_ALL, cow | MAP_CHECK_EXCL |
		    (object != NULL ? MAP_VN_EXEC : 0));
		if (rv != KERN_SUCCESS) {
			locked = VOP_ISLOCKED(imgp->vp);
			VOP_UNLOCK(imgp->vp);
			vm_object_deallocate(object);
			vn_lock(imgp->vp, locked | LK_RETRY);
			return (rv);
		} else if (object != NULL) {
			MPASS(imgp->vp->v_object == object);
			VOP_SET_TEXT_CHECKED(imgp->vp);
		}
	}
	return (KERN_SUCCESS);
}

static int
__elfN(load_section)(struct image_params *imgp, vm_ooffset_t offset,
    caddr_t vmaddr, size_t memsz, size_t filsz, vm_prot_t prot)
{
	struct sf_buf *sf;
	size_t map_len;
	vm_map_t map;
	vm_object_t object;
	vm_offset_t map_addr;
	int error, rv, cow;
	size_t copy_len;
	vm_ooffset_t file_addr;

	/*
	 * It's necessary to fail if the filsz + offset taken from the
	 * header is greater than the actual file pager object's size.
	 * If we were to allow this, then the vm_map_find() below would
	 * walk right off the end of the file object and into the ether.
	 *
	 * While I'm here, might as well check for something else that
	 * is invalid: filsz cannot be greater than memsz.
	 */
	if ((filsz != 0 && (off_t)filsz + offset > imgp->attr->va_size) ||
	    filsz > memsz) {
		uprintf("elf_load_section: truncated ELF file\n");
		return (ENOEXEC);
	}

	object = imgp->object;
	map = &imgp->proc->p_vmspace->vm_map;
	map_addr = trunc_page((vm_offset_t)vmaddr);
	file_addr = trunc_page(offset);

	/*
	 * We have two choices.  We can either clear the data in the last page
	 * of an oversized mapping, or we can start the anon mapping a page
	 * early and copy the initialized data into that first page.  We
	 * choose the second.
	 */
	if (filsz == 0)
		map_len = 0;
	else if (memsz > filsz)
		map_len = trunc_page(offset + filsz) - file_addr;
	else
		map_len = round_page(offset + filsz) - file_addr;

	if (map_len != 0) {
		/* cow flags: don't dump readonly sections in core */
		cow = MAP_COPY_ON_WRITE | MAP_PREFAULT |
		    (prot & VM_PROT_WRITE ? 0 : MAP_DISABLE_COREDUMP);

		rv = __elfN(map_insert)(imgp, map, object, file_addr,
		    map_addr, map_addr + map_len, prot, cow);
		if (rv != KERN_SUCCESS)
			return (EINVAL);

		/* we can stop now if we've covered it all */
		if (memsz == filsz)
			return (0);
	}

	/*
	 * We have to get the remaining bit of the file into the first part
	 * of the oversized map segment.  This is normally because the .data
	 * segment in the file is extended to provide bss.  It's a neat idea
	 * to try and save a page, but it's a pain in the behind to implement.
	 */
	copy_len = filsz == 0 ? 0 : (offset + filsz) - trunc_page(offset +
	    filsz);
	map_addr = trunc_page((vm_offset_t)vmaddr + filsz);
	map_len = round_page((vm_offset_t)vmaddr + memsz) - map_addr;

	/* This had damn well better be true! */
	if (map_len != 0) {
		rv = __elfN(map_insert)(imgp, map, NULL, 0, map_addr,
		    map_addr + map_len, prot, 0);
		if (rv != KERN_SUCCESS)
			return (EINVAL);
	}

	if (copy_len != 0) {
		sf = vm_imgact_map_page(object, offset + filsz);
		if (sf == NULL)
			return (EIO);

		/* send the page fragment to user space */
		error = copyout((caddr_t)sf_buf_kva(sf), (caddr_t)map_addr,
		    copy_len);
		vm_imgact_unmap_page(sf);
		if (error != 0)
			return (error);
	}

	/*
	 * Remove write access to the page if it was only granted by map_insert
	 * to allow copyout.
	 */
	if ((prot & VM_PROT_WRITE) == 0)
		vm_map_protect(map, trunc_page(map_addr), round_page(map_addr +
		    map_len), prot, FALSE);

	return (0);
}

static int
__elfN(load_sections)(struct image_params *imgp, const Elf_Ehdr *hdr,
    const Elf_Phdr *phdr, u_long rbase, u_long *base_addrp)
{
	vm_prot_t prot;
	u_long base_addr;
	bool first;
	int error, i;

	ASSERT_VOP_LOCKED(imgp->vp, __func__);

	base_addr = 0;
	first = true;

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
			continue;

		/* Loadable segment */
		prot = __elfN(trans_prot)(phdr[i].p_flags);
		error = __elfN(load_section)(imgp, phdr[i].p_offset,
		    (caddr_t)(uintptr_t)phdr[i].p_vaddr + rbase,
		    phdr[i].p_memsz, phdr[i].p_filesz, prot);
		if (error != 0)
			return (error);

		/*
		 * Establish the base address if this is the first segment.
		 */
		if (first) {
			base_addr = trunc_page(phdr[i].p_vaddr + rbase);
			first = false;
		}
	}

	if (base_addrp != NULL)
		*base_addrp = base_addr;

	return (0);
}

/*
 * Load the file "file" into memory.  It may be either a shared object
 * or an executable.
 *
 * The "addr" reference parameter is in/out.  On entry, it specifies
 * the address where a shared object should be loaded.  If the file is
 * an executable, this value is ignored.  On exit, "addr" specifies
 * where the file was actually loaded.
 *
 * The "entry" reference parameter is out only.  On exit, it specifies
 * the entry point for the loaded file.
 */
static int
__elfN(load_file)(struct proc *p, const char *file, u_long *addr,
    u_long *entry)
{
	struct {
		struct nameidata nd;
		struct vattr attr;
		struct image_params image_params;
	} *tempdata;
	const Elf_Ehdr *hdr = NULL;
	const Elf_Phdr *phdr = NULL;
	struct nameidata *nd;
	struct vattr *attr;
	struct image_params *imgp;
	u_long rbase;
	u_long base_addr = 0;
	int error;

#ifdef CAPABILITY_MODE
	/*
	 * XXXJA: This check can go away once we are sufficiently confident
	 * that the checks in namei() are correct.
	 */
	if (IN_CAPABILITY_MODE(curthread))
		return (ECAPMODE);
#endif

	tempdata = malloc(sizeof(*tempdata), M_TEMP, M_WAITOK | M_ZERO);
	nd = &tempdata->nd;
	attr = &tempdata->attr;
	imgp = &tempdata->image_params;

	/*
	 * Initialize part of the common data
	 */
	imgp->proc = p;
	imgp->attr = attr;

	NDINIT(nd, LOOKUP, ISOPEN | FOLLOW | LOCKSHARED | LOCKLEAF,
	    UIO_SYSSPACE, file, curthread);
	if ((error = namei(nd)) != 0) {
		nd->ni_vp = NULL;
		goto fail;
	}
	NDFREE(nd, NDF_ONLY_PNBUF);
	imgp->vp = nd->ni_vp;

	/*
	 * Check permissions, modes, uid, etc on the file, and "open" it.
	 */
	error = exec_check_permissions(imgp);
	if (error)
		goto fail;

	error = exec_map_first_page(imgp);
	if (error)
		goto fail;

	imgp->object = nd->ni_vp->v_object;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	if ((error = __elfN(check_header)(hdr)) != 0)
		goto fail;
	if (hdr->e_type == ET_DYN)
		rbase = *addr;
	else if (hdr->e_type == ET_EXEC)
		rbase = 0;
	else {
		error = ENOEXEC;
		goto fail;
	}

	/* Only support headers that fit within first page for now */
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		error = ENOEXEC;
		goto fail;
	}

	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		error = ENOEXEC;
		goto fail;
	}

	error = __elfN(load_sections)(imgp, hdr, phdr, rbase, &base_addr);
	if (error != 0)
		goto fail;

	*addr = base_addr;
	*entry = (unsigned long)hdr->e_entry + rbase;

fail:
	if (imgp->firstpage)
		exec_unmap_first_page(imgp);

	if (nd->ni_vp) {
		if (imgp->textset)
			VOP_UNSET_TEXT_CHECKED(nd->ni_vp);
		vput(nd->ni_vp);
	}
	free(tempdata, M_TEMP);

	return (error);
}

static u_long
__CONCAT(rnd_, __elfN(base))(vm_map_t map __unused, u_long minv, u_long maxv,
    u_int align)
{
	u_long rbase, res;

	MPASS(vm_map_min(map) <= minv);
	MPASS(maxv <= vm_map_max(map));
	MPASS(minv < maxv);
	MPASS(minv + align < maxv);
	arc4rand(&rbase, sizeof(rbase), 0);
	res = roundup(minv, (u_long)align) + rbase % (maxv - minv);
	res &= ~((u_long)align - 1);
	if (res >= maxv)
		res -= align;
	KASSERT(res >= minv,
	    ("res %#lx < minv %#lx, maxv %#lx rbase %#lx",
	    res, minv, maxv, rbase));
	KASSERT(res < maxv,
	    ("res %#lx > maxv %#lx, minv %#lx rbase %#lx",
	    res, maxv, minv, rbase));
	return (res);
}

static int
__elfN(enforce_limits)(struct image_params *imgp, const Elf_Ehdr *hdr,
    const Elf_Phdr *phdr, u_long et_dyn_addr)
{
	struct vmspace *vmspace;
	const char *err_str;
	u_long text_size, data_size, total_size, text_addr, data_addr;
	u_long seg_size, seg_addr;
	int i;

	err_str = NULL;
	text_size = data_size = total_size = text_addr = data_addr = 0;

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type != PT_LOAD || phdr[i].p_memsz == 0)
			continue;

		seg_addr = trunc_page(phdr[i].p_vaddr + et_dyn_addr);
		seg_size = round_page(phdr[i].p_memsz +
		    phdr[i].p_vaddr + et_dyn_addr - seg_addr);

		/*
		 * Make the largest executable segment the official
		 * text segment and all others data.
		 *
		 * Note that obreak() assumes that data_addr + data_size == end
		 * of data load area, and the ELF file format expects segments
		 * to be sorted by address.  If multiple data segments exist,
		 * the last one will be used.
		 */
		if ((phdr[i].p_flags & PF_X) != 0 && text_size < seg_size) {
			text_size = seg_size;
			text_addr = seg_addr;
		} else {
			data_size = seg_size;
			data_addr = seg_addr;
		}
		total_size += seg_size;
	}

	if (data_addr == 0 && data_size == 0) {
		data_addr = text_addr;
		data_size = text_size;
	}

	/*
	 * Check limits.  It should be safe to check the
	 * limits after loading the segments since we do
	 * not actually fault in all the segments' pages.
	 */
	PROC_LOCK(imgp->proc);
	if (data_size > lim_cur_proc(imgp->proc, RLIMIT_DATA))
		err_str = "Data segment size exceeds process limit";
	else if (text_size > maxtsiz)
		err_str = "Text segment size exceeds system limit";
	else if (total_size > lim_cur_proc(imgp->proc, RLIMIT_VMEM))
		err_str = "Total segment size exceeds process limit";
	else if (racct_set(imgp->proc, RACCT_DATA, data_size) != 0)
		err_str = "Data segment size exceeds resource limit";
	else if (racct_set(imgp->proc, RACCT_VMEM, total_size) != 0)
		err_str = "Total segment size exceeds resource limit";
	PROC_UNLOCK(imgp->proc);
	if (err_str != NULL) {
		uprintf("%s\n", err_str);
		return (ENOMEM);
	}

	vmspace = imgp->proc->p_vmspace;
	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
	vmspace->vm_daddr = (caddr_t)(uintptr_t)data_addr;

	return (0);
}

static int
__elfN(get_interp)(struct image_params *imgp, const Elf_Phdr *phdr,
    char **interpp, bool *free_interpp)
{
	struct thread *td;
	char *interp;
	int error, interp_name_len;

	KASSERT(phdr->p_type == PT_INTERP,
	    ("%s: p_type %u != PT_INTERP", __func__, phdr->p_type));
	ASSERT_VOP_LOCKED(imgp->vp, __func__);

	td = curthread;

	/* Path to interpreter */
	if (phdr->p_filesz < 2 || phdr->p_filesz > MAXPATHLEN) {
		uprintf("Invalid PT_INTERP\n");
		return (ENOEXEC);
	}

	interp_name_len = phdr->p_filesz;
	if (phdr->p_offset > PAGE_SIZE ||
	    interp_name_len > PAGE_SIZE - phdr->p_offset) {
		/*
		 * The vnode lock might be needed by the pagedaemon to
		 * clean pages owned by the vnode.  Do not allow sleep
		 * waiting for memory with the vnode locked, instead
		 * try non-sleepable allocation first, and if it
		 * fails, go to the slow path where we drop the lock
		 * and do M_WAITOK.  A text reference prevents
		 * modifications to the vnode content.
		 */
		interp = malloc(interp_name_len + 1, M_TEMP, M_NOWAIT);
		if (interp == NULL) {
			VOP_UNLOCK(imgp->vp);
			interp = malloc(interp_name_len + 1, M_TEMP, M_WAITOK);
			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		}

		error = vn_rdwr(UIO_READ, imgp->vp, interp,
		    interp_name_len, phdr->p_offset,
		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
		    NOCRED, NULL, td);
		if (error != 0) {
			free(interp, M_TEMP);
			uprintf("i/o error PT_INTERP %d\n", error);
			return (error);
		}
		interp[interp_name_len] = '\0';

		*interpp = interp;
		*free_interpp = true;
		return (0);
	}

	interp = __DECONST(char *, imgp->image_header) + phdr->p_offset;
	if (interp[interp_name_len - 1] != '\0') {
		uprintf("Invalid PT_INTERP\n");
		return (ENOEXEC);
	}

	*interpp = interp;
	*free_interpp = false;
	return (0);
}

static int
__elfN(load_interp)(struct image_params *imgp, const Elf_Brandinfo *brand_info,
    const char *interp, u_long *addr, u_long *entry)
{
	char *path;
	int error;

	if (brand_info->emul_path != NULL &&
	    brand_info->emul_path[0] != '\0') {
		path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
		snprintf(path, MAXPATHLEN, "%s%s",
		    brand_info->emul_path, interp);
		error = __elfN(load_file)(imgp->proc, path, addr, entry);
		free(path, M_TEMP);
		if (error == 0)
			return (0);
	}

	if (brand_info->interp_newpath != NULL &&
	    (brand_info->interp_path == NULL ||
	    strcmp(interp, brand_info->interp_path) == 0)) {
		error = __elfN(load_file)(imgp->proc,
		    brand_info->interp_newpath, addr, entry);
		if (error == 0)
			return (0);
	}

	error = __elfN(load_file)(imgp->proc, interp, addr, entry);
	if (error == 0)
		return (0);

	uprintf("ELF interpreter %s not found, error %d\n", interp, error);
	return (error);
}

/*
 * Impossible et_dyn_addr initial value indicating that the real base
 * must be calculated later with some randomization applied.
 */
#define	ET_DYN_ADDR_RAND	1

static int
__CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
{
	struct thread *td;
	const Elf_Ehdr *hdr;
	const Elf_Phdr *phdr;
	Elf_Auxargs *elf_auxargs;
	struct vmspace *vmspace;
	vm_map_t map;
	char *interp;
	Elf_Brandinfo *brand_info;
	struct sysentvec *sv;
	u_long addr, baddr, et_dyn_addr, entry, proghdr;
	u_long maxalign, mapsz, maxv, maxv1;
	uint32_t fctl0;
	int32_t osrel;
	bool free_interp;
	int error, i, n;

	hdr = (const Elf_Ehdr *)imgp->image_header;

	/*
	 * Do we have a valid ELF header ?
	 *
	 * Only allow ET_EXEC & ET_DYN here, reject ET_DYN later
	 * if particular brand doesn't support it.
	 */
	if (__elfN(check_header)(hdr) != 0 ||
	    (hdr->e_type != ET_EXEC && hdr->e_type != ET_DYN))
		return (-1);

	/*
	 * From here on down, we return an errno, not -1, as we've
	 * detected an ELF file.
	 */
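
	/*
	 * The bounds check below keeps both e_phoff and the whole
	 * program header table inside the page mapped by
	 * exec_map_first_page().  As a worked example (assuming ELF64
	 * and 4K pages): with e_phentsize == sizeof(Elf64_Phdr) == 56
	 * and e_phoff == 64, at most (4096 - 64) / 56 == 72 headers
	 * can be accepted.
	 */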
	if ((hdr->e_phoff > PAGE_SIZE) ||
	    (u_int)hdr->e_phentsize * hdr->e_phnum > PAGE_SIZE - hdr->e_phoff) {
		/* Only support headers in first page for now */
		uprintf("Program headers not in the first page\n");
		return (ENOEXEC);
	}
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	if (!aligned(phdr, Elf_Addr)) {
		uprintf("Unaligned program headers\n");
		return (ENOEXEC);
	}

	n = error = 0;
	baddr = 0;
	osrel = 0;
	fctl0 = 0;
	entry = proghdr = 0;
	interp = NULL;
	free_interp = false;
	td = curthread;
	maxalign = PAGE_SIZE;
	mapsz = 0;

	for (i = 0; i < hdr->e_phnum; i++) {
		switch (phdr[i].p_type) {
		case PT_LOAD:
			if (n == 0)
				baddr = phdr[i].p_vaddr;
			if (phdr[i].p_align > maxalign)
				maxalign = phdr[i].p_align;
			mapsz += phdr[i].p_memsz;
			n++;

			/*
			 * If this segment contains the program headers,
			 * remember their virtual address for the AT_PHDR
			 * aux entry.  Static binaries don't usually include
			 * a PT_PHDR entry.
			 */
			if (phdr[i].p_offset == 0 &&
			    hdr->e_phoff + hdr->e_phnum * hdr->e_phentsize
			    <= phdr[i].p_filesz)
				proghdr = phdr[i].p_vaddr + hdr->e_phoff;
			break;
		case PT_INTERP:
			/* Path to interpreter */
			if (interp != NULL) {
				uprintf("Multiple PT_INTERP headers\n");
				error = ENOEXEC;
				goto ret;
			}
			error = __elfN(get_interp)(imgp, &phdr[i], &interp,
			    &free_interp);
			if (error != 0)
				goto ret;
			break;
		case PT_GNU_STACK:
			if (__elfN(nxstack))
				imgp->stack_prot =
				    __elfN(trans_prot)(phdr[i].p_flags);
			imgp->stack_sz = phdr[i].p_memsz;
			break;
		case PT_PHDR:	/* Program header table info */
			proghdr = phdr[i].p_vaddr;
			break;
		}
	}

	brand_info = __elfN(get_brandinfo)(imgp, interp, &osrel, &fctl0);
	if (brand_info == NULL) {
		uprintf("ELF binary type \"%u\" not known.\n",
		    hdr->e_ident[EI_OSABI]);
		error = ENOEXEC;
		goto ret;
	}
	sv = brand_info->sysvec;
	et_dyn_addr = 0;
	if (hdr->e_type == ET_DYN) {
		if ((brand_info->flags & BI_CAN_EXEC_DYN) == 0) {
			uprintf("Cannot execute shared object\n");
			error = ENOEXEC;
			goto ret;
		}
		/*
		 * Honour the base load address from the dso if it is
		 * non-zero for some reason.
		 */
		if (baddr == 0) {
			if ((sv->sv_flags & SV_ASLR) == 0 ||
			    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0)
				et_dyn_addr = __elfN(pie_base);
			else if ((__elfN(pie_aslr_enabled) &&
			    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) == 0) ||
			    (imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0)
				et_dyn_addr = ET_DYN_ADDR_RAND;
			else
				et_dyn_addr = __elfN(pie_base);
		}
	}

	/*
	 * Avoid a possible deadlock if the current address space is destroyed
	 * and that address space maps the locked vnode.  In the common case,
	 * the locked vnode's v_usecount is decremented but remains greater
	 * than zero.  Consequently, the vnode lock is not needed by vrele().
	 * However, in cases where the vnode lock is external, such as nullfs,
	 * v_usecount may become zero.
	 *
	 * The VV_TEXT flag prevents modifications to the executable while
	 * the vnode is unlocked.
	 */
	VOP_UNLOCK(imgp->vp);
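
	/*
	 * A sketch of the ASLR decision made below (flag names from
	 * sys/proc.h; per-process flags are set via procctl(2)
	 * PROC_ASLR_CTL):
	 *
	 *	!SV_ASLR, P2_ASLR_DISABLE, or the binary's
	 *	NT_FREEBSD_FCTL_ASLR_DISABLE feature note	-> off
	 *	P2_ASLR_ENABLE, kern.elfN.aslr.enable for
	 *	ET_EXEC, or a PIE base already chosen as
	 *	ET_DYN_ADDR_RAND				-> on
	 */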
	/*
	 * Decide whether to enable randomization of user mappings.
	 * First, reset user preferences for the setid binaries.
	 * Then, account for the support of the randomization by the
	 * ABI, by user preferences, and make special treatment for
	 * PIE binaries.
	 */
	if (imgp->credential_setid) {
		PROC_LOCK(imgp->proc);
		imgp->proc->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
		PROC_UNLOCK(imgp->proc);
	}
	if ((sv->sv_flags & SV_ASLR) == 0 ||
	    (imgp->proc->p_flag2 & P2_ASLR_DISABLE) != 0 ||
	    (fctl0 & NT_FREEBSD_FCTL_ASLR_DISABLE) != 0) {
		KASSERT(et_dyn_addr != ET_DYN_ADDR_RAND,
		    ("et_dyn_addr == RAND and !ASLR"));
	} else if ((imgp->proc->p_flag2 & P2_ASLR_ENABLE) != 0 ||
	    (__elfN(aslr_enabled) && hdr->e_type == ET_EXEC) ||
	    et_dyn_addr == ET_DYN_ADDR_RAND) {
		imgp->map_flags |= MAP_ASLR;
		/*
		 * If the user does not care about sbrk, utilize the bss
		 * grow region for mappings as well.  We can select
		 * the base for the image anywhere and still not suffer
		 * from fragmentation.
		 */
		if (!__elfN(aslr_honor_sbrk) ||
		    (imgp->proc->p_flag2 & P2_ASLR_IGNSTART) != 0)
			imgp->map_flags |= MAP_ASLR_IGNSTART;
	}

	error = exec_new_vmspace(imgp, sv);
	vmspace = imgp->proc->p_vmspace;
	map = &vmspace->vm_map;

	imgp->proc->p_sysent = sv;

	maxv = vm_map_max(map) - lim_max(td, RLIMIT_STACK);
	if (et_dyn_addr == ET_DYN_ADDR_RAND) {
		KASSERT((map->flags & MAP_ASLR) != 0,
		    ("ET_DYN_ADDR_RAND but !MAP_ASLR"));
		et_dyn_addr = __CONCAT(rnd_, __elfN(base))(map,
		    vm_map_min(map) + mapsz + lim_max(td, RLIMIT_DATA),
		    /* reserve half of the address space for the interpreter */
		    maxv / 2, 1UL << flsl(maxalign));
	}

	vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	if (error != 0)
		goto ret;

	error = __elfN(load_sections)(imgp, hdr, phdr, et_dyn_addr, NULL);
	if (error != 0)
		goto ret;

	error = __elfN(enforce_limits)(imgp, hdr, phdr, et_dyn_addr);
	if (error != 0)
		goto ret;

	entry = (u_long)hdr->e_entry + et_dyn_addr;

	/*
	 * We load the dynamic linker where a userland call
	 * to mmap(0, ...) would put it.  The rationale behind this
	 * calculation is that it leaves room for the heap to grow to
	 * its maximum allowed size.
	 */
	addr = round_page((vm_offset_t)vmspace->vm_daddr + lim_max(td,
	    RLIMIT_DATA));
	if ((map->flags & MAP_ASLR) != 0) {
		maxv1 = maxv / 2 + addr / 2;
		MPASS(maxv1 >= addr);	/* No overflow */
		map->anon_loc = __CONCAT(rnd_, __elfN(base))(map, addr, maxv1,
		    MAXPAGESIZES > 1 ? pagesizes[1] : pagesizes[0]);
	} else {
		map->anon_loc = addr;
	}

	imgp->entry_addr = entry;

	if (interp != NULL) {
		VOP_UNLOCK(imgp->vp);
		if ((map->flags & MAP_ASLR) != 0) {
			/* Assume that interpreter fits into 1/4 of AS */
			maxv1 = maxv / 2 + addr / 2;
			MPASS(maxv1 >= addr);	/* No overflow */
			addr = __CONCAT(rnd_, __elfN(base))(map, addr,
			    maxv1, PAGE_SIZE);
		}
		error = __elfN(load_interp)(imgp, brand_info, interp, &addr,
		    &imgp->entry_addr);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		if (error != 0)
			goto ret;
	} else
		addr = et_dyn_addr;

	/*
	 * Construct auxargs table (used by the copyout_auxargs routine)
	 */
	elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_NOWAIT);
	if (elf_auxargs == NULL) {
		VOP_UNLOCK(imgp->vp);
		elf_auxargs = malloc(sizeof(Elf_Auxargs), M_TEMP, M_WAITOK);
		vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
	}
	elf_auxargs->execfd = -1;
	elf_auxargs->phdr = proghdr + et_dyn_addr;
	elf_auxargs->phent = hdr->e_phentsize;
	elf_auxargs->phnum = hdr->e_phnum;
	elf_auxargs->pagesz = PAGE_SIZE;
	elf_auxargs->base = addr;
	elf_auxargs->flags = 0;
	elf_auxargs->entry = entry;
	elf_auxargs->hdr_eflags = hdr->e_flags;

	imgp->auxargs = elf_auxargs;
	imgp->interpreted = 0;
	imgp->reloc_base = addr;
	imgp->proc->p_osrel = osrel;
	imgp->proc->p_fctl0 = fctl0;
	imgp->proc->p_elf_machine = hdr->e_machine;
	imgp->proc->p_elf_flags = hdr->e_flags;

ret:
	if (free_interp)
		free(interp, M_TEMP);
	return (error);
}

#define	suword	__CONCAT(suword, __ELF_WORD_SIZE)

int
__elfN(freebsd_copyout_auxargs)(struct image_params *imgp, uintptr_t base)
{
	Elf_Auxargs *args = (Elf_Auxargs *)imgp->auxargs;
	Elf_Auxinfo *argarray, *pos;
	int error;

	argarray = pos = malloc(AT_COUNT * sizeof(*pos), M_TEMP,
	    M_WAITOK | M_ZERO);

	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_EHDRFLAGS, args->hdr_eflags);
	if (imgp->execpathp != 0)
		AUXARGS_ENTRY(pos, AT_EXECPATH, imgp->execpathp);
	AUXARGS_ENTRY(pos, AT_OSRELDATE,
	    imgp->proc->p_ucred->cr_prison->pr_osreldate);
	if (imgp->canary != 0) {
		AUXARGS_ENTRY(pos, AT_CANARY, imgp->canary);
		AUXARGS_ENTRY(pos, AT_CANARYLEN, imgp->canarylen);
	}
	AUXARGS_ENTRY(pos, AT_NCPUS, mp_ncpus);
	if (imgp->pagesizes != 0) {
		AUXARGS_ENTRY(pos, AT_PAGESIZES, imgp->pagesizes);
		AUXARGS_ENTRY(pos, AT_PAGESIZESLEN, imgp->pagesizeslen);
	}
	if (imgp->sysent->sv_timekeep_base != 0) {
		AUXARGS_ENTRY(pos, AT_TIMEKEEP,
		    imgp->sysent->sv_timekeep_base);
	}
	AUXARGS_ENTRY(pos, AT_STACKPROT, imgp->sysent->sv_shared_page_obj
	    != NULL && imgp->stack_prot != 0 ? imgp->stack_prot :
	    imgp->sysent->sv_stackprot);
	if (imgp->sysent->sv_hwcap != NULL)
		AUXARGS_ENTRY(pos, AT_HWCAP, *imgp->sysent->sv_hwcap);
	if (imgp->sysent->sv_hwcap2 != NULL)
		AUXARGS_ENTRY(pos, AT_HWCAP2, *imgp->sysent->sv_hwcap2);
	AUXARGS_ENTRY(pos, AT_BSDFLAGS, __elfN(sigfastblock) ?
	    ELF_BSDF_SIGFASTBLK : 0);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;
	KASSERT(pos - argarray <= AT_COUNT, ("Too many auxargs"));

	error = copyout(argarray, (void *)base, sizeof(*argarray) * AT_COUNT);
	free(argarray, M_TEMP);
	return (error);
}

int
__elfN(freebsd_fixup)(uintptr_t *stack_base, struct image_params *imgp)
{
	Elf_Addr *base;

	base = (Elf_Addr *)*stack_base;
	base--;
	if (suword(base, imgp->args->argc) == -1)
		return (EFAULT);
	*stack_base = (uintptr_t)base;
	return (0);
}

/*
 * Code for generating ELF core dumps.
 */

typedef void (*segment_callback)(vm_map_entry_t, void *);

/* Closure for cb_put_phdr(). */
struct phdr_closure {
	Elf_Phdr *phdr;		/* Program header to fill in */
	Elf_Off offset;		/* Offset of segment in core file */
};

/* Closure for cb_size_segment(). */
struct sseg_closure {
	int count;		/* Count of writable segments. */
	size_t size;		/* Total size of all writable segments. */
};

typedef void (*outfunc_t)(void *, struct sbuf *, size_t *);

struct note_info {
	int type;		/* Note type. */
	outfunc_t outfunc;	/* Output function. */
	void *outarg;		/* Argument for the output function. */
	size_t outsize;		/* Output size. */
	TAILQ_ENTRY(note_info) link;	/* Link to the next note info. */
};

TAILQ_HEAD(note_info_list, note_info);
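
/*
 * Rough layout of the core file assembled below (a sketch; offsets
 * on the right are where each piece lands):
 *
 *	Elf_Ehdr			0
 *	Elf_Phdr PT_NOTE		e_phoff
 *	Elf_Phdr PT_LOAD x count	...
 *	Elf_Shdr			only if count + 1 >= PN_XNUM
 *	notes				hdrsize
 *	segment data			round_page(hdrsize + notesz)
 */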
/* Coredump output parameters. */
struct coredump_params {
	off_t		offset;
	struct ucred	*active_cred;
	struct ucred	*file_cred;
	struct thread	*td;
	struct vnode	*vp;
	struct compressor *comp;
};

extern int compress_user_cores;
extern int compress_user_cores_level;

static void cb_put_phdr(vm_map_entry_t, void *);
static void cb_size_segment(vm_map_entry_t, void *);
static int core_write(struct coredump_params *, const void *, size_t, off_t,
    enum uio_seg);
static void each_dumpable_segment(struct thread *, segment_callback, void *);
static int __elfN(corehdr)(struct coredump_params *, int, void *, size_t,
    struct note_info_list *, size_t);
static void __elfN(prepare_notes)(struct thread *, struct note_info_list *,
    size_t *);
static void __elfN(puthdr)(struct thread *, void *, size_t, int, size_t);
static void __elfN(putnote)(struct note_info *, struct sbuf *);
static size_t register_note(struct note_info_list *, int, outfunc_t, void *);
static int sbuf_drain_core_output(void *, const char *, int);

static void __elfN(note_fpregset)(void *, struct sbuf *, size_t *);
static void __elfN(note_prpsinfo)(void *, struct sbuf *, size_t *);
static void __elfN(note_prstatus)(void *, struct sbuf *, size_t *);
static void __elfN(note_threadmd)(void *, struct sbuf *, size_t *);
static void __elfN(note_thrmisc)(void *, struct sbuf *, size_t *);
static void __elfN(note_ptlwpinfo)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_auxv)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_proc)(void *, struct sbuf *, size_t *);
static void __elfN(note_procstat_psstrings)(void *, struct sbuf *, size_t *);
static void note_procstat_files(void *, struct sbuf *, size_t *);
static void note_procstat_groups(void *, struct sbuf *, size_t *);
static void note_procstat_osrel(void *, struct sbuf *, size_t *);
static void note_procstat_rlimit(void *, struct sbuf *, size_t *);
static void note_procstat_umask(void *, struct sbuf *, size_t *);
static void note_procstat_vmmap(void *, struct sbuf *, size_t *);

/*
 * Write out a core segment to the compression stream.
 */
static int
compress_chunk(struct coredump_params *p, char *base, char *buf, u_int len)
{
	u_int chunk_len;
	int error;

	while (len > 0) {
		chunk_len = MIN(len, CORE_BUF_SIZE);

		/*
		 * We can get EFAULT error here.
		 * In that case zero out the current chunk of the segment.
		 */
		error = copyin(base, buf, chunk_len);
		if (error != 0)
			bzero(buf, chunk_len);
		error = compressor_write(p->comp, buf, chunk_len);
		if (error != 0)
			break;
		base += chunk_len;
		len -= chunk_len;
	}
	return (error);
}

static int
core_compressed_write(void *base, size_t len, off_t offset, void *arg)
{

	return (core_write((struct coredump_params *)arg, base, len, offset,
	    UIO_SYSSPACE));
}

static int
core_write(struct coredump_params *p, const void *base, size_t len,
    off_t offset, enum uio_seg seg)
{

	return (vn_rdwr_inchunks(UIO_WRITE, p->vp, __DECONST(void *, base),
	    len, offset, seg, IO_UNIT | IO_DIRECT | IO_RANGELOCKED,
	    p->active_cred, p->file_cred, NULL, p->td));
}

static int
core_output(void *base, size_t len, off_t offset, struct coredump_params *p,
    void *tmpbuf)
{
	int error;

	if (p->comp != NULL)
		return (compress_chunk(p, base, tmpbuf, len));

	/*
	 * EFAULT is a non-fatal error that we can get, for example,
	 * if the segment is backed by a file but extends beyond its
	 * end.
	 */
	error = core_write(p, base, len, offset, UIO_USERSPACE);
	if (error == EFAULT) {
		log(LOG_WARNING, "Failed to fully fault in a core file segment "
		    "at VA %p with size 0x%zx to be written at offset 0x%jx "
		    "for process %s\n", base, len, offset, curproc->p_comm);

		/*
		 * Write a "real" zero byte at the end of the target region
		 * in the case this is the last segment.
		 * The intermediate space will be implicitly zero-filled.
		 */
		error = core_write(p, zero_region, 1, offset + len - 1,
		    UIO_SYSSPACE);
	}
	return (error);
}

/*
 * Drain into a core file.
 */
static int
sbuf_drain_core_output(void *arg, const char *data, int len)
{
	struct coredump_params *p;
	int error, locked;

	p = (struct coredump_params *)arg;

	/*
	 * Some kern_proc out routines that print to this sbuf may
	 * call us with the process lock held.  Draining with the
	 * non-sleepable lock held is unsafe.  The lock is needed for
	 * those routines when dumping a live process.  In our case we
	 * can safely release the lock before draining and acquire
	 * again after.
	 */
	locked = PROC_LOCKED(p->td->td_proc);
	if (locked)
		PROC_UNLOCK(p->td->td_proc);
	if (p->comp != NULL)
		error = compressor_write(p->comp, __DECONST(char *, data), len);
	else
		error = core_write(p, __DECONST(void *, data), len, p->offset,
		    UIO_SYSSPACE);
	if (locked)
		PROC_LOCK(p->td->td_proc);
	if (error != 0)
		return (-error);
	p->offset += len;
	return (len);
}

int
__elfN(coredump)(struct thread *td, struct vnode *vp, off_t limit, int flags)
{
	struct ucred *cred = td->td_ucred;
	int error = 0;
	struct sseg_closure seginfo;
	struct note_info_list notelst;
	struct coredump_params params;
	struct note_info *ninfo;
	void *hdr, *tmpbuf;
	size_t hdrsize, notesz, coresize;

	hdr = NULL;
	tmpbuf = NULL;
	TAILQ_INIT(&notelst);

	/* Size the program segments. */
	seginfo.count = 0;
	seginfo.size = 0;
	each_dumpable_segment(td, cb_size_segment, &seginfo);

	/*
	 * Collect info about the core file header area.
	 */
	hdrsize = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * (1 + seginfo.count);
	if (seginfo.count + 1 >= PN_XNUM)
		hdrsize += sizeof(Elf_Shdr);
	__elfN(prepare_notes)(td, &notelst, &notesz);
	coresize = round_page(hdrsize + notesz) + seginfo.size;

	/* Set up core dump parameters. */
	params.offset = 0;
	params.active_cred = cred;
	params.file_cred = NOCRED;
	params.td = td;
	params.vp = vp;
	params.comp = NULL;

#ifdef RACCT
	if (racct_enable) {
		PROC_LOCK(td->td_proc);
		error = racct_add(td->td_proc, RACCT_CORE, coresize);
		PROC_UNLOCK(td->td_proc);
		if (error != 0) {
			error = EFAULT;
			goto done;
		}
	}
#endif
	if (coresize >= limit) {
		error = EFAULT;
		goto done;
	}

	/* Create a compression stream if necessary. */
	if (compress_user_cores != 0) {
		params.comp = compressor_init(core_compressed_write,
		    compress_user_cores, CORE_BUF_SIZE,
		    compress_user_cores_level, &params);
		if (params.comp == NULL) {
			error = EFAULT;
			goto done;
		}
		tmpbuf = malloc(CORE_BUF_SIZE, M_TEMP, M_WAITOK | M_ZERO);
	}

	/*
	 * Allocate memory for building the header, fill it up,
	 * and write it out following the notes.
	 */
	hdr = malloc(hdrsize, M_TEMP, M_WAITOK);
	error = __elfN(corehdr)(&params, seginfo.count, hdr, hdrsize, &notelst,
	    notesz);

	/* Write the contents of all of the writable segments. */
	if (error == 0) {
		Elf_Phdr *php;
		off_t offset;
		int i;

		php = (Elf_Phdr *)((char *)hdr + sizeof(Elf_Ehdr)) + 1;
		offset = round_page(hdrsize + notesz);
		for (i = 0; i < seginfo.count; i++) {
			error = core_output((caddr_t)(uintptr_t)php->p_vaddr,
			    php->p_filesz, offset, &params, tmpbuf);
			if (error != 0)
				break;
			offset += php->p_filesz;
			php++;
		}
		if (error == 0 && params.comp != NULL)
			error = compressor_flush(params.comp);
	}
	if (error) {
		log(LOG_WARNING,
		    "Failed to write core file for process %s (error %d)\n",
		    curproc->p_comm, error);
	}

done:
	free(tmpbuf, M_TEMP);
	if (params.comp != NULL)
		compressor_fini(params.comp);
	while ((ninfo = TAILQ_FIRST(&notelst)) != NULL) {
		TAILQ_REMOVE(&notelst, ninfo, link);
		free(ninfo, M_TEMP);
	}
	if (hdr != NULL)
		free(hdr, M_TEMP);

	return (error);
}

/*
 * A callback for each_dumpable_segment() to write out the segment's
 * program header entry.
 */
static void
cb_put_phdr(vm_map_entry_t entry, void *closure)
{
	struct phdr_closure *phc = (struct phdr_closure *)closure;
	Elf_Phdr *phdr = phc->phdr;

	phc->offset = round_page(phc->offset);

	phdr->p_type = PT_LOAD;
	phdr->p_offset = phc->offset;
	phdr->p_vaddr = entry->start;
	phdr->p_paddr = 0;
	phdr->p_filesz = phdr->p_memsz = entry->end - entry->start;
	phdr->p_align = PAGE_SIZE;
	phdr->p_flags = __elfN(untrans_prot)(entry->protection);

	phc->offset += phdr->p_filesz;
	phc->phdr++;
}
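
/*
 * Note that cb_size_segment() below and cb_put_phdr() above are
 * driven over the same map by each_dumpable_segment(), so the
 * PT_LOAD headers emitted here line up one-to-one with the segment
 * data that coredump() streams out after the header area.
 */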
/*
 * A callback for each_dumpable_segment() to gather information about
 * the number of segments and their total size.
 */
static void
cb_size_segment(vm_map_entry_t entry, void *closure)
{
	struct sseg_closure *ssc = (struct sseg_closure *)closure;

	ssc->count++;
	ssc->size += entry->end - entry->start;
}

/*
 * For each writable segment in the process's memory map, call the given
 * function with a pointer to the map entry and some arbitrary
 * caller-supplied data.
 */
static void
each_dumpable_segment(struct thread *td, segment_callback func, void *closure)
{
	struct proc *p = td->td_proc;
	vm_map_t map = &p->p_vmspace->vm_map;
	vm_map_entry_t entry;
	vm_object_t backing_object, object;
	boolean_t ignore_entry;

	vm_map_lock_read(map);
	VM_MAP_ENTRY_FOREACH(entry, map) {
		/*
		 * Don't dump inaccessible mappings, deal with legacy
		 * coredump mode.
		 *
		 * Note that read-only segments related to the elf binary
		 * are marked MAP_ENTRY_NOCOREDUMP now so we no longer
		 * need to arbitrarily ignore such segments.
		 */
		if (elf_legacy_coredump) {
			if ((entry->protection & VM_PROT_RW) != VM_PROT_RW)
				continue;
		} else {
			if ((entry->protection & VM_PROT_ALL) == 0)
				continue;
		}

		/*
		 * Don't include memory segments in the coredump if
		 * MAP_NOCORE is set in mmap(2) or MADV_NOCORE in
		 * madvise(2).  Do not dump submaps (i.e. parts of the
		 * kernel map).
		 */
		if (entry->eflags & (MAP_ENTRY_NOCOREDUMP|MAP_ENTRY_IS_SUB_MAP))
			continue;

		if ((object = entry->object.vm_object) == NULL)
			continue;

		/* Ignore memory-mapped devices and such things. */
		VM_OBJECT_RLOCK(object);
		while ((backing_object = object->backing_object) != NULL) {
			VM_OBJECT_RLOCK(backing_object);
			VM_OBJECT_RUNLOCK(object);
			object = backing_object;
		}
		ignore_entry = object->type != OBJT_DEFAULT &&
		    object->type != OBJT_SWAP && object->type != OBJT_VNODE &&
		    object->type != OBJT_PHYS;
		VM_OBJECT_RUNLOCK(object);
		if (ignore_entry)
			continue;

		(*func)(entry, closure);
	}
	vm_map_unlock_read(map);
}

/*
 * Write the core file header to the file, including padding up to
 * the page boundary.
 */
static int
__elfN(corehdr)(struct coredump_params *p, int numsegs, void *hdr,
    size_t hdrsize, struct note_info_list *notelst, size_t notesz)
{
	struct note_info *ninfo;
	struct sbuf *sb;
	int error;

	/* Fill in the header. */
	bzero(hdr, hdrsize);
	__elfN(puthdr)(p->td, hdr, hdrsize, numsegs, notesz);

	sb = sbuf_new(NULL, NULL, CORE_BUF_SIZE, SBUF_FIXEDLEN);
	sbuf_set_drain(sb, sbuf_drain_core_output, p);
	sbuf_start_section(sb, NULL);
	sbuf_bcat(sb, hdr, hdrsize);
	TAILQ_FOREACH(ninfo, notelst, link)
		__elfN(putnote)(ninfo, sb);
	/* Align up to a page boundary for the program segments. */
	sbuf_end_section(sb, -1, PAGE_SIZE, 0);
	error = sbuf_finish(sb);
	sbuf_delete(sb);

	return (error);
}

static void
__elfN(prepare_notes)(struct thread *td, struct note_info_list *list,
    size_t *sizep)
{
	struct proc *p;
	struct thread *thr;
	size_t size;

	p = td->td_proc;
	size = 0;

	size += register_note(list, NT_PRPSINFO, __elfN(note_prpsinfo), p);

	/*
	 * To have the debugger select the right thread (LWP) as the initial
	 * thread, we dump the state of the thread passed to us in td first.
	 * This is the thread that causes the core dump and thus likely to
	 * be the right thread one wants to have selected in the debugger.
	 */
	thr = td;
	while (thr != NULL) {
		size += register_note(list, NT_PRSTATUS,
		    __elfN(note_prstatus), thr);
		size += register_note(list, NT_FPREGSET,
		    __elfN(note_fpregset), thr);
		size += register_note(list, NT_THRMISC,
		    __elfN(note_thrmisc), thr);
		size += register_note(list, NT_PTLWPINFO,
		    __elfN(note_ptlwpinfo), thr);
		size += register_note(list, -1,
		    __elfN(note_threadmd), thr);

		thr = (thr == td) ? TAILQ_FIRST(&p->p_threads) :
		    TAILQ_NEXT(thr, td_plist);
		if (thr == td)
			thr = TAILQ_NEXT(thr, td_plist);
	}

	size += register_note(list, NT_PROCSTAT_PROC,
	    __elfN(note_procstat_proc), p);
	size += register_note(list, NT_PROCSTAT_FILES,
	    note_procstat_files, p);
	size += register_note(list, NT_PROCSTAT_VMMAP,
	    note_procstat_vmmap, p);
	size += register_note(list, NT_PROCSTAT_GROUPS,
	    note_procstat_groups, p);
	size += register_note(list, NT_PROCSTAT_UMASK,
	    note_procstat_umask, p);
	size += register_note(list, NT_PROCSTAT_RLIMIT,
	    note_procstat_rlimit, p);
	size += register_note(list, NT_PROCSTAT_OSREL,
	    note_procstat_osrel, p);
	size += register_note(list, NT_PROCSTAT_PSSTRINGS,
	    __elfN(note_procstat_psstrings), p);
	size += register_note(list, NT_PROCSTAT_AUXV,
	    __elfN(note_procstat_auxv), p);

	*sizep = size;
}

static void
__elfN(puthdr)(struct thread *td, void *hdr, size_t hdrsize, int numsegs,
    size_t notesz)
{
	Elf_Ehdr *ehdr;
	Elf_Phdr *phdr;
	Elf_Shdr *shdr;
	struct phdr_closure phc;

	ehdr = (Elf_Ehdr *)hdr;

	ehdr->e_ident[EI_MAG0] = ELFMAG0;
	ehdr->e_ident[EI_MAG1] = ELFMAG1;
	ehdr->e_ident[EI_MAG2] = ELFMAG2;
	ehdr->e_ident[EI_MAG3] = ELFMAG3;
	ehdr->e_ident[EI_CLASS] = ELF_CLASS;
	ehdr->e_ident[EI_DATA] = ELF_DATA;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = ELFOSABI_FREEBSD;
	ehdr->e_ident[EI_ABIVERSION] = 0;
	ehdr->e_ident[EI_PAD] = 0;
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = td->td_proc->p_elf_machine;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_entry = 0;
	ehdr->e_phoff = sizeof(Elf_Ehdr);
	ehdr->e_flags = td->td_proc->p_elf_flags;
	ehdr->e_ehsize = sizeof(Elf_Ehdr);
	ehdr->e_phentsize = sizeof(Elf_Phdr);
	ehdr->e_shentsize = sizeof(Elf_Shdr);
	ehdr->e_shstrndx = SHN_UNDEF;
	if (numsegs + 1 < PN_XNUM) {
		ehdr->e_phnum = numsegs + 1;
		ehdr->e_shnum = 0;
	} else {
		ehdr->e_phnum = PN_XNUM;
		ehdr->e_shnum = 1;

		ehdr->e_shoff = ehdr->e_phoff +
		    (numsegs + 1) * ehdr->e_phentsize;
		KASSERT(ehdr->e_shoff == hdrsize - sizeof(Elf_Shdr),
		    ("e_shoff: %zu, hdrsize - shdr: %zu",
(size_t)ehdr->e_shoff, hdrsize - sizeof(Elf_Shdr))); 1931 1932 shdr = (Elf_Shdr *)((char *)hdr + ehdr->e_shoff); 1933 memset(shdr, 0, sizeof(*shdr)); 1934 /* 1935 * A special first section is used to hold large segment and 1936 * section counts. This was proposed by Sun Microsystems in 1937 * Solaris and has been adopted by Linux; the standard ELF 1938 * tools are already familiar with the technique. 1939 * 1940 * See table 7-7 of the Solaris "Linker and Libraries Guide" 1941 * (or 12-7 depending on the version of the document) for more 1942 * details. 1943 */ 1944 shdr->sh_type = SHT_NULL; 1945 shdr->sh_size = ehdr->e_shnum; 1946 shdr->sh_link = ehdr->e_shstrndx; 1947 shdr->sh_info = numsegs + 1; 1948 } 1949 1950 /* 1951 * Fill in the program header entries. 1952 */ 1953 phdr = (Elf_Phdr *)((char *)hdr + ehdr->e_phoff); 1954 1955 /* The note segement. */ 1956 phdr->p_type = PT_NOTE; 1957 phdr->p_offset = hdrsize; 1958 phdr->p_vaddr = 0; 1959 phdr->p_paddr = 0; 1960 phdr->p_filesz = notesz; 1961 phdr->p_memsz = 0; 1962 phdr->p_flags = PF_R; 1963 phdr->p_align = ELF_NOTE_ROUNDSIZE; 1964 phdr++; 1965 1966 /* All the writable segments from the program. */ 1967 phc.phdr = phdr; 1968 phc.offset = round_page(hdrsize + notesz); 1969 each_dumpable_segment(td, cb_put_phdr, &phc); 1970 } 1971 1972 static size_t 1973 register_note(struct note_info_list *list, int type, outfunc_t out, void *arg) 1974 { 1975 struct note_info *ninfo; 1976 size_t size, notesize; 1977 1978 size = 0; 1979 out(arg, NULL, &size); 1980 ninfo = malloc(sizeof(*ninfo), M_TEMP, M_ZERO | M_WAITOK); 1981 ninfo->type = type; 1982 ninfo->outfunc = out; 1983 ninfo->outarg = arg; 1984 ninfo->outsize = size; 1985 TAILQ_INSERT_TAIL(list, ninfo, link); 1986 1987 if (type == -1) 1988 return (size); 1989 1990 notesize = sizeof(Elf_Note) + /* note header */ 1991 roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + 1992 /* note name */ 1993 roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ 1994 1995 return (notesize); 1996 } 1997 1998 static size_t 1999 append_note_data(const void *src, void *dst, size_t len) 2000 { 2001 size_t padded_len; 2002 2003 padded_len = roundup2(len, ELF_NOTE_ROUNDSIZE); 2004 if (dst != NULL) { 2005 bcopy(src, dst, len); 2006 bzero((char *)dst + len, padded_len - len); 2007 } 2008 return (padded_len); 2009 } 2010 2011 size_t 2012 __elfN(populate_note)(int type, void *src, void *dst, size_t size, void **descp) 2013 { 2014 Elf_Note *note; 2015 char *buf; 2016 size_t notesize; 2017 2018 buf = dst; 2019 if (buf != NULL) { 2020 note = (Elf_Note *)buf; 2021 note->n_namesz = sizeof(FREEBSD_ABI_VENDOR); 2022 note->n_descsz = size; 2023 note->n_type = type; 2024 buf += sizeof(*note); 2025 buf += append_note_data(FREEBSD_ABI_VENDOR, buf, 2026 sizeof(FREEBSD_ABI_VENDOR)); 2027 append_note_data(src, buf, size); 2028 if (descp != NULL) 2029 *descp = buf; 2030 } 2031 2032 notesize = sizeof(Elf_Note) + /* note header */ 2033 roundup2(sizeof(FREEBSD_ABI_VENDOR), ELF_NOTE_ROUNDSIZE) + 2034 /* note name */ 2035 roundup2(size, ELF_NOTE_ROUNDSIZE); /* note description */ 2036 2037 return (notesize); 2038 } 2039 2040 static void 2041 __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) 2042 { 2043 Elf_Note note; 2044 ssize_t old_len, sect_len; 2045 size_t new_len, descsz, i; 2046 2047 if (ninfo->type == -1) { 2048 ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); 2049 return; 2050 } 2051 2052 note.n_namesz = sizeof(FREEBSD_ABI_VENDOR); 2053 note.n_descsz = ninfo->outsize; 2054 note.n_type = ninfo->type; 2055 2056 
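	/*
	 * The record emitted below follows the standard ELF note layout;
	 * a sketch (padding reflects the 4-byte ELF_NOTE_ROUNDSIZE
	 * alignment used throughout this file):
	 *
	 *	+--------------------+
	 *	| Elf_Note header    |  n_namesz, n_descsz, n_type
	 *	+--------------------+
	 *	| name ("FreeBSD\0") |  padded to ELF_NOTE_ROUNDSIZE
	 *	+--------------------+
	 *	| descriptor         |  ninfo->outfunc() output, zero-padded
	 *	+--------------------+
	 */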
	sbuf_bcat(sb, &note, sizeof(note));
	sbuf_start_section(sb, &old_len);
	sbuf_bcat(sb, FREEBSD_ABI_VENDOR, sizeof(FREEBSD_ABI_VENDOR));
	sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
	if (note.n_descsz == 0)
		return;
	sbuf_start_section(sb, &old_len);
	ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
	sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
	if (sect_len < 0)
		return;

	new_len = (size_t)sect_len;
	descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
	if (new_len < descsz) {
		/*
		 * It is expected that individual note emitters will correctly
		 * predict their expected output size and fill up to that size
		 * themselves, padding in a format-specific way if needed.
		 * However, in case they don't, just do it here with zeros.
		 */
		for (i = 0; i < descsz - new_len; i++)
			sbuf_putc(sb, 0);
	} else if (new_len > descsz) {
		/*
		 * We can't always truncate sb -- we may have drained some
		 * of it already.
		 */
		KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
		    "read it (%zu > %zu).  Since it is longer than "
		    "expected, this coredump's notes are corrupt.  THIS "
		    "IS A BUG in the note_procstat routine for type %u.\n",
		    __func__, (unsigned)note.n_type, new_len, descsz,
		    (unsigned)note.n_type));
	}
}

/*
 * Miscellaneous note out functions.
 */

#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
#include <compat/freebsd32/freebsd32.h>
#include <compat/freebsd32/freebsd32_signal.h>

typedef struct prstatus32 elf_prstatus_t;
typedef struct prpsinfo32 elf_prpsinfo_t;
typedef struct fpreg32 elf_prfpregset_t;
typedef struct fpreg32 elf_fpregset_t;
typedef struct reg32 elf_gregset_t;
typedef struct thrmisc32 elf_thrmisc_t;
#define ELF_KERN_PROC_MASK	KERN_PROC_MASK32
typedef struct kinfo_proc32 elf_kinfo_proc_t;
typedef uint32_t elf_ps_strings_t;
#else
typedef prstatus_t elf_prstatus_t;
typedef prpsinfo_t elf_prpsinfo_t;
typedef prfpregset_t elf_prfpregset_t;
typedef prfpregset_t elf_fpregset_t;
typedef gregset_t elf_gregset_t;
typedef thrmisc_t elf_thrmisc_t;
#define ELF_KERN_PROC_MASK	0
typedef struct kinfo_proc elf_kinfo_proc_t;
typedef vm_offset_t elf_ps_strings_t;
#endif

static void
__elfN(note_prpsinfo)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct sbuf sbarg;
	size_t len;
	char *cp, *end;
	struct proc *p;
	elf_prpsinfo_t *psinfo;
	int error;

	p = (struct proc *)arg;
	if (sb != NULL) {
		KASSERT(*sizep == sizeof(*psinfo), ("invalid size"));
		psinfo = malloc(sizeof(*psinfo), M_TEMP, M_ZERO | M_WAITOK);
		psinfo->pr_version = PRPSINFO_VERSION;
		psinfo->pr_psinfosz = sizeof(elf_prpsinfo_t);
		strlcpy(psinfo->pr_fname, p->p_comm, sizeof(psinfo->pr_fname));
		PROC_LOCK(p);
		if (p->p_args != NULL) {
			len = sizeof(psinfo->pr_psargs) - 1;
			if (len > p->p_args->ar_length)
				len = p->p_args->ar_length;
			memcpy(psinfo->pr_psargs, p->p_args->ar_args, len);
			PROC_UNLOCK(p);
			error = 0;
		} else {
			_PHOLD(p);
			PROC_UNLOCK(p);
			sbuf_new(&sbarg, psinfo->pr_psargs,
			    sizeof(psinfo->pr_psargs), SBUF_FIXEDLEN);
			error = proc_getargv(curthread, p, &sbarg);
			PRELE(p);
			if (sbuf_finish(&sbarg) == 0)
				len = sbuf_len(&sbarg) - 1;
			else
				len = sizeof(psinfo->pr_psargs) - 1;
			sbuf_delete(&sbarg);
		}
		if (error || len == 0)
			strlcpy(psinfo->pr_psargs, p->p_comm,
			    sizeof(psinfo->pr_psargs));
		else {
			KASSERT(len < sizeof(psinfo->pr_psargs),
			    ("len is too long: %zu vs %zu", len,
			    sizeof(psinfo->pr_psargs)));
			cp = psinfo->pr_psargs;
			end = cp + len - 1;
			for (;;) {
				cp = memchr(cp, '\0', end - cp);
				if (cp == NULL)
					break;
				*cp = ' ';
			}
		}
		psinfo->pr_pid = p->p_pid;
		sbuf_bcat(sb, psinfo, sizeof(*psinfo));
		free(psinfo, M_TEMP);
	}
	*sizep = sizeof(*psinfo);
}

static void
__elfN(note_prstatus)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct thread *td;
	elf_prstatus_t *status;

	td = (struct thread *)arg;
	if (sb != NULL) {
		KASSERT(*sizep == sizeof(*status), ("invalid size"));
		status = malloc(sizeof(*status), M_TEMP, M_ZERO | M_WAITOK);
		status->pr_version = PRSTATUS_VERSION;
		status->pr_statussz = sizeof(elf_prstatus_t);
		status->pr_gregsetsz = sizeof(elf_gregset_t);
		status->pr_fpregsetsz = sizeof(elf_fpregset_t);
		status->pr_osreldate = osreldate;
		status->pr_cursig = td->td_proc->p_sig;
		status->pr_pid = td->td_tid;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
		fill_regs32(td, &status->pr_reg);
#else
		fill_regs(td, &status->pr_reg);
#endif
		sbuf_bcat(sb, status, sizeof(*status));
		free(status, M_TEMP);
	}
	*sizep = sizeof(*status);
}

static void
__elfN(note_fpregset)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct thread *td;
	elf_prfpregset_t *fpregset;

	td = (struct thread *)arg;
	if (sb != NULL) {
		KASSERT(*sizep == sizeof(*fpregset), ("invalid size"));
		fpregset = malloc(sizeof(*fpregset), M_TEMP, M_ZERO | M_WAITOK);
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
		fill_fpregs32(td, fpregset);
#else
		fill_fpregs(td, fpregset);
#endif
		sbuf_bcat(sb, fpregset, sizeof(*fpregset));
		free(fpregset, M_TEMP);
	}
	*sizep = sizeof(*fpregset);
}

static void
__elfN(note_thrmisc)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct thread *td;
	elf_thrmisc_t thrmisc;

	td = (struct thread *)arg;
	if (sb != NULL) {
		KASSERT(*sizep == sizeof(thrmisc), ("invalid size"));
		bzero(&thrmisc, sizeof(thrmisc));
		strcpy(thrmisc.pr_tname, td->td_name);
		sbuf_bcat(sb, &thrmisc, sizeof(thrmisc));
	}
	*sizep = sizeof(thrmisc);
}

static void
__elfN(note_ptlwpinfo)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct thread *td;
	size_t size;
	int structsize;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
	struct ptrace_lwpinfo32 pl;
#else
	struct ptrace_lwpinfo pl;
#endif

	td = (struct thread *)arg;
	size = sizeof(structsize) + sizeof(pl);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(pl);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		bzero(&pl, sizeof(pl));
		pl.pl_lwpid = td->td_tid;
		pl.pl_event = PL_EVENT_NONE;
		pl.pl_sigmask = td->td_sigmask;
		pl.pl_siglist = td->td_siglist;
		if (td->td_si.si_signo != 0) {
			pl.pl_event = PL_EVENT_SIGNAL;
			pl.pl_flags |= PL_FLAG_SI;
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
			siginfo_to_siginfo32(&td->td_si, &pl.pl_siginfo);
#else
			pl.pl_siginfo = td->td_si;
#endif
		}
		strcpy(pl.pl_tdname, td->td_name);
		/* XXX TODO: supply more information in struct ptrace_lwpinfo */
		sbuf_bcat(sb, &pl, sizeof(pl));
	}
	*sizep = size;
}

/*
 * Allow for MD-specific notes, as well as any MD-specific
 * preparations for writing MI notes.
 */
static void
__elfN(note_threadmd)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct thread *td;
	void *buf;
	size_t size;

	td = (struct thread *)arg;
	size = *sizep;
	if (size != 0 && sb != NULL)
		buf = malloc(size, M_TEMP, M_ZERO | M_WAITOK);
	else
		buf = NULL;
	size = 0;
	__elfN(dump_thread)(td, buf, &size);
	KASSERT(sb == NULL || *sizep == size, ("invalid size"));
	if (size != 0 && sb != NULL)
		sbuf_bcat(sb, buf, size);
	free(buf, M_TEMP);
	*sizep = size;
}

#ifdef KINFO_PROC_SIZE
CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE);
#endif

static void
__elfN(note_procstat_proc)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	size = sizeof(structsize) + p->p_numthreads *
	    sizeof(elf_kinfo_proc_t);

	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(elf_kinfo_proc_t);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		kern_proc_out(p, sb, ELF_KERN_PROC_MASK);
	}
	*sizep = size;
}

#ifdef KINFO_FILE_SIZE
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
#endif

static void
note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size, sect_sz, i;
	ssize_t start_len, sect_len;
	int structsize, filedesc_flags;

	if (coredump_pack_fileinfo)
		filedesc_flags = KERN_FILEDESC_PACK_KINFO;
	else
		filedesc_flags = 0;

	p = (struct proc *)arg;
	structsize = sizeof(struct kinfo_file);
	if (sb == NULL) {
		size = 0;
		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
		sbuf_set_drain(sb, sbuf_count_drain, &size);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
		sbuf_finish(sb);
		sbuf_delete(sb);
		*sizep = size;
	} else {
		sbuf_start_section(sb, &start_len);

		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
		    filedesc_flags);

		sect_len = sbuf_end_section(sb, start_len, 0, 0);
		if (sect_len < 0)
			return;
		sect_sz = sect_len;

		KASSERT(sect_sz <= *sizep,
		    ("kern_proc_filedesc_out did not respect maxlen; "
		    "requested %zu, got %zu", *sizep - sizeof(structsize),
		    sect_sz - sizeof(structsize)));

		for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
			sbuf_putc(sb, 0);
	}
}

#ifdef KINFO_VMENTRY_SIZE
CTASSERT(sizeof(struct kinfo_vmentry) == KINFO_VMENTRY_SIZE);
#endif

static void
note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize, vmmap_flags;

	if (coredump_pack_vmmapinfo)
		vmmap_flags = KERN_VMMAP_PACK_KINFO;
	else
		vmmap_flags = 0;

	p = (struct proc *)arg;
	structsize = sizeof(struct kinfo_vmentry);
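	/*
	 * Like the other procstat notes, this callback runs twice (a
	 * description of the existing protocol, not new behavior): a
	 * sizing pass with sb == NULL, where output is routed through a
	 * throwaway sbuf whose sbuf_count_drain() drain merely counts
	 * bytes, and an emit pass, which must produce exactly the number
	 * of bytes the sizing pass reported in *sizep.
	 */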
	if (sb == NULL) {
		size = 0;
		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
		sbuf_set_drain(sb, sbuf_count_drain, &size);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		kern_proc_vmmap_out(p, sb, -1, vmmap_flags);
		sbuf_finish(sb);
		sbuf_delete(sb);
		*sizep = size;
	} else {
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
		    vmmap_flags);
	}
}

static void
note_procstat_groups(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	size = sizeof(structsize) + p->p_ucred->cr_ngroups * sizeof(gid_t);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(gid_t);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		sbuf_bcat(sb, p->p_ucred->cr_groups, p->p_ucred->cr_ngroups *
		    sizeof(gid_t));
	}
	*sizep = size;
}

static void
note_procstat_umask(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(p->p_fd->fd_cmask);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask));
	}
	*sizep = size;
}

static void
note_procstat_rlimit(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	struct rlimit rlim[RLIM_NLIMITS];
	size_t size;
	int structsize, i;

	p = (struct proc *)arg;
	size = sizeof(structsize) + sizeof(rlim);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(rlim);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PROC_LOCK(p);
		for (i = 0; i < RLIM_NLIMITS; i++)
			lim_rlimit_proc(p, i, &rlim[i]);
		PROC_UNLOCK(p);
		sbuf_bcat(sb, rlim, sizeof(rlim));
	}
	*sizep = size;
}

static void
note_procstat_osrel(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	size = sizeof(structsize) + sizeof(p->p_osrel);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(p->p_osrel);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		sbuf_bcat(sb, &p->p_osrel, sizeof(p->p_osrel));
	}
	*sizep = size;
}

static void
__elfN(note_procstat_psstrings)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	elf_ps_strings_t ps_strings;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	size = sizeof(structsize) + sizeof(ps_strings);
	if (sb != NULL) {
		KASSERT(*sizep == size, ("invalid size"));
		structsize = sizeof(ps_strings);
#if defined(COMPAT_FREEBSD32) && __ELF_WORD_SIZE == 32
		ps_strings = PTROUT(p->p_sysent->sv_psstrings);
#else
		ps_strings = p->p_sysent->sv_psstrings;
#endif
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		sbuf_bcat(sb, &ps_strings, sizeof(ps_strings));
	}
	*sizep = size;
}

static void
__elfN(note_procstat_auxv)(void *arg, struct sbuf *sb, size_t *sizep)
{
	struct proc *p;
	size_t size;
	int structsize;

	p = (struct proc *)arg;
	if (sb == NULL) {
		size = 0;
		sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
		sbuf_set_drain(sb, sbuf_count_drain, &size);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PHOLD(p);
		proc_getauxv(curthread, p, sb);
		PRELE(p);
		sbuf_finish(sb);
		sbuf_delete(sb);
		*sizep = size;
	} else {
		structsize = sizeof(Elf_Auxinfo);
		sbuf_bcat(sb, &structsize, sizeof(structsize));
		PHOLD(p);
		proc_getauxv(curthread, p, sb);
		PRELE(p);
	}
}

static boolean_t
__elfN(parse_notes)(struct image_params *imgp, Elf_Note *checknote,
    const char *note_vendor, const Elf_Phdr *pnote,
    boolean_t (*cb)(const Elf_Note *, void *, boolean_t *), void *cb_arg)
{
	const Elf_Note *note, *note0, *note_end;
	const char *note_name;
	char *buf;
	int i, error;
	boolean_t res;

	/* We need some limit, might as well use PAGE_SIZE. */
	if (pnote == NULL || pnote->p_filesz > PAGE_SIZE)
		return (FALSE);
	ASSERT_VOP_LOCKED(imgp->vp, "parse_notes");
	if (pnote->p_offset > PAGE_SIZE ||
	    pnote->p_filesz > PAGE_SIZE - pnote->p_offset) {
		buf = malloc(pnote->p_filesz, M_TEMP, M_NOWAIT);
		if (buf == NULL) {
			VOP_UNLOCK(imgp->vp);
			buf = malloc(pnote->p_filesz, M_TEMP, M_WAITOK);
			vn_lock(imgp->vp, LK_SHARED | LK_RETRY);
		}
		error = vn_rdwr(UIO_READ, imgp->vp, buf, pnote->p_filesz,
		    pnote->p_offset, UIO_SYSSPACE, IO_NODELOCKED,
		    curthread->td_ucred, NOCRED, NULL, curthread);
		if (error != 0) {
			uprintf("i/o error PT_NOTE\n");
			goto retf;
		}
		note = note0 = (const Elf_Note *)buf;
		note_end = (const Elf_Note *)(buf + pnote->p_filesz);
	} else {
		note = note0 = (const Elf_Note *)(imgp->image_header +
		    pnote->p_offset);
		note_end = (const Elf_Note *)(imgp->image_header +
		    pnote->p_offset + pnote->p_filesz);
		buf = NULL;
	}
	for (i = 0; i < 100 && note >= note0 && note < note_end; i++) {
		if (!aligned(note, Elf32_Addr) || (const char *)note_end -
		    (const char *)note < sizeof(Elf_Note)) {
			goto retf;
		}
		if (note->n_namesz != checknote->n_namesz ||
		    note->n_descsz != checknote->n_descsz ||
		    note->n_type != checknote->n_type)
			goto nextnote;
		note_name = (const char *)(note + 1);
		if (note_name + checknote->n_namesz >=
		    (const char *)note_end || strncmp(note_vendor,
		    note_name, checknote->n_namesz) != 0)
			goto nextnote;

		if (cb(note, cb_arg, &res))
			goto ret;
nextnote:
		note = (const Elf_Note *)((const char *)(note + 1) +
		    roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE) +
		    roundup2(note->n_descsz, ELF_NOTE_ROUNDSIZE));
	}
retf:
	res = FALSE;
ret:
	free(buf, M_TEMP);
	return (res);
}

struct brandnote_cb_arg {
	Elf_Brandnote *brandnote;
	int32_t *osrel;
};

static boolean_t
brandnote_cb(const Elf_Note *note, void *arg0, boolean_t *res)
{
	struct brandnote_cb_arg *arg;

	arg = arg0;

	/*
	 * Fetch the osreldate for the binary from the ELF OSABI-note if
	 * necessary.
	 */
	*res = (arg->brandnote->flags & BN_TRANSLATE_OSREL) != 0 &&
	    arg->brandnote->trans_osrel != NULL ?
	    arg->brandnote->trans_osrel(note, arg->osrel) : TRUE;

	return (TRUE);
}

static Elf_Note fctl_note = {
	.n_namesz = sizeof(FREEBSD_ABI_VENDOR),
	.n_descsz = sizeof(uint32_t),
	.n_type = NT_FREEBSD_FEATURE_CTL,
};

struct fctl_cb_arg {
	uint32_t *fctl0;
};

static boolean_t
note_fctl_cb(const Elf_Note *note, void *arg0, boolean_t *res)
{
	struct fctl_cb_arg *arg;
	const Elf32_Word *desc;
	uintptr_t p;

	arg = arg0;
	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, ELF_NOTE_ROUNDSIZE);
	desc = (const Elf32_Word *)p;
	*arg->fctl0 = desc[0];
	return (TRUE);
}

/*
 * Try to find the appropriate ABI-note section for checknote, and fetch
 * the osreldate and feature control flags for the binary from the ELF
 * OSABI-note.  Only the first page of the image is searched, the same
 * as for headers.
 */
static boolean_t
__elfN(check_note)(struct image_params *imgp, Elf_Brandnote *brandnote,
    int32_t *osrel, uint32_t *fctl0)
{
	const Elf_Phdr *phdr;
	const Elf_Ehdr *hdr;
	struct brandnote_cb_arg b_arg;
	struct fctl_cb_arg f_arg;
	int i, j;

	hdr = (const Elf_Ehdr *)imgp->image_header;
	phdr = (const Elf_Phdr *)(imgp->image_header + hdr->e_phoff);
	b_arg.brandnote = brandnote;
	b_arg.osrel = osrel;
	f_arg.fctl0 = fctl0;

	for (i = 0; i < hdr->e_phnum; i++) {
		if (phdr[i].p_type == PT_NOTE && __elfN(parse_notes)(imgp,
		    &brandnote->hdr, brandnote->vendor, &phdr[i], brandnote_cb,
		    &b_arg)) {
			for (j = 0; j < hdr->e_phnum; j++) {
				if (phdr[j].p_type == PT_NOTE &&
				    __elfN(parse_notes)(imgp, &fctl_note,
				    FREEBSD_ABI_VENDOR, &phdr[j],
				    note_fctl_cb, &f_arg))
					break;
			}
			return (TRUE);
		}
	}
	return (FALSE);
}

/*
 * Tell kern_execve.c about it, with a little help from the linker.
 */
static struct execsw __elfN(execsw) = {
	.ex_imgact = __CONCAT(exec_, __elfN(imgact)),
	.ex_name = __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
};
EXEC_SET(__CONCAT(elf, __ELF_WORD_SIZE), __elfN(execsw));

static vm_prot_t
__elfN(trans_prot)(Elf_Word flags)
{
	vm_prot_t prot;

	prot = 0;
	if (flags & PF_X)
		prot |= VM_PROT_EXECUTE;
	if (flags & PF_W)
		prot |= VM_PROT_WRITE;
	if (flags & PF_R)
		prot |= VM_PROT_READ;
#if __ELF_WORD_SIZE == 32 && (defined(__amd64__) || defined(__i386__))
	if (i386_read_exec && (flags & PF_R))
		prot |= VM_PROT_EXECUTE;
#endif
	return (prot);
}

static Elf_Word
__elfN(untrans_prot)(vm_prot_t prot)
{
	Elf_Word flags;

	flags = 0;
	if (prot & VM_PROT_EXECUTE)
		flags |= PF_X;
	if (prot & VM_PROT_READ)
		flags |= PF_R;
	if (prot & VM_PROT_WRITE)
		flags |= PF_W;
	return (flags);
}

void
__elfN(stackgap)(struct image_params *imgp, uintptr_t *stack_base)
{
	uintptr_t range, rbase, gap;
	int pct;

	if ((imgp->map_flags & MAP_ASLR) == 0)
		return;
	pct = __elfN(aslr_stack_gap);
	if (pct == 0)
		return;
	if (pct > 50)
		pct = 50;
	range = imgp->eff_stack_sz * pct / 100;
	arc4rand(&rbase, sizeof(rbase), 0);
	gap = rbase % range;
	gap &= ~(sizeof(u_long) - 1);
	*stack_base -= gap;
}
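/*
 * Worked example for the stack gap above (illustrative only, with assumed
 * numbers): with the default aslr_stack_gap of 3 and an effective stack
 * size of 8 MB, the gap is drawn uniformly from [0, 8 MB * 3 / 100) and
 * rounded down to a multiple of sizeof(u_long):
 *
 *	range = 8388608 * 3 / 100;	// 251658 bytes, ~246 kB
 *	gap = rbase % range;		// e.g. rbase = 1000000 -> 245026
 *	gap &= ~(sizeof(u_long) - 1);	// 245024 on LP64
 *	*stack_base -= gap;
 *
 * so at most about pct percent of the usable stack is sacrificed to
 * randomize the location of the stack top.
 */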