1 #include <fcntl.h> 2 #include <inttypes.h> 3 #include <mach-o/compact_unwind_encoding.h> 4 #include <mach-o/loader.h> 5 #include <mach-o/nlist.h> 6 #include <mach/machine.h> 7 #include <stdbool.h> 8 #include <stdint.h> 9 #include <stdio.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <sys/errno.h> 13 #include <sys/mman.h> 14 #include <sys/stat.h> 15 #include <sys/types.h> 16 17 enum { 18 UNWIND_ARM64_MODE_MASK = 0x0F000000, 19 UNWIND_ARM64_MODE_FRAMELESS = 0x02000000, 20 UNWIND_ARM64_MODE_DWARF = 0x03000000, 21 UNWIND_ARM64_MODE_FRAME = 0x04000000, 22 23 UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001, 24 UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002, 25 UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004, 26 UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008, 27 UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010, 28 UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100, 29 UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200, 30 UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400, 31 UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800, 32 33 UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000, 34 UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF, 35 }; 36 37 enum { 38 UNWIND_ARM_MODE_MASK = 0x0F000000, 39 UNWIND_ARM_MODE_FRAME = 0x01000000, 40 UNWIND_ARM_MODE_FRAME_D = 0x02000000, 41 UNWIND_ARM_MODE_DWARF = 0x04000000, 42 43 UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000, 44 45 UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001, 46 UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002, 47 UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004, 48 49 UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008, 50 UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010, 51 UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020, 52 UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040, 53 UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080, 54 55 UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000700, 56 57 UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF, 58 }; 59 60 #define EXTRACT_BITS(value, mask) \ 61 ((value >> __builtin_ctz(mask)) & (((1 << __builtin_popcount(mask))) - 1)) 62 63 // A quick sketch of a program which can parse the compact unwind info 64 // used on Darwin systems for exception handling. The output of 65 // unwinddump will be more authoritative/reliable but this program 66 // can dump at least the UNWIND_X86_64_MODE_RBP_FRAME format entries 67 // correctly. 68 69 struct symbol { 70 uint64_t file_address; 71 const char *name; 72 }; 73 74 int symbol_compare(const void *a, const void *b) { 75 return (int)((struct symbol *)a)->file_address - 76 ((struct symbol *)b)->file_address; 77 } 78 79 struct baton { 80 cpu_type_t cputype; 81 82 uint8_t *mach_header_start; // pointer into this program's address space 83 uint8_t *compact_unwind_start; // pointer into this program's address space 84 85 int addr_size; // 4 or 8 bytes, the size of addresses in this file 86 87 uint64_t text_segment_vmaddr; // __TEXT segment vmaddr 88 uint64_t text_segment_file_offset; 89 90 uint64_t text_section_vmaddr; // __TEXT,__text section vmaddr 91 uint64_t text_section_file_offset; 92 93 uint64_t eh_section_file_address; // the file address of the __TEXT,__eh_frame 94 // section 95 96 uint8_t 97 *lsda_array_start; // for the currently-being-processed first-level index 98 uint8_t 99 *lsda_array_end; // the lsda_array_start for the NEXT first-level index 100 101 struct symbol *symbols; 102 int symbols_count; 103 104 uint64_t *function_start_addresses; 105 int function_start_addresses_count; 106 107 int current_index_table_number; 108 109 struct unwind_info_section_header unwind_header; 110 struct unwind_info_section_header_index_entry first_level_index_entry; 111 struct unwind_info_compressed_second_level_page_header 112 compressed_second_level_page_header; 113 struct unwind_info_regular_second_level_page_header 114 regular_second_level_page_header; 115 }; 116 117 uint64_t read_leb128(uint8_t **offset) { 118 uint64_t result = 0; 119 int shift = 0; 120 while (1) { 121 uint8_t byte = **offset; 122 *offset = *offset + 1; 123 result |= (byte & 0x7f) << shift; 124 if ((byte & 0x80) == 0) 125 break; 126 shift += 7; 127 } 128 129 return result; 130 } 131 132 // step through the load commands in a thin mach-o binary, 133 // find the cputype and the start of the __TEXT,__unwind_info 134 // section, return a pointer to that section or NULL if not found. 135 136 static void scan_macho_load_commands(struct baton *baton) { 137 struct symtab_command symtab_cmd; 138 uint64_t linkedit_segment_vmaddr; 139 uint64_t linkedit_segment_file_offset; 140 141 baton->compact_unwind_start = 0; 142 143 uint32_t *magic = (uint32_t *)baton->mach_header_start; 144 145 if (*magic != MH_MAGIC && *magic != MH_MAGIC_64) { 146 printf("Unexpected magic number 0x%x in header, exiting.", *magic); 147 exit(1); 148 } 149 150 bool is_64bit = false; 151 if (*magic == MH_MAGIC_64) 152 is_64bit = true; 153 154 uint8_t *offset = baton->mach_header_start; 155 156 struct mach_header mh; 157 memcpy(&mh, offset, sizeof(struct mach_header)); 158 if (is_64bit) 159 offset += sizeof(struct mach_header_64); 160 else 161 offset += sizeof(struct mach_header); 162 163 if (is_64bit) 164 baton->addr_size = 8; 165 else 166 baton->addr_size = 4; 167 168 baton->cputype = mh.cputype; 169 170 uint8_t *start_of_load_commands = offset; 171 172 uint32_t cur_cmd = 0; 173 while (cur_cmd < mh.ncmds && 174 (offset - start_of_load_commands) < mh.sizeofcmds) { 175 struct load_command lc; 176 uint32_t *lc_cmd = (uint32_t *)offset; 177 uint32_t *lc_cmdsize = (uint32_t *)offset + 1; 178 uint8_t *start_of_this_load_cmd = offset; 179 180 if (*lc_cmd == LC_SEGMENT || *lc_cmd == LC_SEGMENT_64) { 181 char segment_name[17]; 182 segment_name[0] = '\0'; 183 uint32_t nsects = 0; 184 uint64_t segment_offset = 0; 185 uint64_t segment_vmaddr = 0; 186 187 if (*lc_cmd == LC_SEGMENT_64) { 188 struct segment_command_64 seg; 189 memcpy(&seg, offset, sizeof(struct segment_command_64)); 190 memcpy(&segment_name, &seg.segname, 16); 191 segment_name[16] = '\0'; 192 nsects = seg.nsects; 193 segment_offset = seg.fileoff; 194 segment_vmaddr = seg.vmaddr; 195 offset += sizeof(struct segment_command_64); 196 if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) { 197 printf("Segment '%s' is encrypted.\n", segment_name); 198 } 199 } 200 201 if (*lc_cmd == LC_SEGMENT) { 202 struct segment_command seg; 203 memcpy(&seg, offset, sizeof(struct segment_command)); 204 memcpy(&segment_name, &seg.segname, 16); 205 segment_name[16] = '\0'; 206 nsects = seg.nsects; 207 segment_offset = seg.fileoff; 208 segment_vmaddr = seg.vmaddr; 209 offset += sizeof(struct segment_command); 210 if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) { 211 printf("Segment '%s' is encrypted.\n", segment_name); 212 } 213 } 214 215 if (nsects != 0 && strcmp(segment_name, "__TEXT") == 0) { 216 baton->text_segment_vmaddr = segment_vmaddr; 217 baton->text_segment_file_offset = segment_offset; 218 219 uint32_t current_sect = 0; 220 while (current_sect < nsects && 221 (offset - start_of_this_load_cmd) < *lc_cmdsize) { 222 char sect_name[17]; 223 memcpy(§_name, offset, 16); 224 sect_name[16] = '\0'; 225 if (strcmp(sect_name, "__unwind_info") == 0) { 226 if (is_64bit) { 227 struct section_64 sect; 228 memset(§, 0, sizeof(struct section_64)); 229 memcpy(§, offset, sizeof(struct section_64)); 230 baton->compact_unwind_start = 231 baton->mach_header_start + sect.offset; 232 } else { 233 struct section sect; 234 memset(§, 0, sizeof(struct section)); 235 memcpy(§, offset, sizeof(struct section)); 236 baton->compact_unwind_start = 237 baton->mach_header_start + sect.offset; 238 } 239 } 240 if (strcmp(sect_name, "__eh_frame") == 0) { 241 if (is_64bit) { 242 struct section_64 sect; 243 memset(§, 0, sizeof(struct section_64)); 244 memcpy(§, offset, sizeof(struct section_64)); 245 baton->eh_section_file_address = sect.addr; 246 } else { 247 struct section sect; 248 memset(§, 0, sizeof(struct section)); 249 memcpy(§, offset, sizeof(struct section)); 250 baton->eh_section_file_address = sect.addr; 251 } 252 } 253 if (strcmp(sect_name, "__text") == 0) { 254 if (is_64bit) { 255 struct section_64 sect; 256 memset(§, 0, sizeof(struct section_64)); 257 memcpy(§, offset, sizeof(struct section_64)); 258 baton->text_section_vmaddr = sect.addr; 259 baton->text_section_file_offset = sect.offset; 260 } else { 261 struct section sect; 262 memset(§, 0, sizeof(struct section)); 263 memcpy(§, offset, sizeof(struct section)); 264 baton->text_section_vmaddr = sect.addr; 265 } 266 } 267 if (is_64bit) { 268 offset += sizeof(struct section_64); 269 } else { 270 offset += sizeof(struct section); 271 } 272 } 273 } 274 275 if (strcmp(segment_name, "__LINKEDIT") == 0) { 276 linkedit_segment_vmaddr = segment_vmaddr; 277 linkedit_segment_file_offset = segment_offset; 278 } 279 } 280 281 if (*lc_cmd == LC_SYMTAB) { 282 memcpy(&symtab_cmd, offset, sizeof(struct symtab_command)); 283 } 284 285 if (*lc_cmd == LC_DYSYMTAB) { 286 struct dysymtab_command dysymtab_cmd; 287 memcpy(&dysymtab_cmd, offset, sizeof(struct dysymtab_command)); 288 289 int nlist_size = 12; 290 if (is_64bit) 291 nlist_size = 16; 292 293 char *string_table = 294 (char *)(baton->mach_header_start + symtab_cmd.stroff); 295 uint8_t *local_syms = baton->mach_header_start + symtab_cmd.symoff + 296 (dysymtab_cmd.ilocalsym * nlist_size); 297 int local_syms_count = dysymtab_cmd.nlocalsym; 298 uint8_t *exported_syms = baton->mach_header_start + symtab_cmd.symoff + 299 (dysymtab_cmd.iextdefsym * nlist_size); 300 int exported_syms_count = dysymtab_cmd.nextdefsym; 301 302 // We're only going to create records for a small number of these symbols 303 // but to 304 // simplify the memory management I'll allocate enough space to store all 305 // of them. 306 baton->symbols = (struct symbol *)malloc( 307 sizeof(struct symbol) * (local_syms_count + exported_syms_count)); 308 baton->symbols_count = 0; 309 310 for (int i = 0; i < local_syms_count; i++) { 311 struct nlist_64 nlist; 312 memset(&nlist, 0, sizeof(struct nlist_64)); 313 if (is_64bit) { 314 memcpy(&nlist, local_syms + (i * nlist_size), 315 sizeof(struct nlist_64)); 316 } else { 317 struct nlist nlist_32; 318 memset(&nlist_32, 0, sizeof(struct nlist)); 319 memcpy(&nlist_32, local_syms + (i * nlist_size), 320 sizeof(struct nlist)); 321 nlist.n_un.n_strx = nlist_32.n_un.n_strx; 322 nlist.n_type = nlist_32.n_type; 323 nlist.n_sect = nlist_32.n_sect; 324 nlist.n_desc = nlist_32.n_desc; 325 nlist.n_value = nlist_32.n_value; 326 } 327 if ((nlist.n_type & N_STAB) == 0 && 328 ((nlist.n_type & N_EXT) == 1 || 329 ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) && 330 nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) { 331 baton->symbols[baton->symbols_count].file_address = nlist.n_value; 332 if (baton->cputype == CPU_TYPE_ARM) 333 baton->symbols[baton->symbols_count].file_address = 334 baton->symbols[baton->symbols_count].file_address & ~1; 335 baton->symbols[baton->symbols_count].name = 336 string_table + nlist.n_un.n_strx; 337 baton->symbols_count++; 338 } 339 } 340 341 for (int i = 0; i < exported_syms_count; i++) { 342 struct nlist_64 nlist; 343 memset(&nlist, 0, sizeof(struct nlist_64)); 344 if (is_64bit) { 345 memcpy(&nlist, exported_syms + (i * nlist_size), 346 sizeof(struct nlist_64)); 347 } else { 348 struct nlist nlist_32; 349 memcpy(&nlist_32, exported_syms + (i * nlist_size), 350 sizeof(struct nlist)); 351 nlist.n_un.n_strx = nlist_32.n_un.n_strx; 352 nlist.n_type = nlist_32.n_type; 353 nlist.n_sect = nlist_32.n_sect; 354 nlist.n_desc = nlist_32.n_desc; 355 nlist.n_value = nlist_32.n_value; 356 } 357 if ((nlist.n_type & N_STAB) == 0 && 358 ((nlist.n_type & N_EXT) == 1 || 359 ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) && 360 nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) { 361 baton->symbols[baton->symbols_count].file_address = nlist.n_value; 362 if (baton->cputype == CPU_TYPE_ARM) 363 baton->symbols[baton->symbols_count].file_address = 364 baton->symbols[baton->symbols_count].file_address & ~1; 365 baton->symbols[baton->symbols_count].name = 366 string_table + nlist.n_un.n_strx; 367 baton->symbols_count++; 368 } 369 } 370 371 qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol), 372 symbol_compare); 373 } 374 375 if (*lc_cmd == LC_FUNCTION_STARTS) { 376 struct linkedit_data_command function_starts_cmd; 377 memcpy(&function_starts_cmd, offset, 378 sizeof(struct linkedit_data_command)); 379 380 uint8_t *funcstarts_offset = 381 baton->mach_header_start + function_starts_cmd.dataoff; 382 uint8_t *function_end = funcstarts_offset + function_starts_cmd.datasize; 383 int count = 0; 384 385 while (funcstarts_offset < function_end) { 386 if (read_leb128(&funcstarts_offset) != 0) { 387 count++; 388 } 389 } 390 391 baton->function_start_addresses = 392 (uint64_t *)malloc(sizeof(uint64_t) * count); 393 baton->function_start_addresses_count = count; 394 395 funcstarts_offset = 396 baton->mach_header_start + function_starts_cmd.dataoff; 397 uint64_t current_pc = baton->text_segment_vmaddr; 398 int i = 0; 399 while (funcstarts_offset < function_end) { 400 uint64_t func_start = read_leb128(&funcstarts_offset); 401 if (func_start != 0) { 402 current_pc += func_start; 403 baton->function_start_addresses[i++] = current_pc; 404 } 405 } 406 } 407 408 offset = start_of_this_load_cmd + *lc_cmdsize; 409 cur_cmd++; 410 } 411 412 // Augment the symbol table with the function starts table -- adding symbol 413 // entries 414 // for functions that were stripped. 415 416 int unnamed_functions_to_add = 0; 417 for (int i = 0; i < baton->function_start_addresses_count; i++) { 418 struct symbol search_key; 419 search_key.file_address = baton->function_start_addresses[i]; 420 if (baton->cputype == CPU_TYPE_ARM) 421 search_key.file_address = search_key.file_address & ~1; 422 struct symbol *sym = 423 bsearch(&search_key, baton->symbols, baton->symbols_count, 424 sizeof(struct symbol), symbol_compare); 425 if (sym == NULL) 426 unnamed_functions_to_add++; 427 } 428 429 baton->symbols = (struct symbol *)realloc( 430 baton->symbols, sizeof(struct symbol) * 431 (baton->symbols_count + unnamed_functions_to_add)); 432 433 int current_unnamed_symbol = 1; 434 int number_symbols_added = 0; 435 for (int i = 0; i < baton->function_start_addresses_count; i++) { 436 struct symbol search_key; 437 search_key.file_address = baton->function_start_addresses[i]; 438 if (baton->cputype == CPU_TYPE_ARM) 439 search_key.file_address = search_key.file_address & ~1; 440 struct symbol *sym = 441 bsearch(&search_key, baton->symbols, baton->symbols_count, 442 sizeof(struct symbol), symbol_compare); 443 if (sym == NULL) { 444 char *name; 445 asprintf(&name, "unnamed function #%d", current_unnamed_symbol++); 446 baton->symbols[baton->symbols_count + number_symbols_added].file_address = 447 baton->function_start_addresses[i]; 448 baton->symbols[baton->symbols_count + number_symbols_added].name = name; 449 number_symbols_added++; 450 } 451 } 452 baton->symbols_count += number_symbols_added; 453 qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol), 454 symbol_compare); 455 456 // printf ("function start addresses\n"); 457 // for (int i = 0; i < baton->function_start_addresses_count; i++) 458 // { 459 // printf ("0x%012llx\n", baton->function_start_addresses[i]); 460 // } 461 462 // printf ("symbol table names & addresses\n"); 463 // for (int i = 0; i < baton->symbols_count; i++) 464 // { 465 // printf ("0x%012llx %s\n", baton->symbols[i].file_address, 466 // baton->symbols[i].name); 467 // } 468 } 469 470 void print_encoding_x86_64(struct baton baton, uint8_t *function_start, 471 uint32_t encoding) { 472 int mode = encoding & UNWIND_X86_64_MODE_MASK; 473 switch (mode) { 474 case UNWIND_X86_64_MODE_RBP_FRAME: { 475 printf("frame func: CFA is rbp+%d ", 16); 476 printf(" rip=[CFA-8] rbp=[CFA-16]"); 477 uint32_t saved_registers_offset = 478 EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_OFFSET); 479 480 uint32_t saved_registers_locations = 481 EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_REGISTERS); 482 483 saved_registers_offset += 2; 484 485 for (int i = 0; i < 5; i++) { 486 switch (saved_registers_locations & 0x7) { 487 case UNWIND_X86_64_REG_NONE: 488 break; 489 case UNWIND_X86_64_REG_RBX: 490 printf(" rbx=[CFA-%d]", saved_registers_offset * 8); 491 break; 492 case UNWIND_X86_64_REG_R12: 493 printf(" r12=[CFA-%d]", saved_registers_offset * 8); 494 break; 495 case UNWIND_X86_64_REG_R13: 496 printf(" r13=[CFA-%d]", saved_registers_offset * 8); 497 break; 498 case UNWIND_X86_64_REG_R14: 499 printf(" r14=[CFA-%d]", saved_registers_offset * 8); 500 break; 501 case UNWIND_X86_64_REG_R15: 502 printf(" r15=[CFA-%d]", saved_registers_offset * 8); 503 break; 504 } 505 saved_registers_offset--; 506 saved_registers_locations >>= 3; 507 } 508 } break; 509 510 case UNWIND_X86_64_MODE_STACK_IND: 511 case UNWIND_X86_64_MODE_STACK_IMMD: { 512 uint32_t stack_size = 513 EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); 514 uint32_t register_count = 515 EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT); 516 uint32_t permutation = 517 EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION); 518 519 if (mode == UNWIND_X86_64_MODE_STACK_IND && function_start) { 520 uint32_t stack_adjust = 521 EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST); 522 523 // offset into the function instructions; 0 == beginning of first 524 // instruction 525 uint32_t offset_to_subl_insn = 526 EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE); 527 528 stack_size = *((uint32_t *)(function_start + offset_to_subl_insn)); 529 530 stack_size += stack_adjust * 8; 531 532 printf("large stack "); 533 } 534 535 if (mode == UNWIND_X86_64_MODE_STACK_IND) { 536 printf("frameless function: stack size %d, register count %d ", 537 stack_size * 8, register_count); 538 } else { 539 printf("frameless function: stack size %d, register count %d ", 540 stack_size, register_count); 541 } 542 543 if (register_count == 0) { 544 printf(" no registers saved"); 545 } else { 546 547 // We need to include (up to) 6 registers in 10 bits. 548 // That would be 18 bits if we just used 3 bits per reg to indicate 549 // the order they're saved on the stack. 550 // 551 // This is done with Lehmer code permutation, e.g. see 552 // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms 553 int permunreg[6]; 554 555 // This decodes the variable-base number in the 10 bits 556 // and gives us the Lehmer code sequence which can then 557 // be decoded. 558 559 switch (register_count) { 560 case 6: 561 permunreg[0] = permutation / 120; // 120 == 5! 562 permutation -= (permunreg[0] * 120); 563 permunreg[1] = permutation / 24; // 24 == 4! 564 permutation -= (permunreg[1] * 24); 565 permunreg[2] = permutation / 6; // 6 == 3! 566 permutation -= (permunreg[2] * 6); 567 permunreg[3] = permutation / 2; // 2 == 2! 568 permutation -= (permunreg[3] * 2); 569 permunreg[4] = permutation; // 1 == 1! 570 permunreg[5] = 0; 571 break; 572 case 5: 573 permunreg[0] = permutation / 120; 574 permutation -= (permunreg[0] * 120); 575 permunreg[1] = permutation / 24; 576 permutation -= (permunreg[1] * 24); 577 permunreg[2] = permutation / 6; 578 permutation -= (permunreg[2] * 6); 579 permunreg[3] = permutation / 2; 580 permutation -= (permunreg[3] * 2); 581 permunreg[4] = permutation; 582 break; 583 case 4: 584 permunreg[0] = permutation / 60; 585 permutation -= (permunreg[0] * 60); 586 permunreg[1] = permutation / 12; 587 permutation -= (permunreg[1] * 12); 588 permunreg[2] = permutation / 3; 589 permutation -= (permunreg[2] * 3); 590 permunreg[3] = permutation; 591 break; 592 case 3: 593 permunreg[0] = permutation / 20; 594 permutation -= (permunreg[0] * 20); 595 permunreg[1] = permutation / 4; 596 permutation -= (permunreg[1] * 4); 597 permunreg[2] = permutation; 598 break; 599 case 2: 600 permunreg[0] = permutation / 5; 601 permutation -= (permunreg[0] * 5); 602 permunreg[1] = permutation; 603 break; 604 case 1: 605 permunreg[0] = permutation; 606 break; 607 } 608 609 // Decode the Lehmer code for this permutation of 610 // the registers v. http://en.wikipedia.org/wiki/Lehmer_code 611 612 int registers[6]; 613 bool used[7] = {false, false, false, false, false, false, false}; 614 for (int i = 0; i < register_count; i++) { 615 int renum = 0; 616 for (int j = 1; j < 7; j++) { 617 if (used[j] == false) { 618 if (renum == permunreg[i]) { 619 registers[i] = j; 620 used[j] = true; 621 break; 622 } 623 renum++; 624 } 625 } 626 } 627 628 if (mode == UNWIND_X86_64_MODE_STACK_IND) { 629 printf(" CFA is rsp+%d ", stack_size); 630 } else { 631 printf(" CFA is rsp+%d ", stack_size * 8); 632 } 633 634 uint32_t saved_registers_offset = 1; 635 printf(" rip=[CFA-%d]", saved_registers_offset * 8); 636 saved_registers_offset++; 637 638 for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) { 639 switch (registers[i]) { 640 case UNWIND_X86_64_REG_NONE: 641 break; 642 case UNWIND_X86_64_REG_RBX: 643 printf(" rbx=[CFA-%d]", saved_registers_offset * 8); 644 saved_registers_offset++; 645 break; 646 case UNWIND_X86_64_REG_R12: 647 printf(" r12=[CFA-%d]", saved_registers_offset * 8); 648 saved_registers_offset++; 649 break; 650 case UNWIND_X86_64_REG_R13: 651 printf(" r13=[CFA-%d]", saved_registers_offset * 8); 652 saved_registers_offset++; 653 break; 654 case UNWIND_X86_64_REG_R14: 655 printf(" r14=[CFA-%d]", saved_registers_offset * 8); 656 saved_registers_offset++; 657 break; 658 case UNWIND_X86_64_REG_R15: 659 printf(" r15=[CFA-%d]", saved_registers_offset * 8); 660 saved_registers_offset++; 661 break; 662 case UNWIND_X86_64_REG_RBP: 663 printf(" rbp=[CFA-%d]", saved_registers_offset * 8); 664 saved_registers_offset++; 665 break; 666 } 667 } 668 } 669 670 } break; 671 672 case UNWIND_X86_64_MODE_DWARF: { 673 uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET; 674 printf( 675 "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 676 ")", 677 dwarf_offset, dwarf_offset + baton.eh_section_file_address); 678 } break; 679 680 case 0: { 681 printf(" no unwind information"); 682 } break; 683 } 684 } 685 686 void print_encoding_i386(struct baton baton, uint8_t *function_start, 687 uint32_t encoding) { 688 int mode = encoding & UNWIND_X86_MODE_MASK; 689 switch (mode) { 690 case UNWIND_X86_MODE_EBP_FRAME: { 691 printf("frame func: CFA is ebp+%d ", 8); 692 printf(" eip=[CFA-4] ebp=[CFA-8]"); 693 uint32_t saved_registers_offset = 694 EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_OFFSET); 695 696 uint32_t saved_registers_locations = 697 EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_REGISTERS); 698 699 saved_registers_offset += 2; 700 701 for (int i = 0; i < 5; i++) { 702 switch (saved_registers_locations & 0x7) { 703 case UNWIND_X86_REG_NONE: 704 break; 705 case UNWIND_X86_REG_EBX: 706 printf(" ebx=[CFA-%d]", saved_registers_offset * 4); 707 break; 708 case UNWIND_X86_REG_ECX: 709 printf(" ecx=[CFA-%d]", saved_registers_offset * 4); 710 break; 711 case UNWIND_X86_REG_EDX: 712 printf(" edx=[CFA-%d]", saved_registers_offset * 4); 713 break; 714 case UNWIND_X86_REG_EDI: 715 printf(" edi=[CFA-%d]", saved_registers_offset * 4); 716 break; 717 case UNWIND_X86_REG_ESI: 718 printf(" esi=[CFA-%d]", saved_registers_offset * 4); 719 break; 720 } 721 saved_registers_offset--; 722 saved_registers_locations >>= 3; 723 } 724 } break; 725 726 case UNWIND_X86_MODE_STACK_IND: 727 case UNWIND_X86_MODE_STACK_IMMD: { 728 uint32_t stack_size = 729 EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); 730 uint32_t register_count = 731 EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT); 732 uint32_t permutation = 733 EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION); 734 735 if (mode == UNWIND_X86_MODE_STACK_IND && function_start) { 736 uint32_t stack_adjust = 737 EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST); 738 739 // offset into the function instructions; 0 == beginning of first 740 // instruction 741 uint32_t offset_to_subl_insn = 742 EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE); 743 744 stack_size = *((uint32_t *)(function_start + offset_to_subl_insn)); 745 746 stack_size += stack_adjust * 4; 747 748 printf("large stack "); 749 } 750 751 if (mode == UNWIND_X86_MODE_STACK_IND) { 752 printf("frameless function: stack size %d, register count %d ", 753 stack_size, register_count); 754 } else { 755 printf("frameless function: stack size %d, register count %d ", 756 stack_size * 4, register_count); 757 } 758 759 if (register_count == 0) { 760 printf(" no registers saved"); 761 } else { 762 763 // We need to include (up to) 6 registers in 10 bits. 764 // That would be 18 bits if we just used 3 bits per reg to indicate 765 // the order they're saved on the stack. 766 // 767 // This is done with Lehmer code permutation, e.g. see 768 // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms 769 int permunreg[6]; 770 771 // This decodes the variable-base number in the 10 bits 772 // and gives us the Lehmer code sequence which can then 773 // be decoded. 774 775 switch (register_count) { 776 case 6: 777 permunreg[0] = permutation / 120; // 120 == 5! 778 permutation -= (permunreg[0] * 120); 779 permunreg[1] = permutation / 24; // 24 == 4! 780 permutation -= (permunreg[1] * 24); 781 permunreg[2] = permutation / 6; // 6 == 3! 782 permutation -= (permunreg[2] * 6); 783 permunreg[3] = permutation / 2; // 2 == 2! 784 permutation -= (permunreg[3] * 2); 785 permunreg[4] = permutation; // 1 == 1! 786 permunreg[5] = 0; 787 break; 788 case 5: 789 permunreg[0] = permutation / 120; 790 permutation -= (permunreg[0] * 120); 791 permunreg[1] = permutation / 24; 792 permutation -= (permunreg[1] * 24); 793 permunreg[2] = permutation / 6; 794 permutation -= (permunreg[2] * 6); 795 permunreg[3] = permutation / 2; 796 permutation -= (permunreg[3] * 2); 797 permunreg[4] = permutation; 798 break; 799 case 4: 800 permunreg[0] = permutation / 60; 801 permutation -= (permunreg[0] * 60); 802 permunreg[1] = permutation / 12; 803 permutation -= (permunreg[1] * 12); 804 permunreg[2] = permutation / 3; 805 permutation -= (permunreg[2] * 3); 806 permunreg[3] = permutation; 807 break; 808 case 3: 809 permunreg[0] = permutation / 20; 810 permutation -= (permunreg[0] * 20); 811 permunreg[1] = permutation / 4; 812 permutation -= (permunreg[1] * 4); 813 permunreg[2] = permutation; 814 break; 815 case 2: 816 permunreg[0] = permutation / 5; 817 permutation -= (permunreg[0] * 5); 818 permunreg[1] = permutation; 819 break; 820 case 1: 821 permunreg[0] = permutation; 822 break; 823 } 824 825 // Decode the Lehmer code for this permutation of 826 // the registers v. http://en.wikipedia.org/wiki/Lehmer_code 827 828 int registers[6]; 829 bool used[7] = {false, false, false, false, false, false, false}; 830 for (int i = 0; i < register_count; i++) { 831 int renum = 0; 832 for (int j = 1; j < 7; j++) { 833 if (used[j] == false) { 834 if (renum == permunreg[i]) { 835 registers[i] = j; 836 used[j] = true; 837 break; 838 } 839 renum++; 840 } 841 } 842 } 843 844 if (mode == UNWIND_X86_MODE_STACK_IND) { 845 printf(" CFA is esp+%d ", stack_size); 846 } else { 847 printf(" CFA is esp+%d ", stack_size * 4); 848 } 849 850 uint32_t saved_registers_offset = 1; 851 printf(" eip=[CFA-%d]", saved_registers_offset * 4); 852 saved_registers_offset++; 853 854 for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) { 855 switch (registers[i]) { 856 case UNWIND_X86_REG_NONE: 857 break; 858 case UNWIND_X86_REG_EBX: 859 printf(" ebx=[CFA-%d]", saved_registers_offset * 4); 860 saved_registers_offset++; 861 break; 862 case UNWIND_X86_REG_ECX: 863 printf(" ecx=[CFA-%d]", saved_registers_offset * 4); 864 saved_registers_offset++; 865 break; 866 case UNWIND_X86_REG_EDX: 867 printf(" edx=[CFA-%d]", saved_registers_offset * 4); 868 saved_registers_offset++; 869 break; 870 case UNWIND_X86_REG_EDI: 871 printf(" edi=[CFA-%d]", saved_registers_offset * 4); 872 saved_registers_offset++; 873 break; 874 case UNWIND_X86_REG_ESI: 875 printf(" esi=[CFA-%d]", saved_registers_offset * 4); 876 saved_registers_offset++; 877 break; 878 case UNWIND_X86_REG_EBP: 879 printf(" ebp=[CFA-%d]", saved_registers_offset * 4); 880 saved_registers_offset++; 881 break; 882 } 883 } 884 } 885 886 } break; 887 888 case UNWIND_X86_MODE_DWARF: { 889 uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET; 890 printf( 891 "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 892 ")", 893 dwarf_offset, dwarf_offset + baton.eh_section_file_address); 894 } break; 895 896 case 0: { 897 printf(" no unwind information"); 898 } break; 899 } 900 } 901 902 void print_encoding_arm64(struct baton baton, uint8_t *function_start, 903 uint32_t encoding) { 904 const int wordsize = 8; 905 int mode = encoding & UNWIND_ARM64_MODE_MASK; 906 switch (mode) { 907 case UNWIND_ARM64_MODE_FRAME: { 908 printf("frame func: CFA is fp+%d ", 16); 909 printf(" pc=[CFA-8] fp=[CFA-16]"); 910 int reg_pairs_saved_count = 1; 911 uint32_t saved_register_bits = encoding & 0xfff; 912 if (saved_register_bits & UNWIND_ARM64_FRAME_X19_X20_PAIR) { 913 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 914 cfa_offset -= wordsize; 915 printf(" x19=[CFA%d]", cfa_offset); 916 cfa_offset -= wordsize; 917 printf(" x20=[CFA%d]", cfa_offset); 918 reg_pairs_saved_count++; 919 } 920 if (saved_register_bits & UNWIND_ARM64_FRAME_X21_X22_PAIR) { 921 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 922 cfa_offset -= wordsize; 923 printf(" x21=[CFA%d]", cfa_offset); 924 cfa_offset -= wordsize; 925 printf(" x22=[CFA%d]", cfa_offset); 926 reg_pairs_saved_count++; 927 } 928 if (saved_register_bits & UNWIND_ARM64_FRAME_X23_X24_PAIR) { 929 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 930 cfa_offset -= wordsize; 931 printf(" x23=[CFA%d]", cfa_offset); 932 cfa_offset -= wordsize; 933 printf(" x24=[CFA%d]", cfa_offset); 934 reg_pairs_saved_count++; 935 } 936 if (saved_register_bits & UNWIND_ARM64_FRAME_X25_X26_PAIR) { 937 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 938 cfa_offset -= wordsize; 939 printf(" x25=[CFA%d]", cfa_offset); 940 cfa_offset -= wordsize; 941 printf(" x26=[CFA%d]", cfa_offset); 942 reg_pairs_saved_count++; 943 } 944 if (saved_register_bits & UNWIND_ARM64_FRAME_X27_X28_PAIR) { 945 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 946 cfa_offset -= wordsize; 947 printf(" x27=[CFA%d]", cfa_offset); 948 cfa_offset -= wordsize; 949 printf(" x28=[CFA%d]", cfa_offset); 950 reg_pairs_saved_count++; 951 } 952 if (saved_register_bits & UNWIND_ARM64_FRAME_D8_D9_PAIR) { 953 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 954 cfa_offset -= wordsize; 955 printf(" d8=[CFA%d]", cfa_offset); 956 cfa_offset -= wordsize; 957 printf(" d9=[CFA%d]", cfa_offset); 958 reg_pairs_saved_count++; 959 } 960 if (saved_register_bits & UNWIND_ARM64_FRAME_D10_D11_PAIR) { 961 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 962 cfa_offset -= wordsize; 963 printf(" d10=[CFA%d]", cfa_offset); 964 cfa_offset -= wordsize; 965 printf(" d11=[CFA%d]", cfa_offset); 966 reg_pairs_saved_count++; 967 } 968 if (saved_register_bits & UNWIND_ARM64_FRAME_D12_D13_PAIR) { 969 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 970 cfa_offset -= wordsize; 971 printf(" d12=[CFA%d]", cfa_offset); 972 cfa_offset -= wordsize; 973 printf(" d13=[CFA%d]", cfa_offset); 974 reg_pairs_saved_count++; 975 } 976 if (saved_register_bits & UNWIND_ARM64_FRAME_D14_D15_PAIR) { 977 int cfa_offset = reg_pairs_saved_count * -2 * wordsize; 978 cfa_offset -= wordsize; 979 printf(" d14=[CFA%d]", cfa_offset); 980 cfa_offset -= wordsize; 981 printf(" d15=[CFA%d]", cfa_offset); 982 reg_pairs_saved_count++; 983 } 984 985 } break; 986 987 case UNWIND_ARM64_MODE_FRAMELESS: { 988 uint32_t stack_size = encoding & UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK; 989 printf("frameless function: stack size %d ", stack_size * 16); 990 991 } break; 992 993 case UNWIND_ARM64_MODE_DWARF: { 994 uint32_t dwarf_offset = encoding & UNWIND_ARM64_DWARF_SECTION_OFFSET; 995 printf( 996 "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 997 ")", 998 dwarf_offset, dwarf_offset + baton.eh_section_file_address); 999 } break; 1000 1001 case 0: { 1002 printf(" no unwind information"); 1003 } break; 1004 } 1005 } 1006 1007 void print_encoding_armv7(struct baton baton, uint8_t *function_start, 1008 uint32_t encoding) { 1009 const int wordsize = 4; 1010 int mode = encoding & UNWIND_ARM_MODE_MASK; 1011 switch (mode) { 1012 case UNWIND_ARM_MODE_FRAME_D: 1013 case UNWIND_ARM_MODE_FRAME: { 1014 int stack_adjust = 1015 EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_STACK_ADJUST_MASK) * wordsize; 1016 1017 printf("frame func: CFA is fp+%d ", (2 * wordsize) + stack_adjust); 1018 int cfa_offset = -stack_adjust; 1019 1020 cfa_offset -= wordsize; 1021 printf(" pc=[CFA%d]", cfa_offset); 1022 cfa_offset -= wordsize; 1023 printf(" fp=[CFA%d]", cfa_offset); 1024 1025 uint32_t saved_register_bits = encoding & 0xff; 1026 if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R6) { 1027 cfa_offset -= wordsize; 1028 printf(" r6=[CFA%d]", cfa_offset); 1029 } 1030 if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R5) { 1031 cfa_offset -= wordsize; 1032 printf(" r5=[CFA%d]", cfa_offset); 1033 } 1034 if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R4) { 1035 cfa_offset -= wordsize; 1036 printf(" r4=[CFA%d]", cfa_offset); 1037 } 1038 if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R12) { 1039 cfa_offset -= wordsize; 1040 printf(" r12=[CFA%d]", cfa_offset); 1041 } 1042 if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R11) { 1043 cfa_offset -= wordsize; 1044 printf(" r11=[CFA%d]", cfa_offset); 1045 } 1046 if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R10) { 1047 cfa_offset -= wordsize; 1048 printf(" r10=[CFA%d]", cfa_offset); 1049 } 1050 if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R9) { 1051 cfa_offset -= wordsize; 1052 printf(" r9=[CFA%d]", cfa_offset); 1053 } 1054 if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R8) { 1055 cfa_offset -= wordsize; 1056 printf(" r8=[CFA%d]", cfa_offset); 1057 } 1058 1059 if (mode == UNWIND_ARM_MODE_FRAME_D) { 1060 uint32_t d_reg_bits = 1061 EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_D_REG_COUNT_MASK); 1062 switch (d_reg_bits) { 1063 case 0: 1064 // vpush {d8} 1065 cfa_offset -= 8; 1066 printf(" d8=[CFA%d]", cfa_offset); 1067 break; 1068 case 1: 1069 // vpush {d10} 1070 // vpush {d8} 1071 cfa_offset -= 8; 1072 printf(" d10=[CFA%d]", cfa_offset); 1073 cfa_offset -= 8; 1074 printf(" d8=[CFA%d]", cfa_offset); 1075 break; 1076 case 2: 1077 // vpush {d12} 1078 // vpush {d10} 1079 // vpush {d8} 1080 cfa_offset -= 8; 1081 printf(" d12=[CFA%d]", cfa_offset); 1082 cfa_offset -= 8; 1083 printf(" d10=[CFA%d]", cfa_offset); 1084 cfa_offset -= 8; 1085 printf(" d8=[CFA%d]", cfa_offset); 1086 break; 1087 case 3: 1088 // vpush {d14} 1089 // vpush {d12} 1090 // vpush {d10} 1091 // vpush {d8} 1092 cfa_offset -= 8; 1093 printf(" d14=[CFA%d]", cfa_offset); 1094 cfa_offset -= 8; 1095 printf(" d12=[CFA%d]", cfa_offset); 1096 cfa_offset -= 8; 1097 printf(" d10=[CFA%d]", cfa_offset); 1098 cfa_offset -= 8; 1099 printf(" d8=[CFA%d]", cfa_offset); 1100 break; 1101 case 4: 1102 // vpush {d14} 1103 // vpush {d12} 1104 // sp = (sp - 24) & (-16); 1105 // vst {d8, d9, d10} 1106 printf(" d14, d12, d10, d9, d8"); 1107 break; 1108 case 5: 1109 // vpush {d14} 1110 // sp = (sp - 40) & (-16); 1111 // vst {d8, d9, d10, d11} 1112 // vst {d12} 1113 printf(" d14, d11, d10, d9, d8, d12"); 1114 break; 1115 case 6: 1116 // sp = (sp - 56) & (-16); 1117 // vst {d8, d9, d10, d11} 1118 // vst {d12, d13, d14} 1119 printf(" d11, d10, d9, d8, d14, d13, d12"); 1120 break; 1121 case 7: 1122 // sp = (sp - 64) & (-16); 1123 // vst {d8, d9, d10, d11} 1124 // vst {d12, d13, d14, d15} 1125 printf(" d11, d10, d9, d8, d15, d14, d13, d12"); 1126 break; 1127 } 1128 } 1129 } break; 1130 1131 case UNWIND_ARM_MODE_DWARF: { 1132 uint32_t dwarf_offset = encoding & UNWIND_ARM_DWARF_SECTION_OFFSET; 1133 printf( 1134 "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64 1135 ")", 1136 dwarf_offset, dwarf_offset + baton.eh_section_file_address); 1137 } break; 1138 1139 case 0: { 1140 printf(" no unwind information"); 1141 } break; 1142 } 1143 } 1144 1145 void print_encoding(struct baton baton, uint8_t *function_start, 1146 uint32_t encoding) { 1147 1148 if (baton.cputype == CPU_TYPE_X86_64) { 1149 print_encoding_x86_64(baton, function_start, encoding); 1150 } else if (baton.cputype == CPU_TYPE_I386) { 1151 print_encoding_i386(baton, function_start, encoding); 1152 } else if (baton.cputype == CPU_TYPE_ARM64 || baton.cputype == CPU_TYPE_ARM64_32) { 1153 print_encoding_arm64(baton, function_start, encoding); 1154 } else if (baton.cputype == CPU_TYPE_ARM) { 1155 print_encoding_armv7(baton, function_start, encoding); 1156 } else { 1157 printf(" -- unsupported encoding arch -- "); 1158 } 1159 } 1160 1161 void print_function_encoding(struct baton baton, uint32_t idx, 1162 uint32_t encoding, uint32_t entry_encoding_index, 1163 uint32_t entry_func_offset) { 1164 1165 char *entry_encoding_index_str = ""; 1166 if (entry_encoding_index != (uint32_t)-1) { 1167 asprintf(&entry_encoding_index_str, ", encoding #%d", entry_encoding_index); 1168 } else { 1169 asprintf(&entry_encoding_index_str, ""); 1170 } 1171 1172 uint64_t file_address = baton.first_level_index_entry.functionOffset + 1173 entry_func_offset + baton.text_segment_vmaddr; 1174 1175 if (baton.cputype == CPU_TYPE_ARM) 1176 file_address = file_address & ~1; 1177 1178 printf( 1179 " func [%d] offset %d (file addr 0x%" PRIx64 ")%s, encoding is 0x%x", 1180 idx, entry_func_offset, file_address, entry_encoding_index_str, encoding); 1181 1182 struct symbol *symbol = NULL; 1183 for (int i = 0; i < baton.symbols_count; i++) { 1184 if (i == baton.symbols_count - 1 && 1185 baton.symbols[i].file_address <= file_address) { 1186 symbol = &(baton.symbols[i]); 1187 break; 1188 } else { 1189 if (baton.symbols[i].file_address <= file_address && 1190 baton.symbols[i + 1].file_address > file_address) { 1191 symbol = &(baton.symbols[i]); 1192 break; 1193 } 1194 } 1195 } 1196 1197 printf("\n "); 1198 if (symbol) { 1199 int offset = file_address - symbol->file_address; 1200 1201 // FIXME this is a poor heuristic - if we're greater than 16 bytes past the 1202 // start of the function, this is the unwind info for a stripped function. 1203 // In reality the compact unwind entry may not line up exactly with the 1204 // function bounds. 1205 if (offset >= 0) { 1206 printf("name: %s", symbol->name); 1207 if (offset > 0) { 1208 printf(" + %d", offset); 1209 } 1210 } 1211 printf("\n "); 1212 } 1213 1214 print_encoding(baton, baton.mach_header_start + 1215 baton.first_level_index_entry.functionOffset + 1216 baton.text_section_file_offset + entry_func_offset, 1217 encoding); 1218 1219 bool has_lsda = encoding & UNWIND_HAS_LSDA; 1220 1221 if (has_lsda) { 1222 uint32_t func_offset = 1223 entry_func_offset + baton.first_level_index_entry.functionOffset; 1224 1225 int lsda_entry_number = -1; 1226 1227 uint32_t low = 0; 1228 uint32_t high = (baton.lsda_array_end - baton.lsda_array_start) / 1229 sizeof(struct unwind_info_section_header_lsda_index_entry); 1230 1231 while (low < high) { 1232 uint32_t mid = (low + high) / 2; 1233 1234 uint8_t *mid_lsda_entry_addr = 1235 (baton.lsda_array_start + 1236 (mid * sizeof(struct unwind_info_section_header_lsda_index_entry))); 1237 struct unwind_info_section_header_lsda_index_entry mid_lsda_entry; 1238 memcpy(&mid_lsda_entry, mid_lsda_entry_addr, 1239 sizeof(struct unwind_info_section_header_lsda_index_entry)); 1240 if (mid_lsda_entry.functionOffset == func_offset) { 1241 lsda_entry_number = 1242 (mid_lsda_entry_addr - baton.lsda_array_start) / 1243 sizeof(struct unwind_info_section_header_lsda_index_entry); 1244 break; 1245 } else if (mid_lsda_entry.functionOffset < func_offset) { 1246 low = mid + 1; 1247 } else { 1248 high = mid; 1249 } 1250 } 1251 1252 if (lsda_entry_number != -1) { 1253 printf(", LSDA entry #%d", lsda_entry_number); 1254 } else { 1255 printf(", LSDA entry not found"); 1256 } 1257 } 1258 1259 uint32_t pers_idx = EXTRACT_BITS(encoding, UNWIND_PERSONALITY_MASK); 1260 if (pers_idx != 0) { 1261 pers_idx--; // Change 1-based to 0-based index 1262 printf(", personality entry #%d", pers_idx); 1263 } 1264 1265 printf("\n"); 1266 } 1267 1268 void print_second_level_index_regular(struct baton baton) { 1269 uint8_t *page_entries = 1270 baton.compact_unwind_start + 1271 baton.first_level_index_entry.secondLevelPagesSectionOffset + 1272 baton.regular_second_level_page_header.entryPageOffset; 1273 uint32_t entries_count = baton.regular_second_level_page_header.entryCount; 1274 1275 uint8_t *offset = page_entries; 1276 1277 uint32_t idx = 0; 1278 while (idx < entries_count) { 1279 uint32_t func_offset = *((uint32_t *)(offset)); 1280 uint32_t encoding = *((uint32_t *)(offset + 4)); 1281 1282 // UNWIND_SECOND_LEVEL_REGULAR entries have a funcOffset which includes the 1283 // functionOffset from the containing index table already. 1284 // UNWIND_SECOND_LEVEL_COMPRESSED 1285 // entries only have the offset from the containing index table 1286 // functionOffset. 1287 // So strip off the containing index table functionOffset value here so they 1288 // can 1289 // be treated the same at the lower layers. 1290 1291 print_function_encoding(baton, idx, encoding, (uint32_t)-1, 1292 func_offset - 1293 baton.first_level_index_entry.functionOffset); 1294 idx++; 1295 offset += 8; 1296 } 1297 } 1298 1299 void print_second_level_index_compressed(struct baton baton) { 1300 uint8_t *this_index = 1301 baton.compact_unwind_start + 1302 baton.first_level_index_entry.secondLevelPagesSectionOffset; 1303 uint8_t *start_of_entries = 1304 this_index + baton.compressed_second_level_page_header.entryPageOffset; 1305 uint8_t *offset = start_of_entries; 1306 for (uint16_t idx = 0; 1307 idx < baton.compressed_second_level_page_header.entryCount; idx++) { 1308 uint32_t entry = *((uint32_t *)offset); 1309 offset += 4; 1310 uint32_t encoding; 1311 1312 uint32_t entry_encoding_index = 1313 UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry); 1314 uint32_t entry_func_offset = 1315 UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry); 1316 1317 if (entry_encoding_index < baton.unwind_header.commonEncodingsArrayCount) { 1318 // encoding is in common table in section header 1319 encoding = 1320 *((uint32_t *)(baton.compact_unwind_start + 1321 baton.unwind_header.commonEncodingsArraySectionOffset + 1322 (entry_encoding_index * sizeof(uint32_t)))); 1323 } else { 1324 // encoding is in page specific table 1325 uint32_t page_encoding_index = 1326 entry_encoding_index - baton.unwind_header.commonEncodingsArrayCount; 1327 encoding = *((uint32_t *)(this_index + 1328 baton.compressed_second_level_page_header 1329 .encodingsPageOffset + 1330 (page_encoding_index * sizeof(uint32_t)))); 1331 } 1332 1333 print_function_encoding(baton, idx, encoding, entry_encoding_index, 1334 entry_func_offset); 1335 } 1336 } 1337 1338 void print_second_level_index(struct baton baton) { 1339 uint8_t *index_start = 1340 baton.compact_unwind_start + 1341 baton.first_level_index_entry.secondLevelPagesSectionOffset; 1342 1343 if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_REGULAR) { 1344 struct unwind_info_regular_second_level_page_header header; 1345 memcpy(&header, index_start, 1346 sizeof(struct unwind_info_regular_second_level_page_header)); 1347 printf( 1348 " UNWIND_SECOND_LEVEL_REGULAR #%d entryPageOffset %d, entryCount %d\n", 1349 baton.current_index_table_number, header.entryPageOffset, 1350 header.entryCount); 1351 baton.regular_second_level_page_header = header; 1352 print_second_level_index_regular(baton); 1353 } 1354 1355 if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_COMPRESSED) { 1356 struct unwind_info_compressed_second_level_page_header header; 1357 memcpy(&header, index_start, 1358 sizeof(struct unwind_info_compressed_second_level_page_header)); 1359 printf(" UNWIND_SECOND_LEVEL_COMPRESSED #%d entryPageOffset %d, " 1360 "entryCount %d, encodingsPageOffset %d, encodingsCount %d\n", 1361 baton.current_index_table_number, header.entryPageOffset, 1362 header.entryCount, header.encodingsPageOffset, 1363 header.encodingsCount); 1364 baton.compressed_second_level_page_header = header; 1365 print_second_level_index_compressed(baton); 1366 } 1367 } 1368 1369 void print_index_sections(struct baton baton) { 1370 uint8_t *index_section_offset = 1371 baton.compact_unwind_start + baton.unwind_header.indexSectionOffset; 1372 uint32_t index_count = baton.unwind_header.indexCount; 1373 1374 uint32_t cur_idx = 0; 1375 1376 uint8_t *offset = index_section_offset; 1377 while (cur_idx < index_count) { 1378 baton.current_index_table_number = cur_idx; 1379 struct unwind_info_section_header_index_entry index_entry; 1380 memcpy(&index_entry, offset, 1381 sizeof(struct unwind_info_section_header_index_entry)); 1382 printf("index section #%d: functionOffset %d, " 1383 "secondLevelPagesSectionOffset %d, lsdaIndexArraySectionOffset %d\n", 1384 cur_idx, index_entry.functionOffset, 1385 index_entry.secondLevelPagesSectionOffset, 1386 index_entry.lsdaIndexArraySectionOffset); 1387 1388 // secondLevelPagesSectionOffset == 0 means this is a sentinel entry 1389 if (index_entry.secondLevelPagesSectionOffset != 0) { 1390 struct unwind_info_section_header_index_entry next_index_entry; 1391 memcpy(&next_index_entry, 1392 offset + sizeof(struct unwind_info_section_header_index_entry), 1393 sizeof(struct unwind_info_section_header_index_entry)); 1394 1395 baton.lsda_array_start = 1396 baton.compact_unwind_start + index_entry.lsdaIndexArraySectionOffset; 1397 baton.lsda_array_end = baton.compact_unwind_start + 1398 next_index_entry.lsdaIndexArraySectionOffset; 1399 1400 uint8_t *lsda_entry_offset = baton.lsda_array_start; 1401 uint32_t lsda_count = 0; 1402 while (lsda_entry_offset < baton.lsda_array_end) { 1403 struct unwind_info_section_header_lsda_index_entry lsda_entry; 1404 memcpy(&lsda_entry, lsda_entry_offset, 1405 sizeof(struct unwind_info_section_header_lsda_index_entry)); 1406 uint64_t function_file_address = 1407 baton.first_level_index_entry.functionOffset + 1408 lsda_entry.functionOffset + baton.text_segment_vmaddr; 1409 uint64_t lsda_file_address = 1410 lsda_entry.lsdaOffset + baton.text_segment_vmaddr; 1411 printf(" LSDA [%d] functionOffset %d (%d) (file address 0x%" PRIx64 1412 "), lsdaOffset %d (file address 0x%" PRIx64 ")\n", 1413 lsda_count, lsda_entry.functionOffset, 1414 lsda_entry.functionOffset - index_entry.functionOffset, 1415 function_file_address, lsda_entry.lsdaOffset, lsda_file_address); 1416 lsda_count++; 1417 lsda_entry_offset += 1418 sizeof(struct unwind_info_section_header_lsda_index_entry); 1419 } 1420 1421 printf("\n"); 1422 1423 baton.first_level_index_entry = index_entry; 1424 print_second_level_index(baton); 1425 } 1426 1427 printf("\n"); 1428 1429 cur_idx++; 1430 offset += sizeof(struct unwind_info_section_header_index_entry); 1431 } 1432 } 1433 1434 int main(int argc, char **argv) { 1435 struct stat st; 1436 char *file = argv[0]; 1437 if (argc > 1) 1438 file = argv[1]; 1439 int fd = open(file, O_RDONLY); 1440 if (fd == -1) { 1441 printf("Failed to open '%s'\n", file); 1442 exit(1); 1443 } 1444 fstat(fd, &st); 1445 uint8_t *file_mem = 1446 (uint8_t *)mmap(0, st.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0); 1447 if (file_mem == MAP_FAILED) { 1448 printf("Failed to mmap() '%s'\n", file); 1449 } 1450 1451 FILE *f = fopen("a.out", "r"); 1452 1453 struct baton baton; 1454 baton.mach_header_start = file_mem; 1455 baton.symbols = NULL; 1456 baton.symbols_count = 0; 1457 baton.function_start_addresses = NULL; 1458 baton.function_start_addresses_count = 0; 1459 1460 scan_macho_load_commands(&baton); 1461 1462 if (baton.compact_unwind_start == NULL) { 1463 printf("could not find __TEXT,__unwind_info section\n"); 1464 exit(1); 1465 } 1466 1467 struct unwind_info_section_header header; 1468 memcpy(&header, baton.compact_unwind_start, 1469 sizeof(struct unwind_info_section_header)); 1470 printf("Header:\n"); 1471 printf(" version %u\n", header.version); 1472 printf(" commonEncodingsArraySectionOffset is %d\n", 1473 header.commonEncodingsArraySectionOffset); 1474 printf(" commonEncodingsArrayCount is %d\n", 1475 header.commonEncodingsArrayCount); 1476 printf(" personalityArraySectionOffset is %d\n", 1477 header.personalityArraySectionOffset); 1478 printf(" personalityArrayCount is %d\n", header.personalityArrayCount); 1479 printf(" indexSectionOffset is %d\n", header.indexSectionOffset); 1480 printf(" indexCount is %d\n", header.indexCount); 1481 1482 uint8_t *common_encodings = 1483 baton.compact_unwind_start + header.commonEncodingsArraySectionOffset; 1484 uint32_t encoding_idx = 0; 1485 while (encoding_idx < header.commonEncodingsArrayCount) { 1486 uint32_t encoding = *((uint32_t *)common_encodings); 1487 printf(" Common Encoding [%d]: 0x%x ", encoding_idx, encoding); 1488 print_encoding(baton, NULL, encoding); 1489 printf("\n"); 1490 common_encodings += sizeof(uint32_t); 1491 encoding_idx++; 1492 } 1493 1494 uint8_t *pers_arr = 1495 baton.compact_unwind_start + header.personalityArraySectionOffset; 1496 uint32_t pers_idx = 0; 1497 while (pers_idx < header.personalityArrayCount) { 1498 int32_t pers_delta = *((int32_t *)(baton.compact_unwind_start + 1499 header.personalityArraySectionOffset + 1500 (pers_idx * sizeof(uint32_t)))); 1501 printf(" Personality [%d]: personality function ptr @ offset %d (file " 1502 "address 0x%" PRIx64 ")\n", 1503 pers_idx, pers_delta, baton.text_segment_vmaddr + pers_delta); 1504 pers_idx++; 1505 pers_arr += sizeof(uint32_t); 1506 } 1507 1508 printf("\n"); 1509 1510 baton.unwind_header = header; 1511 1512 print_index_sections(baton); 1513 1514 return 0; 1515 } 1516