xref: /freebsd/contrib/llvm-project/lldb/tools/compact-unwind/compact-unwind-dumper.c (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
1 #include <fcntl.h>
2 #include <inttypes.h>
3 #include <mach-o/compact_unwind_encoding.h>
4 #include <mach-o/loader.h>
5 #include <mach-o/nlist.h>
6 #include <mach/machine.h>
7 #include <stdbool.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/errno.h>
13 #include <sys/mman.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 
// Extract the bit-field selected by `mask` from `value`, shifted down so that
// the field's lowest bit lands in bit 0.
//
// `mask` must be non-zero (__builtin_ctz(0) is undefined) and is expected to
// be a single contiguous run of set bits, because the field width is taken
// from its population count.  Both arguments are parenthesized so that
// compound expressions can be passed, and the width mask is built in 64 bits
// so a full 32-bit-wide mask does not shift an int by its own width.  `mask`
// is expanded twice, so it must be free of side effects.
#define EXTRACT_BITS(value, mask)                                              \
  (((value) >> __builtin_ctz(mask)) &                                          \
   (uint32_t)((UINT64_C(1) << __builtin_popcount(mask)) - 1))
19 
20 // A quick sketch of a program which can parse the compact unwind info
21 // used on Darwin systems for exception handling.  The output of
22 // unwinddump will be more authoritative/reliable but this program
23 // can dump at least the UNWIND_X86_64_MODE_RBP_FRAME format entries
24 // correctly.
25 
// One entry of the symbol table that scan_macho_load_commands() builds.
struct symbol {
  uint64_t file_address; // address of the symbol as recorded in the file
  const char *name;      // NUL-terminated symbol name
};

// qsort()/bsearch() comparator that orders struct symbol entries by ascending
// file_address.
//
// Returns a negative value, zero, or a positive value when `a` sorts before,
// equal to, or after `b`.  The addresses are compared directly rather than
// subtracted: a 64-bit difference does not fit in the int return value, so
// subtraction reports the wrong sign (or "equal") for addresses that are far
// apart.
int symbol_compare(const void *a, const void *b) {
  const struct symbol *lhs = (const struct symbol *)a;
  const struct symbol *rhs = (const struct symbol *)b;

  if (lhs->file_address < rhs->file_address) {
    return -1;
  }
  if (lhs->file_address > rhs->file_address) {
    return 1;
  }
  return 0;
}
35 
// State shared while dumping the compact unwind info of one thin Mach-O
// image.  scan_macho_load_commands() fills in the fields it can discover from
// the image's header and load commands.
struct baton {
  cpu_type_t cputype; // copied from the cputype field of the Mach-O header

  uint8_t *mach_header_start;    // pointer into this program's address space
  uint8_t *compact_unwind_start; // pointer into this program's address space;
                                 // start of the __unwind_info section, or 0
                                 // when no such section was found

  int addr_size; // 4 or 8 bytes, the size of addresses in this file

  uint64_t text_segment_vmaddr; // __TEXT segment vmaddr
  uint64_t text_segment_file_offset; // __TEXT segment fileoff

  uint64_t text_section_vmaddr; // __TEXT,__text section vmaddr
  uint64_t text_section_file_offset; // __TEXT,__text section file offset

  uint64_t eh_section_file_address; // the file address of the __TEXT,__eh_frame
                                    // section

  uint8_t
      *lsda_array_start; // for the currently-being-processed first-level index
  uint8_t
      *lsda_array_end; // the lsda_array_start for the NEXT first-level index

  // Symbol array; scan_macho_load_commands() leaves it sorted with
  // symbol_compare().  symbols_count is the number of valid entries.
  struct symbol *symbols;
  int symbols_count;

  // Function start addresses decoded from the LC_FUNCTION_STARTS load
  // command, and how many of them there are.
  uint64_t *function_start_addresses;
  int function_start_addresses_count;

  // NOTE(review): not referenced by the code visible in this chunk;
  // presumably the number of the first-level index entry being printed --
  // confirm against the code that uses it.
  int current_index_table_number;

  // NOTE(review): these types come from <mach-o/compact_unwind_encoding.h>;
  // presumably they hold the most recently read section/index/page headers --
  // confirm against the code that fills them in.
  struct unwind_info_section_header unwind_header;
  struct unwind_info_section_header_index_entry first_level_index_entry;
  struct unwind_info_compressed_second_level_page_header
      compressed_second_level_page_header;
  struct unwind_info_regular_second_level_page_header
      regular_second_level_page_header;
};
73 
// Decode one unsigned LEB128 value starting at *offset.
//
// On return *offset points at the byte following the encoded value.  Each
// input byte contributes its low 7 bits, least-significant group first; a
// clear high bit marks the final byte.
//
// Every 7-bit group is widened to 64 bits before it is shifted, so values
// above 2^31 decode correctly.  Groups that would land at or beyond bit 64 are
// still consumed (so the cursor ends up after the whole encoding) but are not
// merged into the result, because shifting by the type's width or more is
// undefined.
//
// The caller must guarantee that the encoding is terminated inside readable
// memory; no end-of-buffer check is possible with this interface.
uint64_t read_leb128(uint8_t **offset) {
  uint64_t result = 0;
  unsigned shift = 0;

  for (;;) {
    uint8_t byte = **offset;
    *offset += 1;

    if (shift < 64) {
      result |= (uint64_t)(byte & 0x7f) << shift;
    }
    if ((byte & 0x80) == 0) {
      break;
    }
    shift += 7;
  }

  return result;
}
88 
89 // step through the load commands in a thin mach-o binary,
90 // find the cputype and the start of the __TEXT,__unwind_info
91 // section, return a pointer to that section or NULL if not found.
92 
scan_macho_load_commands(struct baton * baton)93 static void scan_macho_load_commands(struct baton *baton) {
94   struct symtab_command symtab_cmd;
95   uint64_t linkedit_segment_vmaddr;
96   uint64_t linkedit_segment_file_offset;
97 
98   baton->compact_unwind_start = 0;
99 
100   uint32_t *magic = (uint32_t *)baton->mach_header_start;
101 
102   if (*magic != MH_MAGIC && *magic != MH_MAGIC_64) {
103     printf("Unexpected magic number 0x%x in header, exiting.", *magic);
104     exit(1);
105   }
106 
107   bool is_64bit = false;
108   if (*magic == MH_MAGIC_64)
109     is_64bit = true;
110 
111   uint8_t *offset = baton->mach_header_start;
112 
113   struct mach_header mh;
114   memcpy(&mh, offset, sizeof(struct mach_header));
115   if (is_64bit)
116     offset += sizeof(struct mach_header_64);
117   else
118     offset += sizeof(struct mach_header);
119 
120   if (is_64bit)
121     baton->addr_size = 8;
122   else
123     baton->addr_size = 4;
124 
125   baton->cputype = mh.cputype;
126 
127   uint8_t *start_of_load_commands = offset;
128 
129   uint32_t cur_cmd = 0;
130   while (cur_cmd < mh.ncmds &&
131          (offset - start_of_load_commands) < mh.sizeofcmds) {
132     struct load_command lc;
133     uint32_t *lc_cmd = (uint32_t *)offset;
134     uint32_t *lc_cmdsize = (uint32_t *)offset + 1;
135     uint8_t *start_of_this_load_cmd = offset;
136 
137     if (*lc_cmd == LC_SEGMENT || *lc_cmd == LC_SEGMENT_64) {
138       char segment_name[17];
139       segment_name[0] = '\0';
140       uint32_t nsects = 0;
141       uint64_t segment_offset = 0;
142       uint64_t segment_vmaddr = 0;
143 
144       if (*lc_cmd == LC_SEGMENT_64) {
145         struct segment_command_64 seg;
146         memcpy(&seg, offset, sizeof(struct segment_command_64));
147         memcpy(&segment_name, &seg.segname, 16);
148         segment_name[16] = '\0';
149         nsects = seg.nsects;
150         segment_offset = seg.fileoff;
151         segment_vmaddr = seg.vmaddr;
152         offset += sizeof(struct segment_command_64);
153         if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
154           printf("Segment '%s' is encrypted.\n", segment_name);
155         }
156       }
157 
158       if (*lc_cmd == LC_SEGMENT) {
159         struct segment_command seg;
160         memcpy(&seg, offset, sizeof(struct segment_command));
161         memcpy(&segment_name, &seg.segname, 16);
162         segment_name[16] = '\0';
163         nsects = seg.nsects;
164         segment_offset = seg.fileoff;
165         segment_vmaddr = seg.vmaddr;
166         offset += sizeof(struct segment_command);
167         if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
168           printf("Segment '%s' is encrypted.\n", segment_name);
169         }
170       }
171 
172       if (nsects != 0 && strcmp(segment_name, "__TEXT") == 0) {
173         baton->text_segment_vmaddr = segment_vmaddr;
174         baton->text_segment_file_offset = segment_offset;
175 
176         uint32_t current_sect = 0;
177         while (current_sect < nsects &&
178                (offset - start_of_this_load_cmd) < *lc_cmdsize) {
179           char sect_name[17];
180           memcpy(&sect_name, offset, 16);
181           sect_name[16] = '\0';
182           if (strcmp(sect_name, "__unwind_info") == 0) {
183             if (is_64bit) {
184               struct section_64 sect;
185               memset(&sect, 0, sizeof(struct section_64));
186               memcpy(&sect, offset, sizeof(struct section_64));
187               baton->compact_unwind_start =
188                   baton->mach_header_start + sect.offset;
189             } else {
190               struct section sect;
191               memset(&sect, 0, sizeof(struct section));
192               memcpy(&sect, offset, sizeof(struct section));
193               baton->compact_unwind_start =
194                   baton->mach_header_start + sect.offset;
195             }
196           }
197           if (strcmp(sect_name, "__eh_frame") == 0) {
198             if (is_64bit) {
199               struct section_64 sect;
200               memset(&sect, 0, sizeof(struct section_64));
201               memcpy(&sect, offset, sizeof(struct section_64));
202               baton->eh_section_file_address = sect.addr;
203             } else {
204               struct section sect;
205               memset(&sect, 0, sizeof(struct section));
206               memcpy(&sect, offset, sizeof(struct section));
207               baton->eh_section_file_address = sect.addr;
208             }
209           }
210           if (strcmp(sect_name, "__text") == 0) {
211             if (is_64bit) {
212               struct section_64 sect;
213               memset(&sect, 0, sizeof(struct section_64));
214               memcpy(&sect, offset, sizeof(struct section_64));
215               baton->text_section_vmaddr = sect.addr;
216               baton->text_section_file_offset = sect.offset;
217             } else {
218               struct section sect;
219               memset(&sect, 0, sizeof(struct section));
220               memcpy(&sect, offset, sizeof(struct section));
221               baton->text_section_vmaddr = sect.addr;
222             }
223           }
224           if (is_64bit) {
225             offset += sizeof(struct section_64);
226           } else {
227             offset += sizeof(struct section);
228           }
229         }
230       }
231 
232       if (strcmp(segment_name, "__LINKEDIT") == 0) {
233         linkedit_segment_vmaddr = segment_vmaddr;
234         linkedit_segment_file_offset = segment_offset;
235       }
236     }
237 
238     if (*lc_cmd == LC_SYMTAB) {
239       memcpy(&symtab_cmd, offset, sizeof(struct symtab_command));
240     }
241 
242     if (*lc_cmd == LC_DYSYMTAB) {
243       struct dysymtab_command dysymtab_cmd;
244       memcpy(&dysymtab_cmd, offset, sizeof(struct dysymtab_command));
245 
246       int nlist_size = 12;
247       if (is_64bit)
248         nlist_size = 16;
249 
250       char *string_table =
251           (char *)(baton->mach_header_start + symtab_cmd.stroff);
252       uint8_t *local_syms = baton->mach_header_start + symtab_cmd.symoff +
253                             (dysymtab_cmd.ilocalsym * nlist_size);
254       int local_syms_count = dysymtab_cmd.nlocalsym;
255       uint8_t *exported_syms = baton->mach_header_start + symtab_cmd.symoff +
256                                (dysymtab_cmd.iextdefsym * nlist_size);
257       int exported_syms_count = dysymtab_cmd.nextdefsym;
258 
259       // We're only going to create records for a small number of these symbols
260       // but to
261       // simplify the memory management I'll allocate enough space to store all
262       // of them.
263       baton->symbols = (struct symbol *)malloc(
264           sizeof(struct symbol) * (local_syms_count + exported_syms_count));
265       baton->symbols_count = 0;
266 
267       for (int i = 0; i < local_syms_count; i++) {
268         struct nlist_64 nlist;
269         memset(&nlist, 0, sizeof(struct nlist_64));
270         if (is_64bit) {
271           memcpy(&nlist, local_syms + (i * nlist_size),
272                  sizeof(struct nlist_64));
273         } else {
274           struct nlist nlist_32;
275           memset(&nlist_32, 0, sizeof(struct nlist));
276           memcpy(&nlist_32, local_syms + (i * nlist_size),
277                  sizeof(struct nlist));
278           nlist.n_un.n_strx = nlist_32.n_un.n_strx;
279           nlist.n_type = nlist_32.n_type;
280           nlist.n_sect = nlist_32.n_sect;
281           nlist.n_desc = nlist_32.n_desc;
282           nlist.n_value = nlist_32.n_value;
283         }
284         if ((nlist.n_type & N_STAB) == 0 &&
285             ((nlist.n_type & N_EXT) == 1 ||
286              ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
287             nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
288           baton->symbols[baton->symbols_count].file_address = nlist.n_value;
289           if (baton->cputype == CPU_TYPE_ARM)
290             baton->symbols[baton->symbols_count].file_address =
291                 baton->symbols[baton->symbols_count].file_address & ~1;
292           baton->symbols[baton->symbols_count].name =
293               string_table + nlist.n_un.n_strx;
294           baton->symbols_count++;
295         }
296       }
297 
298       for (int i = 0; i < exported_syms_count; i++) {
299         struct nlist_64 nlist;
300         memset(&nlist, 0, sizeof(struct nlist_64));
301         if (is_64bit) {
302           memcpy(&nlist, exported_syms + (i * nlist_size),
303                  sizeof(struct nlist_64));
304         } else {
305           struct nlist nlist_32;
306           memcpy(&nlist_32, exported_syms + (i * nlist_size),
307                  sizeof(struct nlist));
308           nlist.n_un.n_strx = nlist_32.n_un.n_strx;
309           nlist.n_type = nlist_32.n_type;
310           nlist.n_sect = nlist_32.n_sect;
311           nlist.n_desc = nlist_32.n_desc;
312           nlist.n_value = nlist_32.n_value;
313         }
314         if ((nlist.n_type & N_STAB) == 0 &&
315             ((nlist.n_type & N_EXT) == 1 ||
316              ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
317             nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
318           baton->symbols[baton->symbols_count].file_address = nlist.n_value;
319           if (baton->cputype == CPU_TYPE_ARM)
320             baton->symbols[baton->symbols_count].file_address =
321                 baton->symbols[baton->symbols_count].file_address & ~1;
322           baton->symbols[baton->symbols_count].name =
323               string_table + nlist.n_un.n_strx;
324           baton->symbols_count++;
325         }
326       }
327 
328       qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
329             symbol_compare);
330     }
331 
332     if (*lc_cmd == LC_FUNCTION_STARTS) {
333       struct linkedit_data_command function_starts_cmd;
334       memcpy(&function_starts_cmd, offset,
335              sizeof(struct linkedit_data_command));
336 
337       uint8_t *funcstarts_offset =
338           baton->mach_header_start + function_starts_cmd.dataoff;
339       uint8_t *function_end = funcstarts_offset + function_starts_cmd.datasize;
340       int count = 0;
341 
342       while (funcstarts_offset < function_end) {
343         if (read_leb128(&funcstarts_offset) != 0) {
344           count++;
345         }
346       }
347 
348       baton->function_start_addresses =
349           (uint64_t *)malloc(sizeof(uint64_t) * count);
350       baton->function_start_addresses_count = count;
351 
352       funcstarts_offset =
353           baton->mach_header_start + function_starts_cmd.dataoff;
354       uint64_t current_pc = baton->text_segment_vmaddr;
355       int i = 0;
356       while (funcstarts_offset < function_end) {
357         uint64_t func_start = read_leb128(&funcstarts_offset);
358         if (func_start != 0) {
359           current_pc += func_start;
360           baton->function_start_addresses[i++] = current_pc;
361         }
362       }
363     }
364 
365     offset = start_of_this_load_cmd + *lc_cmdsize;
366     cur_cmd++;
367   }
368 
369   // Augment the symbol table with the function starts table -- adding symbol
370   // entries
371   // for functions that were stripped.
372 
373   int unnamed_functions_to_add = 0;
374   for (int i = 0; i < baton->function_start_addresses_count; i++) {
375     struct symbol search_key;
376     search_key.file_address = baton->function_start_addresses[i];
377     if (baton->cputype == CPU_TYPE_ARM)
378       search_key.file_address = search_key.file_address & ~1;
379     struct symbol *sym =
380         bsearch(&search_key, baton->symbols, baton->symbols_count,
381                 sizeof(struct symbol), symbol_compare);
382     if (sym == NULL)
383       unnamed_functions_to_add++;
384   }
385 
386   baton->symbols = (struct symbol *)realloc(
387       baton->symbols, sizeof(struct symbol) *
388                           (baton->symbols_count + unnamed_functions_to_add));
389 
390   int current_unnamed_symbol = 1;
391   int number_symbols_added = 0;
392   for (int i = 0; i < baton->function_start_addresses_count; i++) {
393     struct symbol search_key;
394     search_key.file_address = baton->function_start_addresses[i];
395     if (baton->cputype == CPU_TYPE_ARM)
396       search_key.file_address = search_key.file_address & ~1;
397     struct symbol *sym =
398         bsearch(&search_key, baton->symbols, baton->symbols_count,
399                 sizeof(struct symbol), symbol_compare);
400     if (sym == NULL) {
401       char *name;
402       asprintf(&name, "unnamed function #%d", current_unnamed_symbol++);
403       baton->symbols[baton->symbols_count + number_symbols_added].file_address =
404           baton->function_start_addresses[i];
405       baton->symbols[baton->symbols_count + number_symbols_added].name = name;
406       number_symbols_added++;
407     }
408   }
409   baton->symbols_count += number_symbols_added;
410   qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
411         symbol_compare);
412 
413   //    printf ("function start addresses\n");
414   //    for (int i = 0; i < baton->function_start_addresses_count; i++)
415   //    {
416   //        printf ("0x%012llx\n", baton->function_start_addresses[i]);
417   //    }
418 
419   //    printf ("symbol table names & addresses\n");
420   //    for (int i = 0; i < baton->symbols_count; i++)
421   //    {
422   //        printf ("0x%012llx %s\n", baton->symbols[i].file_address,
423   //        baton->symbols[i].name);
424   //    }
425 }
426 
print_encoding_x86_64(struct baton baton,uint8_t * function_start,uint32_t encoding)427 void print_encoding_x86_64(struct baton baton, uint8_t *function_start,
428                            uint32_t encoding) {
429   int mode = encoding & UNWIND_X86_64_MODE_MASK;
430   switch (mode) {
431   case UNWIND_X86_64_MODE_RBP_FRAME: {
432     printf("frame func: CFA is rbp+%d ", 16);
433     printf(" rip=[CFA-8] rbp=[CFA-16]");
434     uint32_t saved_registers_offset =
435         EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
436 
437     uint32_t saved_registers_locations =
438         EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
439 
440     saved_registers_offset += 2;
441 
442     for (int i = 0; i < 5; i++) {
443       switch (saved_registers_locations & 0x7) {
444       case UNWIND_X86_64_REG_NONE:
445         break;
446       case UNWIND_X86_64_REG_RBX:
447         printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
448         break;
449       case UNWIND_X86_64_REG_R12:
450         printf(" r12=[CFA-%d]", saved_registers_offset * 8);
451         break;
452       case UNWIND_X86_64_REG_R13:
453         printf(" r13=[CFA-%d]", saved_registers_offset * 8);
454         break;
455       case UNWIND_X86_64_REG_R14:
456         printf(" r14=[CFA-%d]", saved_registers_offset * 8);
457         break;
458       case UNWIND_X86_64_REG_R15:
459         printf(" r15=[CFA-%d]", saved_registers_offset * 8);
460         break;
461       }
462       saved_registers_offset--;
463       saved_registers_locations >>= 3;
464     }
465   } break;
466 
467   case UNWIND_X86_64_MODE_STACK_IND:
468   case UNWIND_X86_64_MODE_STACK_IMMD: {
469     uint32_t stack_size =
470         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
471     uint32_t register_count =
472         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
473     uint32_t permutation =
474         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
475 
476     if (mode == UNWIND_X86_64_MODE_STACK_IND && function_start) {
477       uint32_t stack_adjust =
478           EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
479 
480       // offset into the function instructions; 0 == beginning of first
481       // instruction
482       uint32_t offset_to_subl_insn =
483           EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
484 
485       stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
486 
487       stack_size += stack_adjust * 8;
488 
489       printf("large stack ");
490     }
491 
492     if (mode == UNWIND_X86_64_MODE_STACK_IND) {
493       printf("frameless function: stack size %d, register count %d ",
494              stack_size * 8, register_count);
495     } else {
496       printf("frameless function: stack size %d, register count %d ",
497              stack_size, register_count);
498     }
499 
500     if (register_count == 0) {
501       printf(" no registers saved");
502     } else {
503 
504       // We need to include (up to) 6 registers in 10 bits.
505       // That would be 18 bits if we just used 3 bits per reg to indicate
506       // the order they're saved on the stack.
507       //
508       // This is done with Lehmer code permutation, e.g. see
509       // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
510       int permunreg[6];
511 
512       // This decodes the variable-base number in the 10 bits
513       // and gives us the Lehmer code sequence which can then
514       // be decoded.
515 
516       switch (register_count) {
517       case 6:
518         permunreg[0] = permutation / 120; // 120 == 5!
519         permutation -= (permunreg[0] * 120);
520         permunreg[1] = permutation / 24; // 24 == 4!
521         permutation -= (permunreg[1] * 24);
522         permunreg[2] = permutation / 6; // 6 == 3!
523         permutation -= (permunreg[2] * 6);
524         permunreg[3] = permutation / 2; // 2 == 2!
525         permutation -= (permunreg[3] * 2);
526         permunreg[4] = permutation; // 1 == 1!
527         permunreg[5] = 0;
528         break;
529       case 5:
530         permunreg[0] = permutation / 120;
531         permutation -= (permunreg[0] * 120);
532         permunreg[1] = permutation / 24;
533         permutation -= (permunreg[1] * 24);
534         permunreg[2] = permutation / 6;
535         permutation -= (permunreg[2] * 6);
536         permunreg[3] = permutation / 2;
537         permutation -= (permunreg[3] * 2);
538         permunreg[4] = permutation;
539         break;
540       case 4:
541         permunreg[0] = permutation / 60;
542         permutation -= (permunreg[0] * 60);
543         permunreg[1] = permutation / 12;
544         permutation -= (permunreg[1] * 12);
545         permunreg[2] = permutation / 3;
546         permutation -= (permunreg[2] * 3);
547         permunreg[3] = permutation;
548         break;
549       case 3:
550         permunreg[0] = permutation / 20;
551         permutation -= (permunreg[0] * 20);
552         permunreg[1] = permutation / 4;
553         permutation -= (permunreg[1] * 4);
554         permunreg[2] = permutation;
555         break;
556       case 2:
557         permunreg[0] = permutation / 5;
558         permutation -= (permunreg[0] * 5);
559         permunreg[1] = permutation;
560         break;
561       case 1:
562         permunreg[0] = permutation;
563         break;
564       }
565 
566       // Decode the Lehmer code for this permutation of
567       // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
568 
569       int registers[6];
570       bool used[7] = {false, false, false, false, false, false, false};
571       for (int i = 0; i < register_count; i++) {
572         int renum = 0;
573         for (int j = 1; j < 7; j++) {
574           if (used[j] == false) {
575             if (renum == permunreg[i]) {
576               registers[i] = j;
577               used[j] = true;
578               break;
579             }
580             renum++;
581           }
582         }
583       }
584 
585       if (mode == UNWIND_X86_64_MODE_STACK_IND) {
586         printf(" CFA is rsp+%d ", stack_size);
587       } else {
588         printf(" CFA is rsp+%d ", stack_size * 8);
589       }
590 
591       uint32_t saved_registers_offset = 1;
592       printf(" rip=[CFA-%d]", saved_registers_offset * 8);
593       saved_registers_offset++;
594 
595       for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
596         switch (registers[i]) {
597         case UNWIND_X86_64_REG_NONE:
598           break;
599         case UNWIND_X86_64_REG_RBX:
600           printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
601           saved_registers_offset++;
602           break;
603         case UNWIND_X86_64_REG_R12:
604           printf(" r12=[CFA-%d]", saved_registers_offset * 8);
605           saved_registers_offset++;
606           break;
607         case UNWIND_X86_64_REG_R13:
608           printf(" r13=[CFA-%d]", saved_registers_offset * 8);
609           saved_registers_offset++;
610           break;
611         case UNWIND_X86_64_REG_R14:
612           printf(" r14=[CFA-%d]", saved_registers_offset * 8);
613           saved_registers_offset++;
614           break;
615         case UNWIND_X86_64_REG_R15:
616           printf(" r15=[CFA-%d]", saved_registers_offset * 8);
617           saved_registers_offset++;
618           break;
619         case UNWIND_X86_64_REG_RBP:
620           printf(" rbp=[CFA-%d]", saved_registers_offset * 8);
621           saved_registers_offset++;
622           break;
623         }
624       }
625     }
626 
627   } break;
628 
629   case UNWIND_X86_64_MODE_DWARF: {
630     uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
631     printf(
632         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
633         ")",
634         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
635   } break;
636 
637   case 0: {
638     printf(" no unwind information");
639   } break;
640   }
641 }
642 
print_encoding_i386(struct baton baton,uint8_t * function_start,uint32_t encoding)643 void print_encoding_i386(struct baton baton, uint8_t *function_start,
644                          uint32_t encoding) {
645   int mode = encoding & UNWIND_X86_MODE_MASK;
646   switch (mode) {
647   case UNWIND_X86_MODE_EBP_FRAME: {
648     printf("frame func: CFA is ebp+%d ", 8);
649     printf(" eip=[CFA-4] ebp=[CFA-8]");
650     uint32_t saved_registers_offset =
651         EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_OFFSET);
652 
653     uint32_t saved_registers_locations =
654         EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_REGISTERS);
655 
656     saved_registers_offset += 2;
657 
658     for (int i = 0; i < 5; i++) {
659       switch (saved_registers_locations & 0x7) {
660       case UNWIND_X86_REG_NONE:
661         break;
662       case UNWIND_X86_REG_EBX:
663         printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
664         break;
665       case UNWIND_X86_REG_ECX:
666         printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
667         break;
668       case UNWIND_X86_REG_EDX:
669         printf(" edx=[CFA-%d]", saved_registers_offset * 4);
670         break;
671       case UNWIND_X86_REG_EDI:
672         printf(" edi=[CFA-%d]", saved_registers_offset * 4);
673         break;
674       case UNWIND_X86_REG_ESI:
675         printf(" esi=[CFA-%d]", saved_registers_offset * 4);
676         break;
677       }
678       saved_registers_offset--;
679       saved_registers_locations >>= 3;
680     }
681   } break;
682 
683   case UNWIND_X86_MODE_STACK_IND:
684   case UNWIND_X86_MODE_STACK_IMMD: {
685     uint32_t stack_size =
686         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
687     uint32_t register_count =
688         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
689     uint32_t permutation =
690         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
691 
692     if (mode == UNWIND_X86_MODE_STACK_IND && function_start) {
693       uint32_t stack_adjust =
694           EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
695 
696       // offset into the function instructions; 0 == beginning of first
697       // instruction
698       uint32_t offset_to_subl_insn =
699           EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
700 
701       stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
702 
703       stack_size += stack_adjust * 4;
704 
705       printf("large stack ");
706     }
707 
708     if (mode == UNWIND_X86_MODE_STACK_IND) {
709       printf("frameless function: stack size %d, register count %d ",
710              stack_size, register_count);
711     } else {
712       printf("frameless function: stack size %d, register count %d ",
713              stack_size * 4, register_count);
714     }
715 
716     if (register_count == 0) {
717       printf(" no registers saved");
718     } else {
719 
720       // We need to include (up to) 6 registers in 10 bits.
721       // That would be 18 bits if we just used 3 bits per reg to indicate
722       // the order they're saved on the stack.
723       //
724       // This is done with Lehmer code permutation, e.g. see
725       // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
726       int permunreg[6];
727 
728       // This decodes the variable-base number in the 10 bits
729       // and gives us the Lehmer code sequence which can then
730       // be decoded.
731 
732       switch (register_count) {
733       case 6:
734         permunreg[0] = permutation / 120; // 120 == 5!
735         permutation -= (permunreg[0] * 120);
736         permunreg[1] = permutation / 24; // 24 == 4!
737         permutation -= (permunreg[1] * 24);
738         permunreg[2] = permutation / 6; // 6 == 3!
739         permutation -= (permunreg[2] * 6);
740         permunreg[3] = permutation / 2; // 2 == 2!
741         permutation -= (permunreg[3] * 2);
742         permunreg[4] = permutation; // 1 == 1!
743         permunreg[5] = 0;
744         break;
745       case 5:
746         permunreg[0] = permutation / 120;
747         permutation -= (permunreg[0] * 120);
748         permunreg[1] = permutation / 24;
749         permutation -= (permunreg[1] * 24);
750         permunreg[2] = permutation / 6;
751         permutation -= (permunreg[2] * 6);
752         permunreg[3] = permutation / 2;
753         permutation -= (permunreg[3] * 2);
754         permunreg[4] = permutation;
755         break;
756       case 4:
757         permunreg[0] = permutation / 60;
758         permutation -= (permunreg[0] * 60);
759         permunreg[1] = permutation / 12;
760         permutation -= (permunreg[1] * 12);
761         permunreg[2] = permutation / 3;
762         permutation -= (permunreg[2] * 3);
763         permunreg[3] = permutation;
764         break;
765       case 3:
766         permunreg[0] = permutation / 20;
767         permutation -= (permunreg[0] * 20);
768         permunreg[1] = permutation / 4;
769         permutation -= (permunreg[1] * 4);
770         permunreg[2] = permutation;
771         break;
772       case 2:
773         permunreg[0] = permutation / 5;
774         permutation -= (permunreg[0] * 5);
775         permunreg[1] = permutation;
776         break;
777       case 1:
778         permunreg[0] = permutation;
779         break;
780       }
781 
782       // Decode the Lehmer code for this permutation of
783       // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
784 
785       int registers[6];
786       bool used[7] = {false, false, false, false, false, false, false};
787       for (int i = 0; i < register_count; i++) {
788         int renum = 0;
789         for (int j = 1; j < 7; j++) {
790           if (used[j] == false) {
791             if (renum == permunreg[i]) {
792               registers[i] = j;
793               used[j] = true;
794               break;
795             }
796             renum++;
797           }
798         }
799       }
800 
801       if (mode == UNWIND_X86_MODE_STACK_IND) {
802         printf(" CFA is esp+%d ", stack_size);
803       } else {
804         printf(" CFA is esp+%d ", stack_size * 4);
805       }
806 
807       uint32_t saved_registers_offset = 1;
808       printf(" eip=[CFA-%d]", saved_registers_offset * 4);
809       saved_registers_offset++;
810 
811       for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
812         switch (registers[i]) {
813         case UNWIND_X86_REG_NONE:
814           break;
815         case UNWIND_X86_REG_EBX:
816           printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
817           saved_registers_offset++;
818           break;
819         case UNWIND_X86_REG_ECX:
820           printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
821           saved_registers_offset++;
822           break;
823         case UNWIND_X86_REG_EDX:
824           printf(" edx=[CFA-%d]", saved_registers_offset * 4);
825           saved_registers_offset++;
826           break;
827         case UNWIND_X86_REG_EDI:
828           printf(" edi=[CFA-%d]", saved_registers_offset * 4);
829           saved_registers_offset++;
830           break;
831         case UNWIND_X86_REG_ESI:
832           printf(" esi=[CFA-%d]", saved_registers_offset * 4);
833           saved_registers_offset++;
834           break;
835         case UNWIND_X86_REG_EBP:
836           printf(" ebp=[CFA-%d]", saved_registers_offset * 4);
837           saved_registers_offset++;
838           break;
839         }
840       }
841     }
842 
843   } break;
844 
845   case UNWIND_X86_MODE_DWARF: {
846     uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
847     printf(
848         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
849         ")",
850         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
851   } break;
852 
853   case 0: {
854     printf(" no unwind information");
855   } break;
856   }
857 }
858 
print_encoding_arm64(struct baton baton,uint8_t * function_start,uint32_t encoding)859 void print_encoding_arm64(struct baton baton, uint8_t *function_start,
860                           uint32_t encoding) {
861   const int wordsize = 8;
862   int mode = encoding & UNWIND_ARM64_MODE_MASK;
863   switch (mode) {
864   case UNWIND_ARM64_MODE_FRAME: {
865     printf("frame func: CFA is fp+%d ", 16);
866     printf(" pc=[CFA-8] fp=[CFA-16]");
867     int reg_pairs_saved_count = 1;
868     uint32_t saved_register_bits = encoding & 0xfff;
869     if (saved_register_bits & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
870       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
871       cfa_offset -= wordsize;
872       printf(" x19=[CFA%d]", cfa_offset);
873       cfa_offset -= wordsize;
874       printf(" x20=[CFA%d]", cfa_offset);
875       reg_pairs_saved_count++;
876     }
877     if (saved_register_bits & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
878       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
879       cfa_offset -= wordsize;
880       printf(" x21=[CFA%d]", cfa_offset);
881       cfa_offset -= wordsize;
882       printf(" x22=[CFA%d]", cfa_offset);
883       reg_pairs_saved_count++;
884     }
885     if (saved_register_bits & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
886       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
887       cfa_offset -= wordsize;
888       printf(" x23=[CFA%d]", cfa_offset);
889       cfa_offset -= wordsize;
890       printf(" x24=[CFA%d]", cfa_offset);
891       reg_pairs_saved_count++;
892     }
893     if (saved_register_bits & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
894       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
895       cfa_offset -= wordsize;
896       printf(" x25=[CFA%d]", cfa_offset);
897       cfa_offset -= wordsize;
898       printf(" x26=[CFA%d]", cfa_offset);
899       reg_pairs_saved_count++;
900     }
901     if (saved_register_bits & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
902       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
903       cfa_offset -= wordsize;
904       printf(" x27=[CFA%d]", cfa_offset);
905       cfa_offset -= wordsize;
906       printf(" x28=[CFA%d]", cfa_offset);
907       reg_pairs_saved_count++;
908     }
909     if (saved_register_bits & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
910       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
911       cfa_offset -= wordsize;
912       printf(" d8=[CFA%d]", cfa_offset);
913       cfa_offset -= wordsize;
914       printf(" d9=[CFA%d]", cfa_offset);
915       reg_pairs_saved_count++;
916     }
917     if (saved_register_bits & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
918       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
919       cfa_offset -= wordsize;
920       printf(" d10=[CFA%d]", cfa_offset);
921       cfa_offset -= wordsize;
922       printf(" d11=[CFA%d]", cfa_offset);
923       reg_pairs_saved_count++;
924     }
925     if (saved_register_bits & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
926       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
927       cfa_offset -= wordsize;
928       printf(" d12=[CFA%d]", cfa_offset);
929       cfa_offset -= wordsize;
930       printf(" d13=[CFA%d]", cfa_offset);
931       reg_pairs_saved_count++;
932     }
933     if (saved_register_bits & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
934       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
935       cfa_offset -= wordsize;
936       printf(" d14=[CFA%d]", cfa_offset);
937       cfa_offset -= wordsize;
938       printf(" d15=[CFA%d]", cfa_offset);
939       reg_pairs_saved_count++;
940     }
941 
942   } break;
943 
944   case UNWIND_ARM64_MODE_FRAMELESS: {
945     uint32_t stack_size = encoding & UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK;
946     printf("frameless function: stack size %d ", stack_size * 16);
947 
948   } break;
949 
950   case UNWIND_ARM64_MODE_DWARF: {
951     uint32_t dwarf_offset = encoding & UNWIND_ARM64_DWARF_SECTION_OFFSET;
952     printf(
953         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
954         ")",
955         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
956   } break;
957 
958   case 0: {
959     printf(" no unwind information");
960   } break;
961   }
962 }
963 
print_encoding_armv7(struct baton baton,uint8_t * function_start,uint32_t encoding)964 void print_encoding_armv7(struct baton baton, uint8_t *function_start,
965                           uint32_t encoding) {
966   const int wordsize = 4;
967   int mode = encoding & UNWIND_ARM_MODE_MASK;
968   switch (mode) {
969   case UNWIND_ARM_MODE_FRAME_D:
970   case UNWIND_ARM_MODE_FRAME: {
971     int stack_adjust =
972         EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_STACK_ADJUST_MASK) * wordsize;
973 
974     printf("frame func: CFA is fp+%d ", (2 * wordsize) + stack_adjust);
975     int cfa_offset = -stack_adjust;
976 
977     cfa_offset -= wordsize;
978     printf(" pc=[CFA%d]", cfa_offset);
979     cfa_offset -= wordsize;
980     printf(" fp=[CFA%d]", cfa_offset);
981 
982     uint32_t saved_register_bits = encoding & 0xff;
983     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R6) {
984       cfa_offset -= wordsize;
985       printf(" r6=[CFA%d]", cfa_offset);
986     }
987     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R5) {
988       cfa_offset -= wordsize;
989       printf(" r5=[CFA%d]", cfa_offset);
990     }
991     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R4) {
992       cfa_offset -= wordsize;
993       printf(" r4=[CFA%d]", cfa_offset);
994     }
995     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R12) {
996       cfa_offset -= wordsize;
997       printf(" r12=[CFA%d]", cfa_offset);
998     }
999     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R11) {
1000       cfa_offset -= wordsize;
1001       printf(" r11=[CFA%d]", cfa_offset);
1002     }
1003     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R10) {
1004       cfa_offset -= wordsize;
1005       printf(" r10=[CFA%d]", cfa_offset);
1006     }
1007     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R9) {
1008       cfa_offset -= wordsize;
1009       printf(" r9=[CFA%d]", cfa_offset);
1010     }
1011     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R8) {
1012       cfa_offset -= wordsize;
1013       printf(" r8=[CFA%d]", cfa_offset);
1014     }
1015 
1016     if (mode == UNWIND_ARM_MODE_FRAME_D) {
1017       uint32_t d_reg_bits =
1018           EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_D_REG_COUNT_MASK);
1019       switch (d_reg_bits) {
1020       case 0:
1021         // vpush {d8}
1022         cfa_offset -= 8;
1023         printf(" d8=[CFA%d]", cfa_offset);
1024         break;
1025       case 1:
1026         // vpush {d10}
1027         // vpush {d8}
1028         cfa_offset -= 8;
1029         printf(" d10=[CFA%d]", cfa_offset);
1030         cfa_offset -= 8;
1031         printf(" d8=[CFA%d]", cfa_offset);
1032         break;
1033       case 2:
1034         // vpush {d12}
1035         // vpush {d10}
1036         // vpush {d8}
1037         cfa_offset -= 8;
1038         printf(" d12=[CFA%d]", cfa_offset);
1039         cfa_offset -= 8;
1040         printf(" d10=[CFA%d]", cfa_offset);
1041         cfa_offset -= 8;
1042         printf(" d8=[CFA%d]", cfa_offset);
1043         break;
1044       case 3:
1045         // vpush {d14}
1046         // vpush {d12}
1047         // vpush {d10}
1048         // vpush {d8}
1049         cfa_offset -= 8;
1050         printf(" d14=[CFA%d]", cfa_offset);
1051         cfa_offset -= 8;
1052         printf(" d12=[CFA%d]", cfa_offset);
1053         cfa_offset -= 8;
1054         printf(" d10=[CFA%d]", cfa_offset);
1055         cfa_offset -= 8;
1056         printf(" d8=[CFA%d]", cfa_offset);
1057         break;
1058       case 4:
1059         // vpush {d14}
1060         // vpush {d12}
1061         // sp = (sp - 24) & (-16);
1062         // vst   {d8, d9, d10}
1063         printf(" d14, d12, d10, d9, d8");
1064         break;
1065       case 5:
1066         // vpush {d14}
1067         // sp = (sp - 40) & (-16);
1068         // vst   {d8, d9, d10, d11}
1069         // vst   {d12}
1070         printf(" d14, d11, d10, d9, d8, d12");
1071         break;
1072       case 6:
1073         // sp = (sp - 56) & (-16);
1074         // vst   {d8, d9, d10, d11}
1075         // vst   {d12, d13, d14}
1076         printf(" d11, d10, d9, d8, d14, d13, d12");
1077         break;
1078       case 7:
1079         // sp = (sp - 64) & (-16);
1080         // vst   {d8, d9, d10, d11}
1081         // vst   {d12, d13, d14, d15}
1082         printf(" d11, d10, d9, d8, d15, d14, d13, d12");
1083         break;
1084       }
1085     }
1086   } break;
1087 
1088   case UNWIND_ARM_MODE_DWARF: {
1089     uint32_t dwarf_offset = encoding & UNWIND_ARM_DWARF_SECTION_OFFSET;
1090     printf(
1091         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
1092         ")",
1093         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
1094   } break;
1095 
1096   case 0: {
1097     printf(" no unwind information");
1098   } break;
1099   }
1100 }
1101 
print_encoding(struct baton baton,uint8_t * function_start,uint32_t encoding)1102 void print_encoding(struct baton baton, uint8_t *function_start,
1103                     uint32_t encoding) {
1104 
1105   if (baton.cputype == CPU_TYPE_X86_64) {
1106     print_encoding_x86_64(baton, function_start, encoding);
1107   } else if (baton.cputype == CPU_TYPE_I386) {
1108     print_encoding_i386(baton, function_start, encoding);
1109   } else if (baton.cputype == CPU_TYPE_ARM64 || baton.cputype == CPU_TYPE_ARM64_32) {
1110     print_encoding_arm64(baton, function_start, encoding);
1111   } else if (baton.cputype == CPU_TYPE_ARM) {
1112     print_encoding_armv7(baton, function_start, encoding);
1113   } else {
1114     printf(" -- unsupported encoding arch -- ");
1115   }
1116 }
1117 
print_function_encoding(struct baton baton,uint32_t idx,uint32_t encoding,uint32_t entry_encoding_index,uint32_t entry_func_offset)1118 void print_function_encoding(struct baton baton, uint32_t idx,
1119                              uint32_t encoding, uint32_t entry_encoding_index,
1120                              uint32_t entry_func_offset) {
1121 
1122   char *entry_encoding_index_str = "";
1123   if (entry_encoding_index != (uint32_t)-1) {
1124     asprintf(&entry_encoding_index_str, ", encoding #%d", entry_encoding_index);
1125   } else {
1126     asprintf(&entry_encoding_index_str, "");
1127   }
1128 
1129   uint64_t file_address = baton.first_level_index_entry.functionOffset +
1130                           entry_func_offset + baton.text_segment_vmaddr;
1131 
1132   if (baton.cputype == CPU_TYPE_ARM)
1133     file_address = file_address & ~1;
1134 
1135   printf(
1136       "    func [%d] offset %d (file addr 0x%" PRIx64 ")%s, encoding is 0x%x",
1137       idx, entry_func_offset, file_address, entry_encoding_index_str, encoding);
1138 
1139   struct symbol *symbol = NULL;
1140   for (int i = 0; i < baton.symbols_count; i++) {
1141     if (i == baton.symbols_count - 1 &&
1142         baton.symbols[i].file_address <= file_address) {
1143       symbol = &(baton.symbols[i]);
1144       break;
1145     } else {
1146       if (baton.symbols[i].file_address <= file_address &&
1147           baton.symbols[i + 1].file_address > file_address) {
1148         symbol = &(baton.symbols[i]);
1149         break;
1150       }
1151     }
1152   }
1153 
1154   printf("\n         ");
1155   if (symbol) {
1156     int offset = file_address - symbol->file_address;
1157 
1158     // FIXME this is a poor heuristic - if we're greater than 16 bytes past the
1159     // start of the function, this is the unwind info for a stripped function.
1160     // In reality the compact unwind entry may not line up exactly with the
1161     // function bounds.
1162     if (offset >= 0) {
1163       printf("name: %s", symbol->name);
1164       if (offset > 0) {
1165         printf(" + %d", offset);
1166       }
1167     }
1168     printf("\n         ");
1169   }
1170 
1171   print_encoding(baton, baton.mach_header_start +
1172                             baton.first_level_index_entry.functionOffset +
1173                             baton.text_section_file_offset + entry_func_offset,
1174                  encoding);
1175 
1176   bool has_lsda = encoding & UNWIND_HAS_LSDA;
1177 
1178   if (has_lsda) {
1179     uint32_t func_offset =
1180         entry_func_offset + baton.first_level_index_entry.functionOffset;
1181 
1182     int lsda_entry_number = -1;
1183 
1184     uint32_t low = 0;
1185     uint32_t high = (baton.lsda_array_end - baton.lsda_array_start) /
1186                     sizeof(struct unwind_info_section_header_lsda_index_entry);
1187 
1188     while (low < high) {
1189       uint32_t mid = (low + high) / 2;
1190 
1191       uint8_t *mid_lsda_entry_addr =
1192           (baton.lsda_array_start +
1193            (mid * sizeof(struct unwind_info_section_header_lsda_index_entry)));
1194       struct unwind_info_section_header_lsda_index_entry mid_lsda_entry;
1195       memcpy(&mid_lsda_entry, mid_lsda_entry_addr,
1196              sizeof(struct unwind_info_section_header_lsda_index_entry));
1197       if (mid_lsda_entry.functionOffset == func_offset) {
1198         lsda_entry_number =
1199             (mid_lsda_entry_addr - baton.lsda_array_start) /
1200             sizeof(struct unwind_info_section_header_lsda_index_entry);
1201         break;
1202       } else if (mid_lsda_entry.functionOffset < func_offset) {
1203         low = mid + 1;
1204       } else {
1205         high = mid;
1206       }
1207     }
1208 
1209     if (lsda_entry_number != -1) {
1210       printf(", LSDA entry #%d", lsda_entry_number);
1211     } else {
1212       printf(", LSDA entry not found");
1213     }
1214   }
1215 
1216   uint32_t pers_idx = EXTRACT_BITS(encoding, UNWIND_PERSONALITY_MASK);
1217   if (pers_idx != 0) {
1218     pers_idx--; // Change 1-based to 0-based index
1219     printf(", personality entry #%d", pers_idx);
1220   }
1221 
1222   printf("\n");
1223 }
1224 
// Walk a UNWIND_SECOND_LEVEL_REGULAR page and print every entry in it.
//
// The page is located from baton.first_level_index_entry and
// baton.regular_second_level_page_header; each entry is 8 bytes: a 32-bit
// function offset followed by a 32-bit encoding.
void print_second_level_index_regular(struct baton baton) {
  const uint8_t *entry =
      baton.compact_unwind_start +
      baton.first_level_index_entry.secondLevelPagesSectionOffset +
      baton.regular_second_level_page_header.entryPageOffset;
  uint32_t entries_count = baton.regular_second_level_page_header.entryCount;

  for (uint32_t idx = 0; idx < entries_count; idx++, entry += 8) {
    // Copy the fields out of the mapped file instead of dereferencing a cast
    // pointer, so the read does not depend on the data's alignment.
    uint32_t func_offset;
    uint32_t encoding;
    memcpy(&func_offset, entry, sizeof(func_offset));
    memcpy(&encoding, entry + 4, sizeof(encoding));

    // UNWIND_SECOND_LEVEL_REGULAR entries have a funcOffset which includes the
    // functionOffset from the containing index table already.
    // UNWIND_SECOND_LEVEL_COMPRESSED entries only have the offset from the
    // containing index table functionOffset.
    // So strip off the containing index table functionOffset value here so
    // they can be treated the same at the lower layers.
    print_function_encoding(baton, idx, encoding, (uint32_t)-1,
                            func_offset -
                                baton.first_level_index_entry.functionOffset);
  }
}
1255 
// Walk a UNWIND_SECOND_LEVEL_COMPRESSED page and print every entry in it.
//
// Each 32-bit entry packs an encoding index and a function offset.  Indexes
// below the section header's commonEncodingsArrayCount select from the common
// encodings table; the rest select from this page's own encodings table.
void print_second_level_index_compressed(struct baton baton) {
  const uint8_t *this_index =
      baton.compact_unwind_start +
      baton.first_level_index_entry.secondLevelPagesSectionOffset;
  const uint8_t *offset =
      this_index + baton.compressed_second_level_page_header.entryPageOffset;

  for (uint16_t idx = 0;
       idx < baton.compressed_second_level_page_header.entryCount; idx++) {
    // Copy words out of the mapped file instead of dereferencing a cast
    // pointer, so the reads do not depend on the data's alignment.
    uint32_t entry;
    memcpy(&entry, offset, sizeof(entry));
    offset += sizeof(entry);

    uint32_t entry_encoding_index =
        UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry);
    uint32_t entry_func_offset =
        UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry);

    const uint8_t *encoding_addr;
    if (entry_encoding_index < baton.unwind_header.commonEncodingsArrayCount) {
      // encoding is in common table in section header
      encoding_addr = baton.compact_unwind_start +
                      baton.unwind_header.commonEncodingsArraySectionOffset +
                      (entry_encoding_index * sizeof(uint32_t));
    } else {
      // encoding is in page specific table
      uint32_t page_encoding_index =
          entry_encoding_index - baton.unwind_header.commonEncodingsArrayCount;
      encoding_addr =
          this_index +
          baton.compressed_second_level_page_header.encodingsPageOffset +
          (page_encoding_index * sizeof(uint32_t));
    }

    uint32_t encoding;
    memcpy(&encoding, encoding_addr, sizeof(encoding));

    print_function_encoding(baton, idx, encoding, entry_encoding_index,
                            entry_func_offset);
  }
}
1294 
print_second_level_index(struct baton baton)1295 void print_second_level_index(struct baton baton) {
1296   uint8_t *index_start =
1297       baton.compact_unwind_start +
1298       baton.first_level_index_entry.secondLevelPagesSectionOffset;
1299 
1300   if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_REGULAR) {
1301     struct unwind_info_regular_second_level_page_header header;
1302     memcpy(&header, index_start,
1303            sizeof(struct unwind_info_regular_second_level_page_header));
1304     printf(
1305         "  UNWIND_SECOND_LEVEL_REGULAR #%d entryPageOffset %d, entryCount %d\n",
1306         baton.current_index_table_number, header.entryPageOffset,
1307         header.entryCount);
1308     baton.regular_second_level_page_header = header;
1309     print_second_level_index_regular(baton);
1310   }
1311 
1312   if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_COMPRESSED) {
1313     struct unwind_info_compressed_second_level_page_header header;
1314     memcpy(&header, index_start,
1315            sizeof(struct unwind_info_compressed_second_level_page_header));
1316     printf("  UNWIND_SECOND_LEVEL_COMPRESSED #%d entryPageOffset %d, "
1317            "entryCount %d, encodingsPageOffset %d, encodingsCount %d\n",
1318            baton.current_index_table_number, header.entryPageOffset,
1319            header.entryCount, header.encodingsPageOffset,
1320            header.encodingsCount);
1321     baton.compressed_second_level_page_header = header;
1322     print_second_level_index_compressed(baton);
1323   }
1324 }
1325 
print_index_sections(struct baton baton)1326 void print_index_sections(struct baton baton) {
1327   uint8_t *index_section_offset =
1328       baton.compact_unwind_start + baton.unwind_header.indexSectionOffset;
1329   uint32_t index_count = baton.unwind_header.indexCount;
1330 
1331   uint32_t cur_idx = 0;
1332 
1333   uint8_t *offset = index_section_offset;
1334   while (cur_idx < index_count) {
1335     baton.current_index_table_number = cur_idx;
1336     struct unwind_info_section_header_index_entry index_entry;
1337     memcpy(&index_entry, offset,
1338            sizeof(struct unwind_info_section_header_index_entry));
1339     printf("index section #%d: functionOffset %d, "
1340            "secondLevelPagesSectionOffset %d, lsdaIndexArraySectionOffset %d\n",
1341            cur_idx, index_entry.functionOffset,
1342            index_entry.secondLevelPagesSectionOffset,
1343            index_entry.lsdaIndexArraySectionOffset);
1344 
1345     // secondLevelPagesSectionOffset == 0 means this is a sentinel entry
1346     if (index_entry.secondLevelPagesSectionOffset != 0) {
1347       struct unwind_info_section_header_index_entry next_index_entry;
1348       memcpy(&next_index_entry,
1349              offset + sizeof(struct unwind_info_section_header_index_entry),
1350              sizeof(struct unwind_info_section_header_index_entry));
1351 
1352       baton.lsda_array_start =
1353           baton.compact_unwind_start + index_entry.lsdaIndexArraySectionOffset;
1354       baton.lsda_array_end = baton.compact_unwind_start +
1355                              next_index_entry.lsdaIndexArraySectionOffset;
1356 
1357       uint8_t *lsda_entry_offset = baton.lsda_array_start;
1358       uint32_t lsda_count = 0;
1359       while (lsda_entry_offset < baton.lsda_array_end) {
1360         struct unwind_info_section_header_lsda_index_entry lsda_entry;
1361         memcpy(&lsda_entry, lsda_entry_offset,
1362                sizeof(struct unwind_info_section_header_lsda_index_entry));
1363         uint64_t function_file_address =
1364             baton.first_level_index_entry.functionOffset +
1365             lsda_entry.functionOffset + baton.text_segment_vmaddr;
1366         uint64_t lsda_file_address =
1367             lsda_entry.lsdaOffset + baton.text_segment_vmaddr;
1368         printf("    LSDA [%d] functionOffset %d (%d) (file address 0x%" PRIx64
1369                "), lsdaOffset %d (file address 0x%" PRIx64 ")\n",
1370                lsda_count, lsda_entry.functionOffset,
1371                lsda_entry.functionOffset - index_entry.functionOffset,
1372                function_file_address, lsda_entry.lsdaOffset, lsda_file_address);
1373         lsda_count++;
1374         lsda_entry_offset +=
1375             sizeof(struct unwind_info_section_header_lsda_index_entry);
1376       }
1377 
1378       printf("\n");
1379 
1380       baton.first_level_index_entry = index_entry;
1381       print_second_level_index(baton);
1382     }
1383 
1384     printf("\n");
1385 
1386     cur_idx++;
1387     offset += sizeof(struct unwind_info_section_header_index_entry);
1388   }
1389 }
1390 
main(int argc,char ** argv)1391 int main(int argc, char **argv) {
1392   struct stat st;
1393   char *file = argv[0];
1394   if (argc > 1)
1395     file = argv[1];
1396   int fd = open(file, O_RDONLY);
1397   if (fd == -1) {
1398     printf("Failed to open '%s'\n", file);
1399     exit(1);
1400   }
1401   fstat(fd, &st);
1402   uint8_t *file_mem =
1403       (uint8_t *)mmap(0, st.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
1404   if (file_mem == MAP_FAILED) {
1405     printf("Failed to mmap() '%s'\n", file);
1406   }
1407 
1408   FILE *f = fopen("a.out", "r");
1409 
1410   struct baton baton;
1411   baton.mach_header_start = file_mem;
1412   baton.symbols = NULL;
1413   baton.symbols_count = 0;
1414   baton.function_start_addresses = NULL;
1415   baton.function_start_addresses_count = 0;
1416 
1417   scan_macho_load_commands(&baton);
1418 
1419   if (baton.compact_unwind_start == NULL) {
1420     printf("could not find __TEXT,__unwind_info section\n");
1421     exit(1);
1422   }
1423 
1424   struct unwind_info_section_header header;
1425   memcpy(&header, baton.compact_unwind_start,
1426          sizeof(struct unwind_info_section_header));
1427   printf("Header:\n");
1428   printf("  version %u\n", header.version);
1429   printf("  commonEncodingsArraySectionOffset is %d\n",
1430          header.commonEncodingsArraySectionOffset);
1431   printf("  commonEncodingsArrayCount is %d\n",
1432          header.commonEncodingsArrayCount);
1433   printf("  personalityArraySectionOffset is %d\n",
1434          header.personalityArraySectionOffset);
1435   printf("  personalityArrayCount is %d\n", header.personalityArrayCount);
1436   printf("  indexSectionOffset is %d\n", header.indexSectionOffset);
1437   printf("  indexCount is %d\n", header.indexCount);
1438 
1439   uint8_t *common_encodings =
1440       baton.compact_unwind_start + header.commonEncodingsArraySectionOffset;
1441   uint32_t encoding_idx = 0;
1442   while (encoding_idx < header.commonEncodingsArrayCount) {
1443     uint32_t encoding = *((uint32_t *)common_encodings);
1444     printf("    Common Encoding [%d]: 0x%x ", encoding_idx, encoding);
1445     print_encoding(baton, NULL, encoding);
1446     printf("\n");
1447     common_encodings += sizeof(uint32_t);
1448     encoding_idx++;
1449   }
1450 
1451   uint8_t *pers_arr =
1452       baton.compact_unwind_start + header.personalityArraySectionOffset;
1453   uint32_t pers_idx = 0;
1454   while (pers_idx < header.personalityArrayCount) {
1455     int32_t pers_delta = *((int32_t *)(baton.compact_unwind_start +
1456                                        header.personalityArraySectionOffset +
1457                                        (pers_idx * sizeof(uint32_t))));
1458     printf("    Personality [%d]: personality function ptr @ offset %d (file "
1459            "address 0x%" PRIx64 ")\n",
1460            pers_idx, pers_delta, baton.text_segment_vmaddr + pers_delta);
1461     pers_idx++;
1462     pers_arr += sizeof(uint32_t);
1463   }
1464 
1465   printf("\n");
1466 
1467   baton.unwind_header = header;
1468 
1469   print_index_sections(baton);
1470 
1471   return 0;
1472 }
1473