1 /* Generate assembler source containing symbol information 2 * 3 * Copyright 2002 by Kai Germaschewski 4 * 5 * This software may be used and distributed according to the terms 6 * of the GNU General Public License, incorporated herein by reference. 7 * 8 * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S 9 * 10 * Table compression uses all the unused char codes on the symbols and 11 * maps these to the most used substrings (tokens). For instance, it might 12 * map char code 0xF7 to represent "write_" and then in every symbol where 13 * "write_" appears it can be replaced by 0xF7, saving 5 bytes. 14 * The used codes themselves are also placed in the table so that the 15 * decompresion can work without "special cases". 16 * Applied to kernel symbols, this usually produces a compression ratio 17 * of about 50%. 18 * 19 */ 20 21 #include <errno.h> 22 #include <getopt.h> 23 #include <stdbool.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <ctype.h> 28 #include <limits.h> 29 30 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 31 32 #define KSYM_NAME_LEN 512 33 34 struct sym_entry { 35 unsigned long long addr; 36 unsigned int len; 37 unsigned int seq; 38 bool percpu_absolute; 39 unsigned char sym[]; 40 }; 41 42 struct addr_range { 43 const char *start_sym, *end_sym; 44 unsigned long long start, end; 45 }; 46 47 static unsigned long long _text; 48 static unsigned long long relative_base; 49 static struct addr_range text_ranges[] = { 50 { "_stext", "_etext" }, 51 { "_sinittext", "_einittext" }, 52 }; 53 #define text_range_text (&text_ranges[0]) 54 #define text_range_inittext (&text_ranges[1]) 55 56 static struct addr_range percpu_range = { 57 "__per_cpu_start", "__per_cpu_end", -1ULL, 0 58 }; 59 60 static struct sym_entry **table; 61 static unsigned int table_size, table_cnt; 62 static int all_symbols; 63 static int absolute_percpu; 64 65 static int token_profit[0x10000]; 66 67 /* the table that holds the result of the compression */ 68 static unsigned char best_table[256][2]; 69 static unsigned char best_table_len[256]; 70 71 72 static void usage(void) 73 { 74 fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); 75 exit(1); 76 } 77 78 static char *sym_name(const struct sym_entry *s) 79 { 80 return (char *)s->sym + 1; 81 } 82 83 static bool is_ignored_symbol(const char *name, char type) 84 { 85 if (type == 'u' || type == 'n') 86 return true; 87 88 if (toupper(type) == 'A') { 89 /* Keep these useful absolute symbols */ 90 if (strcmp(name, "__kernel_syscall_via_break") && 91 strcmp(name, "__kernel_syscall_via_epc") && 92 strcmp(name, "__kernel_sigtramp") && 93 strcmp(name, "__gp")) 94 return true; 95 } 96 97 return false; 98 } 99 100 static void check_symbol_range(const char *sym, unsigned long long addr, 101 struct addr_range *ranges, int entries) 102 { 103 size_t i; 104 struct addr_range *ar; 105 106 for (i = 0; i < entries; ++i) { 107 ar = &ranges[i]; 108 109 if (strcmp(sym, ar->start_sym) == 0) { 110 ar->start = addr; 111 return; 112 } else if (strcmp(sym, ar->end_sym) == 0) { 113 ar->end = addr; 114 return; 115 } 116 } 117 } 118 119 static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len) 120 { 121 char *name, type, *p; 122 unsigned long long addr; 123 size_t len; 124 ssize_t readlen; 125 struct sym_entry *sym; 126 127 errno = 0; 128 readlen = getline(buf, buf_len, in); 129 if (readlen < 0) { 130 if (errno) { 131 perror("read_symbol"); 132 exit(EXIT_FAILURE); 133 } 134 return NULL; 135 } 136 137 if ((*buf)[readlen - 1] == '\n') 138 (*buf)[readlen - 1] = 0; 139 140 addr = strtoull(*buf, &p, 16); 141 142 if (*buf == p || *p++ != ' ' || !isascii((type = *p++)) || *p++ != ' ') { 143 fprintf(stderr, "line format error\n"); 144 exit(EXIT_FAILURE); 145 } 146 147 name = p; 148 len = strlen(name); 149 150 if (len >= KSYM_NAME_LEN) { 151 fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n" 152 "Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n", 153 name, len, KSYM_NAME_LEN); 154 return NULL; 155 } 156 157 if (strcmp(name, "_text") == 0) 158 _text = addr; 159 160 /* Ignore most absolute/undefined (?) symbols. */ 161 if (is_ignored_symbol(name, type)) 162 return NULL; 163 164 check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); 165 check_symbol_range(name, addr, &percpu_range, 1); 166 167 /* include the type field in the symbol name, so that it gets 168 * compressed together */ 169 len++; 170 171 sym = malloc(sizeof(*sym) + len + 1); 172 if (!sym) { 173 fprintf(stderr, "kallsyms failure: " 174 "unable to allocate required amount of memory\n"); 175 exit(EXIT_FAILURE); 176 } 177 sym->addr = addr; 178 sym->len = len; 179 sym->sym[0] = type; 180 strcpy(sym_name(sym), name); 181 sym->percpu_absolute = false; 182 183 return sym; 184 } 185 186 static int symbol_in_range(const struct sym_entry *s, 187 const struct addr_range *ranges, int entries) 188 { 189 size_t i; 190 const struct addr_range *ar; 191 192 for (i = 0; i < entries; ++i) { 193 ar = &ranges[i]; 194 195 if (s->addr >= ar->start && s->addr <= ar->end) 196 return 1; 197 } 198 199 return 0; 200 } 201 202 static bool string_starts_with(const char *s, const char *prefix) 203 { 204 return strncmp(s, prefix, strlen(prefix)) == 0; 205 } 206 207 static int symbol_valid(const struct sym_entry *s) 208 { 209 const char *name = sym_name(s); 210 211 /* if --all-symbols is not specified, then symbols outside the text 212 * and inittext sections are discarded */ 213 if (!all_symbols) { 214 /* 215 * Symbols starting with __start and __stop are used to denote 216 * section boundaries, and should always be included: 217 */ 218 if (string_starts_with(name, "__start_") || 219 string_starts_with(name, "__stop_")) 220 return 1; 221 222 if (symbol_in_range(s, text_ranges, 223 ARRAY_SIZE(text_ranges)) == 0) 224 return 0; 225 /* Corner case. Discard any symbols with the same value as 226 * _etext _einittext; they can move between pass 1 and 2 when 227 * the kallsyms data are added. If these symbols move then 228 * they may get dropped in pass 2, which breaks the kallsyms 229 * rules. 230 */ 231 if ((s->addr == text_range_text->end && 232 strcmp(name, text_range_text->end_sym)) || 233 (s->addr == text_range_inittext->end && 234 strcmp(name, text_range_inittext->end_sym))) 235 return 0; 236 } 237 238 return 1; 239 } 240 241 /* remove all the invalid symbols from the table */ 242 static void shrink_table(void) 243 { 244 unsigned int i, pos; 245 246 pos = 0; 247 for (i = 0; i < table_cnt; i++) { 248 if (symbol_valid(table[i])) { 249 if (pos != i) 250 table[pos] = table[i]; 251 pos++; 252 } else { 253 free(table[i]); 254 } 255 } 256 table_cnt = pos; 257 } 258 259 static void read_map(const char *in) 260 { 261 FILE *fp; 262 struct sym_entry *sym; 263 char *buf = NULL; 264 size_t buflen = 0; 265 266 fp = fopen(in, "r"); 267 if (!fp) { 268 perror(in); 269 exit(1); 270 } 271 272 while (!feof(fp)) { 273 sym = read_symbol(fp, &buf, &buflen); 274 if (!sym) 275 continue; 276 277 sym->seq = table_cnt; 278 279 if (table_cnt >= table_size) { 280 table_size += 10000; 281 table = realloc(table, sizeof(*table) * table_size); 282 if (!table) { 283 fprintf(stderr, "out of memory\n"); 284 fclose(fp); 285 exit (1); 286 } 287 } 288 289 table[table_cnt++] = sym; 290 } 291 292 free(buf); 293 fclose(fp); 294 } 295 296 static void output_label(const char *label) 297 { 298 printf(".globl %s\n", label); 299 printf("\tALGN\n"); 300 printf("%s:\n", label); 301 } 302 303 /* Provide proper symbols relocatability by their '_text' relativeness. */ 304 static void output_address(unsigned long long addr) 305 { 306 if (_text <= addr) 307 printf("\tPTR\t_text + %#llx\n", addr - _text); 308 else 309 printf("\tPTR\t_text - %#llx\n", _text - addr); 310 } 311 312 /* uncompress a compressed symbol. When this function is called, the best table 313 * might still be compressed itself, so the function needs to be recursive */ 314 static int expand_symbol(const unsigned char *data, int len, char *result) 315 { 316 int c, rlen, total=0; 317 318 while (len) { 319 c = *data; 320 /* if the table holds a single char that is the same as the one 321 * we are looking for, then end the search */ 322 if (best_table[c][0]==c && best_table_len[c]==1) { 323 *result++ = c; 324 total++; 325 } else { 326 /* if not, recurse and expand */ 327 rlen = expand_symbol(best_table[c], best_table_len[c], result); 328 total += rlen; 329 result += rlen; 330 } 331 data++; 332 len--; 333 } 334 *result=0; 335 336 return total; 337 } 338 339 static bool symbol_absolute(const struct sym_entry *s) 340 { 341 return s->percpu_absolute; 342 } 343 344 static int compare_names(const void *a, const void *b) 345 { 346 int ret; 347 const struct sym_entry *sa = *(const struct sym_entry **)a; 348 const struct sym_entry *sb = *(const struct sym_entry **)b; 349 350 ret = strcmp(sym_name(sa), sym_name(sb)); 351 if (!ret) { 352 if (sa->addr > sb->addr) 353 return 1; 354 else if (sa->addr < sb->addr) 355 return -1; 356 357 /* keep old order */ 358 return (int)(sa->seq - sb->seq); 359 } 360 361 return ret; 362 } 363 364 static void sort_symbols_by_name(void) 365 { 366 qsort(table, table_cnt, sizeof(table[0]), compare_names); 367 } 368 369 static void write_src(void) 370 { 371 unsigned int i, k, off; 372 unsigned int best_idx[256]; 373 unsigned int *markers, markers_cnt; 374 char buf[KSYM_NAME_LEN]; 375 376 printf("#include <asm/bitsperlong.h>\n"); 377 printf("#if BITS_PER_LONG == 64\n"); 378 printf("#define PTR .quad\n"); 379 printf("#define ALGN .balign 8\n"); 380 printf("#else\n"); 381 printf("#define PTR .long\n"); 382 printf("#define ALGN .balign 4\n"); 383 printf("#endif\n"); 384 385 printf("\t.section .rodata, \"a\"\n"); 386 387 output_label("kallsyms_num_syms"); 388 printf("\t.long\t%u\n", table_cnt); 389 printf("\n"); 390 391 /* table of offset markers, that give the offset in the compressed stream 392 * every 256 symbols */ 393 markers_cnt = (table_cnt + 255) / 256; 394 markers = malloc(sizeof(*markers) * markers_cnt); 395 if (!markers) { 396 fprintf(stderr, "kallsyms failure: " 397 "unable to allocate required memory\n"); 398 exit(EXIT_FAILURE); 399 } 400 401 output_label("kallsyms_names"); 402 off = 0; 403 for (i = 0; i < table_cnt; i++) { 404 if ((i & 0xFF) == 0) 405 markers[i >> 8] = off; 406 table[i]->seq = i; 407 408 /* There cannot be any symbol of length zero. */ 409 if (table[i]->len == 0) { 410 fprintf(stderr, "kallsyms failure: " 411 "unexpected zero symbol length\n"); 412 exit(EXIT_FAILURE); 413 } 414 415 /* Only lengths that fit in up-to-two-byte ULEB128 are supported. */ 416 if (table[i]->len > 0x3FFF) { 417 fprintf(stderr, "kallsyms failure: " 418 "unexpected huge symbol length\n"); 419 exit(EXIT_FAILURE); 420 } 421 422 /* Encode length with ULEB128. */ 423 if (table[i]->len <= 0x7F) { 424 /* Most symbols use a single byte for the length. */ 425 printf("\t.byte 0x%02x", table[i]->len); 426 off += table[i]->len + 1; 427 } else { 428 /* "Big" symbols use two bytes. */ 429 printf("\t.byte 0x%02x, 0x%02x", 430 (table[i]->len & 0x7F) | 0x80, 431 (table[i]->len >> 7) & 0x7F); 432 off += table[i]->len + 2; 433 } 434 for (k = 0; k < table[i]->len; k++) 435 printf(", 0x%02x", table[i]->sym[k]); 436 437 /* 438 * Now that we wrote out the compressed symbol name, restore the 439 * original name and print it in the comment. 440 */ 441 expand_symbol(table[i]->sym, table[i]->len, buf); 442 strcpy((char *)table[i]->sym, buf); 443 printf("\t/* %s */\n", table[i]->sym); 444 } 445 printf("\n"); 446 447 output_label("kallsyms_markers"); 448 for (i = 0; i < markers_cnt; i++) 449 printf("\t.long\t%u\n", markers[i]); 450 printf("\n"); 451 452 free(markers); 453 454 output_label("kallsyms_token_table"); 455 off = 0; 456 for (i = 0; i < 256; i++) { 457 best_idx[i] = off; 458 expand_symbol(best_table[i], best_table_len[i], buf); 459 printf("\t.asciz\t\"%s\"\n", buf); 460 off += strlen(buf) + 1; 461 } 462 printf("\n"); 463 464 output_label("kallsyms_token_index"); 465 for (i = 0; i < 256; i++) 466 printf("\t.short\t%d\n", best_idx[i]); 467 printf("\n"); 468 469 output_label("kallsyms_offsets"); 470 471 for (i = 0; i < table_cnt; i++) { 472 /* 473 * Use the offset relative to the lowest value 474 * encountered of all relative symbols, and emit 475 * non-relocatable fixed offsets that will be fixed 476 * up at runtime. 477 */ 478 479 long long offset; 480 int overflow; 481 482 if (!absolute_percpu) { 483 offset = table[i]->addr - relative_base; 484 overflow = (offset < 0 || offset > UINT_MAX); 485 } else if (symbol_absolute(table[i])) { 486 offset = table[i]->addr; 487 overflow = (offset < 0 || offset > INT_MAX); 488 } else { 489 offset = relative_base - table[i]->addr - 1; 490 overflow = (offset < INT_MIN || offset >= 0); 491 } 492 if (overflow) { 493 fprintf(stderr, "kallsyms failure: " 494 "%s symbol value %#llx out of range in relative mode\n", 495 symbol_absolute(table[i]) ? "absolute" : "relative", 496 table[i]->addr); 497 exit(EXIT_FAILURE); 498 } 499 printf("\t.long\t%#x\t/* %s */\n", (int)offset, table[i]->sym); 500 } 501 printf("\n"); 502 503 output_label("kallsyms_relative_base"); 504 output_address(relative_base); 505 printf("\n"); 506 507 sort_symbols_by_name(); 508 output_label("kallsyms_seqs_of_names"); 509 for (i = 0; i < table_cnt; i++) 510 printf("\t.byte 0x%02x, 0x%02x, 0x%02x\t/* %s */\n", 511 (unsigned char)(table[i]->seq >> 16), 512 (unsigned char)(table[i]->seq >> 8), 513 (unsigned char)(table[i]->seq >> 0), 514 table[i]->sym); 515 printf("\n"); 516 } 517 518 519 /* table lookup compression functions */ 520 521 /* count all the possible tokens in a symbol */ 522 static void learn_symbol(const unsigned char *symbol, int len) 523 { 524 int i; 525 526 for (i = 0; i < len - 1; i++) 527 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++; 528 } 529 530 /* decrease the count for all the possible tokens in a symbol */ 531 static void forget_symbol(const unsigned char *symbol, int len) 532 { 533 int i; 534 535 for (i = 0; i < len - 1; i++) 536 token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--; 537 } 538 539 /* do the initial token count */ 540 static void build_initial_token_table(void) 541 { 542 unsigned int i; 543 544 for (i = 0; i < table_cnt; i++) 545 learn_symbol(table[i]->sym, table[i]->len); 546 } 547 548 static unsigned char *find_token(unsigned char *str, int len, 549 const unsigned char *token) 550 { 551 int i; 552 553 for (i = 0; i < len - 1; i++) { 554 if (str[i] == token[0] && str[i+1] == token[1]) 555 return &str[i]; 556 } 557 return NULL; 558 } 559 560 /* replace a given token in all the valid symbols. Use the sampled symbols 561 * to update the counts */ 562 static void compress_symbols(const unsigned char *str, int idx) 563 { 564 unsigned int i, len, size; 565 unsigned char *p1, *p2; 566 567 for (i = 0; i < table_cnt; i++) { 568 569 len = table[i]->len; 570 p1 = table[i]->sym; 571 572 /* find the token on the symbol */ 573 p2 = find_token(p1, len, str); 574 if (!p2) continue; 575 576 /* decrease the counts for this symbol's tokens */ 577 forget_symbol(table[i]->sym, len); 578 579 size = len; 580 581 do { 582 *p2 = idx; 583 p2++; 584 size -= (p2 - p1); 585 memmove(p2, p2 + 1, size); 586 p1 = p2; 587 len--; 588 589 if (size < 2) break; 590 591 /* find the token on the symbol */ 592 p2 = find_token(p1, size, str); 593 594 } while (p2); 595 596 table[i]->len = len; 597 598 /* increase the counts for this symbol's new tokens */ 599 learn_symbol(table[i]->sym, len); 600 } 601 } 602 603 /* search the token with the maximum profit */ 604 static int find_best_token(void) 605 { 606 int i, best, bestprofit; 607 608 bestprofit=-10000; 609 best = 0; 610 611 for (i = 0; i < 0x10000; i++) { 612 if (token_profit[i] > bestprofit) { 613 best = i; 614 bestprofit = token_profit[i]; 615 } 616 } 617 return best; 618 } 619 620 /* this is the core of the algorithm: calculate the "best" table */ 621 static void optimize_result(void) 622 { 623 int i, best; 624 625 /* using the '\0' symbol last allows compress_symbols to use standard 626 * fast string functions */ 627 for (i = 255; i >= 0; i--) { 628 629 /* if this table slot is empty (it is not used by an actual 630 * original char code */ 631 if (!best_table_len[i]) { 632 633 /* find the token with the best profit value */ 634 best = find_best_token(); 635 if (token_profit[best] == 0) 636 break; 637 638 /* place it in the "best" table */ 639 best_table_len[i] = 2; 640 best_table[i][0] = best & 0xFF; 641 best_table[i][1] = (best >> 8) & 0xFF; 642 643 /* replace this token in all the valid symbols */ 644 compress_symbols(best_table[i], i); 645 } 646 } 647 } 648 649 /* start by placing the symbols that are actually used on the table */ 650 static void insert_real_symbols_in_table(void) 651 { 652 unsigned int i, j, c; 653 654 for (i = 0; i < table_cnt; i++) { 655 for (j = 0; j < table[i]->len; j++) { 656 c = table[i]->sym[j]; 657 best_table[c][0]=c; 658 best_table_len[c]=1; 659 } 660 } 661 } 662 663 static void optimize_token_table(void) 664 { 665 build_initial_token_table(); 666 667 insert_real_symbols_in_table(); 668 669 optimize_result(); 670 } 671 672 /* guess for "linker script provide" symbol */ 673 static int may_be_linker_script_provide_symbol(const struct sym_entry *se) 674 { 675 const char *symbol = sym_name(se); 676 int len = se->len - 1; 677 678 if (len < 8) 679 return 0; 680 681 if (symbol[0] != '_' || symbol[1] != '_') 682 return 0; 683 684 /* __start_XXXXX */ 685 if (!memcmp(symbol + 2, "start_", 6)) 686 return 1; 687 688 /* __stop_XXXXX */ 689 if (!memcmp(symbol + 2, "stop_", 5)) 690 return 1; 691 692 /* __end_XXXXX */ 693 if (!memcmp(symbol + 2, "end_", 4)) 694 return 1; 695 696 /* __XXXXX_start */ 697 if (!memcmp(symbol + len - 6, "_start", 6)) 698 return 1; 699 700 /* __XXXXX_end */ 701 if (!memcmp(symbol + len - 4, "_end", 4)) 702 return 1; 703 704 return 0; 705 } 706 707 static int compare_symbols(const void *a, const void *b) 708 { 709 const struct sym_entry *sa = *(const struct sym_entry **)a; 710 const struct sym_entry *sb = *(const struct sym_entry **)b; 711 int wa, wb; 712 713 /* sort by address first */ 714 if (sa->addr > sb->addr) 715 return 1; 716 if (sa->addr < sb->addr) 717 return -1; 718 719 /* sort by "weakness" type */ 720 wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W'); 721 wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W'); 722 if (wa != wb) 723 return wa - wb; 724 725 /* sort by "linker script provide" type */ 726 wa = may_be_linker_script_provide_symbol(sa); 727 wb = may_be_linker_script_provide_symbol(sb); 728 if (wa != wb) 729 return wa - wb; 730 731 /* sort by the number of prefix underscores */ 732 wa = strspn(sym_name(sa), "_"); 733 wb = strspn(sym_name(sb), "_"); 734 if (wa != wb) 735 return wa - wb; 736 737 /* sort by initial order, so that other symbols are left undisturbed */ 738 return sa->seq - sb->seq; 739 } 740 741 static void sort_symbols(void) 742 { 743 qsort(table, table_cnt, sizeof(table[0]), compare_symbols); 744 } 745 746 static void make_percpus_absolute(void) 747 { 748 unsigned int i; 749 750 for (i = 0; i < table_cnt; i++) 751 if (symbol_in_range(table[i], &percpu_range, 1)) { 752 /* 753 * Keep the 'A' override for percpu symbols to 754 * ensure consistent behavior compared to older 755 * versions of this tool. 756 */ 757 table[i]->sym[0] = 'A'; 758 table[i]->percpu_absolute = true; 759 } 760 } 761 762 /* find the minimum non-absolute symbol address */ 763 static void record_relative_base(void) 764 { 765 unsigned int i; 766 767 for (i = 0; i < table_cnt; i++) 768 if (!symbol_absolute(table[i])) { 769 /* 770 * The table is sorted by address. 771 * Take the first non-absolute symbol value. 772 */ 773 relative_base = table[i]->addr; 774 return; 775 } 776 } 777 778 int main(int argc, char **argv) 779 { 780 while (1) { 781 static const struct option long_options[] = { 782 {"all-symbols", no_argument, &all_symbols, 1}, 783 {"absolute-percpu", no_argument, &absolute_percpu, 1}, 784 {}, 785 }; 786 787 int c = getopt_long(argc, argv, "", long_options, NULL); 788 789 if (c == -1) 790 break; 791 if (c != 0) 792 usage(); 793 } 794 795 if (optind >= argc) 796 usage(); 797 798 read_map(argv[optind]); 799 shrink_table(); 800 if (absolute_percpu) 801 make_percpus_absolute(); 802 sort_symbols(); 803 record_relative_base(); 804 optimize_token_table(); 805 write_src(); 806 807 return 0; 808 } 809