#!/usr/bin/env python
#-
# Copyright (c) 2010 Gleb Kurtsou
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#

from __future__ import print_function
import os
import sys
import re
import optparse


class Config(object):
    """Global settings shared by the whole script.

    Everything is stored in class attributes; the class is used as a
    namespace and is never instantiated.
    """

    version = '0.1'
    # controlled by user
    verbose = 0
    dump = False
    no_dump = False
    version_filter = None
    symbol_filter = None
    alias_prefixes = []
    # misc opts
    objdump = 'objdump'
    dwarfdump = 'dwarfdump'
    # debug
    cmpcache_enabled = True
    dwarfcache_enabled = True
    w_alias = True
    w_cached = False
    w_symbol = True

    class FileConfig(object):
        """Per-library settings: the library file name and its output stream."""

        filename = None
        out = sys.stdout

        def init(self, outname):
            """Send output to the file `outname`.

            An empty name or '-' keeps the default stream (stdout).
            """
            if outname and outname != '-':
                # Nothing in this file closes the stream explicitly.
                self.out = open(outname, "w")

    origfile = FileConfig()
    newfile = FileConfig()

    exclude_sym_default = [
        '^__bss_start$',
        '^_edata$',
        '^_end$',
        '^_fini$',
        '^_init$',
    ]

    @classmethod
    def init(cls):
        """Create the (still empty) version and symbol filters."""
        cls.version_filter = StrFilter()
        cls.symbol_filter = StrFilter()


class App(object):
    """Holds the process exit status."""

    result_code = 0


def warn(cond, msg):
    """Print `msg` to stderr with a 'WARN: ' prefix when `cond` is true."""
    if cond:
        print("WARN: " + msg, file=sys.stderr)

# {{{ misc


class StrFilter(object):
    """Include/exclude filter built from lists of regular expressions.

    Fill `include` / `exclude` with pattern strings, then call compile()
    before the first match().
    """

    def __init__(self):
        self.exclude = []
        self.include = []

    def compile(self):
        """Compile the current pattern lists into regex objects."""
        self.re_exclude = [re.compile(x) for x in self.exclude]
        self.re_include = [re.compile(x) for x in self.include]

    def match(self, s):
        """Return True if `s` passes the filter.

        When include patterns exist, `s` must match at least one of them.
        In every case `s` must match none of the exclude patterns.
        """
        if self.re_include and not any(r.match(s) for r in self.re_include):
            return False
        return not any(r.match(s) for r in self.re_exclude)


class Cache(object):
    """Dictionary-backed cache with hit/miss accounting.

    A disabled cache stores nothing and reports every lookup as a miss.
    """

    class CacheStats(object):
        """Hit and miss counters; one instance may be shared by several caches."""

        def __init__(self):
            self.hit = 0
            self.miss = 0

        def show(self, name):
            """Return a one-line summary labelled with `name`."""
            total = self.hit + self.miss
            if total == 0:
                ratio = '(undef)'
            else:
                ratio = '%f' % (self.hit/float(total))
            return '%s cache stats: hit: %d; miss: %d; ratio: %s' % \
                    (name, self.hit, self.miss, ratio)

    def __init__(self, enabled=True, stats=None):
        self.enabled = enabled
        self.items = {}
        self.stats = Cache.CacheStats() if stats is None else stats

    def get(self, id):
        """Return the object cached under `id`, or None (counted as a miss)."""
        if self.enabled and id in self.items:
            self.stats.hit += 1
            return self.items[id]
        self.stats.miss += 1
        return None

    def put(self, id, obj):
        """Store `obj` under `id`.

        Overwriting a different object is allowed; it only produces a
        warning when Config.w_cached is set.
        """
        if not self.enabled:
            return
        if id in self.items and obj is not self.items[id]:
            #raise ValueError("Item is already cached: %d (%s, %s)" %
            #        (id, self.items[id], obj))
            warn(Config.w_cached, "Item is already cached: %d (%s, %s)" % \
                    (id, self.items[id], obj))
        self.items[id] = obj

    def replace(self, id, obj):
        """Replace an entry that must already exist (checked with assert)."""
        if self.enabled:
            assert id in self.items
            self.items[id] = obj


class ListDiff(object):
    """Set difference of two iterables: common, added and removed items."""

    def __init__(self, orig, new):
        self.orig = set(orig)
        self.new = set(new)
        self.common = self.orig & self.new
        self.added = self.new - self.common
        self.removed = self.orig - self.common


class PrettyPrinter(object):
    """Drives the `_pp` / `_pp_ex` methods of definition objects.

    Expanded forms requested through run_nested() are collected on
    `stack`; the value of the latest run() is kept for result().
    """

    def __init__(self):
        self.stack = []
        # result() returns None until run() has been called.
        self._result = None

    def run_nested(self, obj):
        """Record the extended form of `obj` for later output."""
        ex = obj._pp_ex(self)
        self.stack.append(ex)

    def run(self, obj):
        """Pretty print `obj`, remember the text and return it."""
        self._result = obj._pp(self)
        return self._result

    def nested(self):
        """Return the recorded extended forms, de-duplicated and sorted."""
        return sorted(set(self.stack))

    def result(self):
        """Return the text produced by the latest run()."""
        return self._result

# }}}

#{{{ symbols and version maps


class Symbol(object):
    """A versioned symbol of one library."""

    def __init__(self, name, offset, version, lib):
        self.name = name
        self.offset = offset
        self.version = version
        self.lib = lib
        self.definition = None

    @property
    def name_ver(self):
        """The symbol name in 'name@version' form."""
        return self.name + '@' + self.version

    def __repr__(self):
        return "Symbol(%s, 0x%x, %s)" % (self.name, self.offset, self.version)


class CommonSymbol(object):
    """Pair of same-named, same-versioned symbols from both libraries.

    Raises RuntimeError when the names or the versions differ.
    """

    def __init__(self, origsym, newsym):
        if origsym.name != newsym.name or origsym.version != newsym.version:
            # The argument must be %-formatted into the message; passing it
            # as a second exception argument leaves '%s' in the text.
            raise RuntimeError("Symbols have different names: %s" %
                    ([origsym, newsym],))
        self.origsym = origsym
        self.newsym = newsym
        self.name = newsym.name
        self.version = newsym.version

    def __repr__(self):
        return "CommonSymbol(%s, %s)" % (self.name, self.version)


class SymbolAlias(object):
    """A local symbol whose name is `prefix` followed by the aliased name."""

    def __init__(self, alias, prefix, offset):
        assert alias.startswith(prefix)
        self.alias = alias
        self.name = alias[len(prefix):]
        self.offset = offset

    def __repr__(self):
        return "SymbolAlias(%s, 0x%x)" % (self.alias, self.offset)


class VersionMap(object):
    """Symbols of one version, keyed by symbol name."""

    def __init__(self, name):
        self.name = name
        self.symbols = {}

    def append(self, symbol):
        """Add `symbol`; raises ValueError if its name is already present."""
        if symbol.name in self.symbols:
            raise ValueError("Symbol is already defined %s@%s" %
                    (symbol.name, self.name))
        self.symbols[symbol.name] = symbol

    def names(self):
        """Return the symbol names as a new list.

        A real list (not a dict view) so that callers may sort it in place
        on Python 3 as well.
        """
        return list(self.symbols)

    def __repr__(self):
        return repr(list(self.symbols.values()))

# }}}

# {{{ types and definitions


class Def(object):
    """Base class of all type and symbol definitions.

    Keyword arguments given to the constructor are kept in `attrs` and are
    readable as ordinary attributes.  Two definitions are compared by
    their `attrs` only; `id` and `name` take no part in the result.
    """

    _is_alias = False

    def __init__(self, id, name, **kwargs):
        self.id = id
        self.name = name
        self.attrs = kwargs

    def __getattr__(self, attr):
        # Only reached when normal lookup fails.  Go through __dict__ so an
        # instance without 'attrs' cannot recurse into this method.
        attrs = self.__dict__.get('attrs', {})
        if attr not in attrs:
            raise AttributeError('%s in %s' % (attr, str(self)))
        return attrs[attr]

    def _name_opt(self, default=''):
        """Return the name, or `default` when the definition is unnamed."""
        if not self.name:
            return default
        return self.name

    def _alias(self):
        """Follow alias definitions (`_is_alias`) down to the real one."""
        if self._is_alias:
            return self.type._alias()
        return self

    def _compare(self, other):
        """Return 0 when `self` and `other` are considered equal, else 1.

        Objects without `_alias()` never compare equal.  Definitions of
        different classes are deliberately reported as equal.  Results for
        pairs with non-zero ids are memoized in Dwarf.cmpcache (defined
        further down in this file).
        """
        # TODO assert 'self' and 'other' belong to different libraries
        a = self._alias()
        try:
            b = other._alias()
        except AttributeError:
            return 1
        if a.__class__ is not b.__class__:
            #raise RuntimeError('Comparing different classes: %s, %s' %
            #        (a.__class__.__name__, b.__class__.__name__))
            return 0
        ind = 0
        if a.id != 0 and b.id != 0:
            ind = (int(a.id) << 32) + b.id
            r = Dwarf.cmpcache.get(ind)
            if r is not None:
                return r
        r = 0 if a.attrs == b.attrs else 1
        if ind != 0:
            Dwarf.cmpcache.put(ind, r)
        return r

    def __eq__(self, other):
        return self._compare(other) == 0

    def __ne__(self, other):
        return self._compare(other) != 0

    def __cmp__(self, other):
        # Python 2 only; the result is meaningful for equality, not ordering.
        return self._compare(other)

    # Defining __eq__ removes the inherited hash on Python 3; keep the
    # identity hash that instances have always had.
    __hash__ = object.__hash__

    def __repr__(self):
        state = self.__dict__
        p = []
        if 'name' in state:
            p.append("name=%s" % state['name'])
        for (k, v) in state.get('attrs', {}).items():
            if isinstance(v, Def):
                v = v.__class__.__name__ + '(...)'
            p.append("%s=%s" % (k, v))
        return self.__class__.__name__ + '(' + ', '.join(p) + ')'

    def _mapval(self, param, vals):
        """Return vals[param]; raises NotImplementedError for unknown keys."""
        if param not in vals:
            raise NotImplementedError("Invalid value '%s': %s" %
                    (param, str(self)))
        return vals[param]

    def _pp_ex(self, pp):
        raise NotImplementedError('Extended pretty print not implemeted: %s' %
                str(self))

    def _pp(self, pp):
        raise NotImplementedError('Pretty print not implemeted: %s' % str(self))


class AnonymousDef(Def):
    """Definition that never has a name."""

    def __init__(self, id, **kwargs):
        Def.__init__(self, id, None, **kwargs)


class Void(AnonymousDef):
    """The 'void' type; every Void() call returns the same instance."""

    _instance = None

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            # object.__new__ rejects extra arguments, so none are forwarded.
            cls._instance = super(Void, cls).__new__(cls)
        return cls._instance

    def __init__(self):
        AnonymousDef.__init__(self, 0)

    def _pp(self, pp):
        return "void"


class VarArgs(AnonymousDef):
    """Variable argument list marker, printed as '...'."""

    def _pp(self, pp):
        return "..."
class PointerDef(AnonymousDef):
    """Pointer to `type`; printed as the pointee followed by '*'."""

    def _pp(self, pp):
        return "%s*" % (pp.run(self.type),)


class BaseTypeDef(Def):
    """Base type described by its `encoding` and `byte_size` attributes."""

    inttypes = ['DW_ATE_signed', 'DW_ATE_unsigned', 'DW_ATE_unsigned_char']

    def _pp(self, pp):
        encoding = self.encoding
        if encoding in self.inttypes:
            # Integer types are printed as fixed-width names, e.g. uint32_t.
            prefix = 'u'
            if encoding == 'DW_ATE_signed':
                prefix = ''
            return '%sint%s_t' % (prefix, int(self.byte_size, 0) * 8)
        if encoding == 'DW_ATE_signed_char' and int(self.byte_size, 0) == 1:
            return 'char'
        if encoding == 'DW_ATE_boolean' and int(self.byte_size, 0) == 1:
            return 'bool'
        if encoding == 'DW_ATE_float':
            float_names = {
                16: 'long double',
                8: 'double',
                4: 'float',
            }
            return self._mapval(int(self.byte_size, 0), float_names)
        raise NotImplementedError('Invalid encoding: %s' % self)


class TypeAliasDef(Def):
    """Typedef; transparent for comparison and printing."""

    _is_alias = True

    def _pp(self, pp):
        target = self._alias()
        # push typedef name
        if self.name and not target.name:
            target.name = 'T(%s)' % self.name
        # return type with modifiers
        return self.type._pp(pp)


class EnumerationTypeDef(Def):
    """Enumeration; only its name is printed."""

    def _pp(self, pp):
        return 'enum ' + self._name_opt('UNKNOWN')


class ConstTypeDef(AnonymousDef):
    """'const' qualifier around `type`."""

    _is_alias = True

    def _pp(self, pp):
        return 'const ' + self.type._pp(pp)


class VolatileTypeDef(AnonymousDef):
    """'volatile' qualifier around `type`."""

    _is_alias = True

    def _pp(self, pp):
        return 'volatile ' + self.type._pp(pp)


class RestrictTypeDef(AnonymousDef):
    """'restrict' qualifier around `type`."""

    _is_alias = True

    def _pp(self, pp):
        return 'restrict ' + self.type._pp(pp)


class ArrayDef(AnonymousDef):
    """Array with exactly one subrange (checked with assert)."""

    def _pp(self, pp):
        elem = pp.run(self.type)
        assert len(self.subranges) == 1
        bound = self.subranges[0].upper_bound
        try:
            count = int(bound) + 1
        except ValueError:
            # Drop a parenthesised part of the bound text and retry.
            count = int(re.sub(r'\(.+\)', '', bound)) + 1
        return '%s[%s]' % (elem, count)


class ArraySubrangeDef(AnonymousDef):
    pass


class FunctionDef(Def):
    """Function, printed as a prototype."""

    def _pp(self, pp):
        ret = pp.run(self.result)
        if self.params:
            arglist = ', '.join(pp.run(p) for p in self.params)
        else:
            arglist = "void"
        return "%s %s(%s);" % (ret, self.name, arglist)


class FunctionTypeDef(Def):
    """Function type, printed in 'F(name, result, (params))' notation."""

    def _pp(self, pp):
        ret = pp.run(self.result)
        if self.params:
            arglist = ', '.join(pp.run(p) for p in self.params)
        else:
            arglist = "void"
        return "F(%s, %s, (%s))" % (self._name_opt(), ret, arglist)


class ParameterDef(Def):
    """Function parameter: type followed by the optional name."""

    def _pp(self, pp):
        return "%s %s" % (pp.run(self.type), self._name_opt())


class VariableDef(Def):
    """Variable: type followed by the optional name."""

    def _pp(self, pp):
        return "%s %s" % (pp.run(self.type), self._name_opt())


# TODO
class StructForwardDef(Def):
    pass


class IncompleteDef(Def):
    """Placeholder handed out while the complete definition is being built."""

    def update(self, complete, cache=None):
        """Link this placeholder with `complete`.

        When `cache` still holds an IncompleteDef under this id, the entry
        is replaced by `complete`.
        """
        self.complete = complete
        complete.incomplete = self
        if cache is None:
            return
        if isinstance(cache.get(self.id), IncompleteDef):
            cache.replace(self.id, complete)


class StructIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "struct %s" % (self.name,)


class UnionIncompleteDef(IncompleteDef):
    def _pp(self, pp):
        return "union %s" % (self.name,)


class StructDef(Def):
    """Structure; a named one is printed by reference and expanded separately."""

    def _pp_ex(self, pp, suffix=';'):
        body = ' '.join([pp.run(m) for m in self.members])
        return "struct %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        if not self.name:
            # Anonymous: the full body is printed in place.
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "struct %s" % (self.name,)


class UnionDef(Def):
    """Union; a named one is printed by reference and expanded separately."""

    def _pp_ex(self, pp, suffix=';'):
        body = ' '.join([pp.run(m) for m in self.members])
        return "union %s { %s }%s" % (self._name_opt(), body, suffix)

    def _pp(self, pp):
        if not self.name:
            # Anonymous: the full body is printed in place.
            return self._pp_ex(pp, suffix='')
        pp.run_nested(self)
        return "union %s" % (self.name,)
class MemberDef(Def):
    """Struct/union member; `bit_size`, when set, is printed as a bit-field."""

    def _pp(self, pp):
        t = pp.run(self.type)
        bits = ":%s" % self.bit_size if self.bit_size else ""
        return "%s %s%s;" % (t, self._name_opt(), bits)


class Dwarf(object):
    """Turns parsed dwarfdump tags into Def objects.

    build() dispatches on the tag name: 'DW_TAG_xxx' is handled by the
    method 'build_xxx'.
    """

    cmpcache = Cache(enabled=Config.cmpcache_enabled)

    def __init__(self, dump):
        self.dump = dump

    def _build_optarg_type(self, praw):
        """Build the definition referenced by the optional 'type' argument.

        Returns Void() when the tag has no 'type' argument.
        """
        ref = praw.optarg('type', Void())
        if isinstance(ref, Void):
            return ref
        return self.buildref(praw.unit, ref)

    def _build_aggregate(self, raw, incomplete_cls, complete_cls):
        """Common part of the struct and union builders.

        While the members are being built, a placeholder of class
        `incomplete_cls` is registered for the tag id; a nested reference
        to the same tag gets that placeholder instead of recursing.
        """
        incomplete = raw.unit.incomplete.get(raw.id)
        if incomplete is not None:
            return incomplete
        incomplete = incomplete_cls(raw.id, raw.optname)
        raw.unit.incomplete.put(raw.id, incomplete)
        members = [self.build(x) for x in raw.nested]
        obj = complete_cls(raw.id, raw.optname, members=members,
                byte_size=raw.optarg('byte_size', None))
        incomplete.update(obj, cache=raw.unit.cache)
        return obj

    def build_subprogram(self, raw):
        if raw.optname is None:
            raw.setname('SUBPROGRAM_NONAME_' + raw.arg('low_pc'))
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionDef(raw.id, raw.name, params=params, result=result)

    def build_variable(self, raw):
        return VariableDef(raw.id, raw.optname,
                type=self._build_optarg_type(raw))

    def build_subroutine_type(self, raw):
        params = [self.build(x) for x in raw.nested]
        result = self._build_optarg_type(raw)
        return FunctionTypeDef(raw.id, raw.optname, params=params, result=result)

    def build_formal_parameter(self, raw):
        return ParameterDef(raw.id, raw.optname,
                type=self._build_optarg_type(raw))

    def build_pointer_type(self, raw):
        return PointerDef(raw.id, type=self._build_optarg_type(raw))

    def build_member(self, raw):
        member_type = self.buildref(raw.unit, raw.arg('type'))
        return MemberDef(raw.id, raw.name, type=member_type,
                bit_size=raw.optarg('bit_size', None))

    def build_structure_type(self, raw):
        # A StructForwardDef used to be created here for tags without
        # byte_size and was then discarded; its only effect was a KeyError
        # for unnamed tags, so it is gone.
        return self._build_aggregate(raw, StructIncompleteDef, StructDef)

    def build_union_type(self, raw):
        return self._build_aggregate(raw, UnionIncompleteDef, UnionDef)

    def build_typedef(self, raw):
        return TypeAliasDef(raw.id, raw.name,
                type=self._build_optarg_type(raw))

    def build_const_type(self, raw):
        return ConstTypeDef(raw.id, type=self._build_optarg_type(raw))

    def build_volatile_type(self, raw):
        return VolatileTypeDef(raw.id, type=self._build_optarg_type(raw))

    def build_restrict_type(self, raw):
        return RestrictTypeDef(raw.id, type=self._build_optarg_type(raw))

    def build_enumeration_type(self, raw):
        # TODO handle DW_TAG_enumerator ???
        return EnumerationTypeDef(raw.id, name=raw.optname,
                byte_size=raw.arg('byte_size'))

    def build_base_type(self, raw):
        return BaseTypeDef(raw.id, raw.optname,
                byte_size=raw.arg('byte_size'), encoding=raw.arg('encoding'))

    def build_array_type(self, raw):
        elem_type = self.buildref(raw.unit, raw.arg('type'))
        subranges = [self.build(x) for x in raw.nested]
        return ArrayDef(raw.id, type=elem_type, subranges=subranges)

    def build_subrange_type(self, raw):
        index_type = self.buildref(raw.unit, raw.arg('type'))
        return ArraySubrangeDef(raw.id, type=index_type,
                upper_bound=raw.optarg('upper_bound', 0))

    def build_unspecified_parameters(self, raw):
        return VarArgs(raw.id)

    def _get_id(self, id):
        """Convert a tag reference ('123' or '<0x7b>') to an integer id."""
        try:
            return int(id)
        except ValueError:
            if id.startswith('<') and id.endswith('>'):
                return int(id[1:-1], 0)
            raise ValueError("Invalid dwarf id: %s" % id)

    def build(self, raw):
        """Return the definition for tag `raw`, using the unit cache.

        Raises AttributeError for a tag that has no build_* method.
        """
        obj = raw.unit.cache.get(raw.id)
        if obj is not None:
            return obj
        builder = getattr(self, raw.tag.replace('DW_TAG_', 'build_'), None)
        if builder is None:
            raise AttributeError("Unknown dwarf tag: %s" % raw)
        obj = builder(raw)
        raw.unit.cache.put(obj.id, obj)
        return obj

    def buildref(self, unit, id):
        """Build the definition of the tag that `id` refers to in `unit`."""
        return self.build(unit.tags[self._get_id(id)])

# }}}


class Shlib(object):
    """One shared library: its versioned symbols and their definitions."""

    def __init__(self, libfile):
        self.libfile = libfile
        self.versions = {}
        self.alias_syms = {}
        # Filled by parse_objdump() only when alias prefixes are configured.
        self.local_offsetmap = {}

    def parse_objdump(self):
        """Collect dynamic symbols (and alias candidates) from objdump."""
        objdump = ObjdumpParser(self.libfile)
        objdump.run()
        for p in objdump.dynamic_symbols:
            vername = p['ver']
            if vername.startswith('(') and vername.endswith(')'):
                vername = vername[1:-1]
            if not Config.version_filter.match(vername):
                continue
            if not Config.symbol_filter.match(p['symbol']):
                continue
            sym = Symbol(p['symbol'], p['offset'], vername, self)
            if vername not in self.versions:
                self.versions[vername] = VersionMap(vername)
            self.versions[vername].append(sym)
        if Config.alias_prefixes:
            self.local_offsetmap = objdump.local_offsetmap
            for p in objdump.local_symbols:
                for prefix in Config.alias_prefixes:
                    if not p['symbol'].startswith(prefix):
                        continue
                    alias = SymbolAlias(p['symbol'], prefix, p['offset'])
                    if alias.name in self.alias_syms:
                        prevalias = self.alias_syms[alias.name]
                        if alias.name != prevalias.name or \
                                alias.offset != prevalias.offset:
                            warn(Config.w_alias, "Symbol alias is " \
                                    "already defined: %s: %s at %08x -- %s at %08x" % \
                                    (alias.alias, alias.name, alias.offset,
                                    prevalias.name, prevalias.offset))
                    self.alias_syms[alias.name] = alias

    def _lookup_raw(self, dwarfdump, sym):
        """Return the dwarf tag for `sym`, or None when there is none.

        The symbol offset is tried first.  Otherwise the local names found
        at that offset are checked, longest name first; the first one that
        has a registered alias decides the result.
        """
        raw = dwarfdump.offsetmap.get(sym.offset)
        if raw is not None:
            return raw
        localnames = self.local_offsetmap.get(sym.offset, [])
        for localname in sorted(localnames, key=len, reverse=True):
            alias = self.alias_syms.get(localname)
            if alias is not None:
                return dwarfdump.offsetmap.get(alias.offset)
        return None

    def parse_dwarfdump(self):
        """Attach a definition to every symbol collected by parse_objdump()."""
        dwarfdump = DwarfdumpParser(self.libfile)
        dwarfdump.run()
        dwarf = Dwarf(dwarfdump)
        for ver in self.versions.values():
            for sym in ver.symbols.values():
                raw = self._lookup_raw(dwarfdump, sym)
                if not raw:
                    warn(Config.w_symbol, "Symbol %s (%s) not found at offset 0x%x" % \
                            (sym.name_ver, self.libfile, sym.offset))
                    continue
                if Config.verbose >= 3:
                    print("Parsing symbol %s (%s)" % (sym.name_ver, self.libfile))
                sym.definition = dwarf.build(raw)

    def parse(self):
        """Parse the library; exits with status 1 when the file is missing."""
        if not os.path.isfile(self.libfile):
            print("No such file: %s" % self.libfile, file=sys.stderr)
            sys.exit(1)
        self.parse_objdump()
        self.parse_dwarfdump()

# {{{ parsers


class Parser(object):
    """Runs a command and feeds its non-empty output lines to `self.parser`.

    `self.parser` is a bound method; subclasses switch it while parsing to
    implement a simple state machine.
    """

    def __init__(self, proc):
        self.proc = proc
        self.parser = self.parse_begin

    def run(self):
        """Run the command; exits with status 2 if it reports failure."""
        # NOTE(review): the command is one string handed to the shell, so a
        # library path with spaces or shell metacharacters is not safe here.
        fd = os.popen(self.proc, 'r')
        try:
            for line in fd:
                line = line.strip()
                if line:
                    self.parser(line)
        except BaseException:
            # Do not leave the pipe open when a parser callback fails.
            fd.close()
            raise
        err = fd.close()
        if err:
            print("Execution failed: %s" % self.proc, file=sys.stderr)
            sys.exit(2)

    def parse_begin(self, line):
        print(line)


class ObjdumpParser(Parser):
    """Parser for the symbol tables printed by 'objdump -wtT'."""

    re_header = re.compile(r'(?P<table>\w*)\s*SYMBOL TABLE:')

    re_local_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*(?P<symbol>[^\s]*)')
    re_lame_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+\*[A-Z]+\*')

    re_dynamic_symbol = re.compile(r'(?P<offset>[0-9a-fA-F]+)\s+(?P<bind>\w+)\s+(?P<type>\w+)\s+(?P<section>[^\s]+)\s+(?P<foffset>[0-9a-fA-F]+)\s*(?P<ver>[^\s]*)\s*(?P<symbol>[^\s]*)')

    def __init__(self, libfile):
        Parser.__init__(self, "%s -wtT %s" % (Config.objdump, libfile))
        self.dynamic_symbols = []
        self.local_symbols = []
        self.local_offsetmap = {}

    def parse_begin(self, line):
        self.parse_header(line)

    def add_symbol(self, table, symbol, offsetmap = None):
        """Append `symbol` to `table`; symbols at offset 0 are dropped.

        The 'offset' entry is converted from hex text to int in place.
        With `offsetmap`, the name is also recorded under its offset.
        """
        offset = int(symbol['offset'], 16)
        symbol['offset'] = offset
        if offset == 0:
            return
        table.append(symbol)
        if offsetmap is not None:
            offsetmap.setdefault(offset, []).append(symbol['symbol'])

    def parse_header(self, line):
        """Switch parser state on a 'SYMBOL TABLE:' line.

        Returns True when `line` was such a header.
        """
        m = self.re_header.match(line)
        if not m:
            return False
        table = m.group('table')
        if table == "DYNAMIC":
            self.parser = self.parse_dynamic
        elif table == '':
            self.parser = self.parse_local
        else:
            raise ValueError("Invalid symbol table: %s" % table)
        return True

    def parse_local(self, line):
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_local_symbol.match(line)
        if not m:
            return
            #raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['symbol'].find('@') == -1:
            self.add_symbol(self.local_symbols, p, self.local_offsetmap)

    def parse_dynamic(self, line):
        if self.parse_header(line):
            return
        if self.re_lame_symbol.match(line):
            return
        m = self.re_dynamic_symbol.match(line)
        if not m:
            raise ValueError("Invalid symbol definition: %s" % line)
        p = m.groupdict()
        if p['symbol'] and p['ver']:
            self.add_symbol(self.dynamic_symbols, p)


class DwarfdumpParser(Parser):
    """Parser for the .debug_info section printed by 'dwarfdump -di'."""

    tagcache_stats = Cache.CacheStats()

    class Unit(object):
        """A unit of the dump: its tags by id and its definition caches."""

        def __init__(self):
            self.cache = Cache(enabled=Config.dwarfcache_enabled,
                    stats=DwarfdumpParser.tagcache_stats)
            self.incomplete = Cache()
            self.tags = {}

    class Tag(object):
        """One dump entry: level, id, tag name, arguments and nested tags."""

        def __init__(self, unit, data):
            self.unit = unit
            self.id = int(data['id'], 0)
            self.level = int(data['level'])
            self.tag = data['tag']
            self.args = {}
            self.nested = []

        @property
        def name(self):
            return self.arg('name')

        @property
        def optname(self):
            return self.optarg('name', None)

        def setname(self, name):
            self.args['DW_AT_name'] = name

        def arg(self, a):
            """Return argument 'DW_AT_<a>'; raises KeyError when missing."""
            name = 'DW_AT_' + a
            try:
                return self.args[name]
            except KeyError:
                raise KeyError("Argument '%s' not found in %s: %s" %
                        (name, self, self.args))

        def optarg(self, a, default):
            """Return argument 'DW_AT_<a>', or `default` when missing."""
            try:
                return self.arg(a)
            except KeyError:
                return default

        def __repr__(self):
            return "Tag(%d, %d, %s)" % (self.level, self.id, self.tag)

    re_header = re.compile(r'<(?P<level>\d+)><(?P<id>[0xX0-9a-fA-F]+(?:\+(0[xX])?[0-9a-fA-F]+)?)><(?P<tag>\w+)>')
    re_argname = re.compile(r'(?P<arg>\w+)<')
    re_argunknown = re.compile(r'<Unknown AT value \w+><[^<>]+>')

    skip_tags = set([
        'DW_TAG_lexical_block',
        'DW_TAG_inlined_subroutine',
        'DW_TAG_label',
        'DW_TAG_variable',
        ])

    external_tags = set([
        'DW_TAG_variable',
        ])

    def __init__(self, libfile):
        Parser.__init__(self, "%s -di %s" % (Config.dwarfdump, libfile))
        self.current_unit = None
        self.offsetmap = {}
        self.stack = []

    def parse_begin(self, line):
        if line == '.debug_info':
            self.parser = self.parse_debuginfo
        else:
            raise ValueError("Invalid dwarfdump header: %s" % line)

    def parse_argvalue(self, args):
        """Split the leading '<...>' value off `args`.

        Nested angle brackets are balanced.  Returns (rest, value) where
        value excludes the outer brackets.
        """
        assert args.startswith('<')
        i = 1
        cnt = 1
        while i < len(args) and args[i]:
            if args[i] == '<':
                cnt += 1
            elif args[i] == '>':
                cnt -= 1
                if cnt == 0:
                    break
            i = i + 1
        value = args[1:i]
        args = args[i+1:]
        return (args, value)

    def parse_arg(self, tag, args):
        """Parse one 'NAME<value>...' argument into tag.args.

        An argument with several values is stored as a list.  Unknown
        attributes are skipped.  Returns the unparsed remainder.
        """
        m = self.re_argname.match(args)
        if not m:
            m = self.re_argunknown.match(args)
            if not m:
                raise ValueError("Invalid dwarfdump: couldn't parse arguments: %s" %
                        args)
            return args[len(m.group(0)):].lstrip()
        argname = m.group('arg')
        args = args[len(argname):]
        value = []
        while len(args) > 0 and args.startswith('<'):
            (args, v) = self.parse_argvalue(args)
            value.append(v)
        args = args.lstrip()
        if len(value) == 1:
            value = value[0]
        tag.args[argname] = value
        return args

    @staticmethod
    def _parse_offset(tag):
        """Return the address of `tag` from low_pc or location, or None."""
        if 'DW_AT_low_pc' in tag.args:
            return int(tag.args['DW_AT_low_pc'], 16)
        elif 'DW_AT_location' in tag.args:
            location = tag.args['DW_AT_location']
            if location.startswith('DW_OP_addr'):
                return int(location.replace('DW_OP_addr', ''), 16)
        return None

    def parse_debuginfo(self, line):
        """Parse one tag line and link the tag into the nesting tree."""
        m = self.re_header.match(line)
        if not m:
            raise ValueError("Invalid dwarfdump: %s" % line)
        if m.group('level') == '0':
            # A level 0 entry starts a new unit.
            self.current_unit = DwarfdumpParser.Unit()
            return
        tag = DwarfdumpParser.Tag(self.current_unit, m.groupdict())
        args = line[len(m.group(0)):].lstrip()
        while args:
            args = self.parse_arg(tag, args)
        tag.unit.tags[tag.id] = tag
        offset = self._parse_offset(tag)
        if offset is not None and \
                (tag.tag not in DwarfdumpParser.skip_tags or \
                ('DW_AT_external' in tag.args and \
                tag.tag in DwarfdumpParser.external_tags)):
            if offset in self.offsetmap:
                raise ValueError("Dwarf dump parse error: " +
                        "symbol is already defined at offset 0x%x" % offset)
            self.offsetmap[offset] = tag
        if len(self.stack) > 0:
            # Unwind to the parent: the closest entry with a lower level.
            prev = self.stack.pop()
            while prev.level >= tag.level and len(self.stack) > 0:
                prev = self.stack.pop()
            if prev.level < tag.level:
                assert prev.level == tag.level - 1
                # TODO check DW_AT_sibling ???
                if tag.tag not in DwarfdumpParser.skip_tags:
                    prev.nested.append(tag)
                self.stack.append(prev)
        self.stack.append(tag)
        assert len(self.stack) == tag.level

# }}}


def list_str(l):
    """Return the items of `l` as sorted, comma separated text."""
    return ', '.join(sorted(str(x) for x in l))


def names_ver_str(vername, names):
    """Return `names` as a sorted 'name@vername' list in text form."""
    return list_str([x + "@" + vername for x in names])


def common_symbols(origlib, newlib):
    """Report added/removed versions and symbols of two libraries.

    Returns a list of VersionMap objects, one per common version, holding
    a CommonSymbol for every symbol present in both libraries.
    """
    result = []
    verdiff = ListDiff(origlib.versions.keys(), newlib.versions.keys())
    if Config.verbose >= 1:
        print('Original versions: ', list_str(verdiff.orig))
        print('New versions: ', list_str(verdiff.new))
    for vername in verdiff.added:
        print('Added version: ', vername)
        print(' Added symbols: ', \
                names_ver_str(vername, newlib.versions[vername].names()))
    for vername in verdiff.removed:
        print('Removed version: ', vername)
        print(' Removed symbols: ', \
                names_ver_str(vername, origlib.versions[vername].names()))
    added = []
    removed = []
    for vername in verdiff.common:
        origver = origlib.versions[vername]
        newver = newlib.versions[vername]
        namediff = ListDiff(origver.names(), newver.names())
        if namediff.added:
            added.append(names_ver_str(vername, namediff.added))
        if namediff.removed:
            removed.append(names_ver_str(vername, namediff.removed))
        commonver = VersionMap(vername)
        result.append(commonver)
        for n in namediff.common:
            sym = CommonSymbol(origver.symbols[n], newver.symbols[n])
            commonver.append(sym)
    if added:
        print('Added symbols:')
        for i in added:
            print(' ', i)
    if removed:
        print('Removed symbols:')
        for i in removed:
            print(' ', i)
    return result


def cmp_symbols(commonver):
    """Compare the definitions of all common symbols.

    Sets App.result_code to 1 on the first symbol whose definitions differ
    or are missing, and prints the definitions according to Config.
    """
    for ver in commonver:
        # sorted() also works when names() returns a dict view.
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            missing = sym.origsym.definition is None or sym.newsym.definition is None
            match = not missing and sym.origsym.definition == sym.newsym.definition
            if not match:
                App.result_code = 1
            if Config.verbose >= 1 or not match:
                if missing:
                    print('%s: missing definition' % \
                            (sym.origsym.name_ver,))
                    continue
                print('%s: definitions %smatch' % \
                        (sym.origsym.name_ver, "" if match else "mis"))
                if Config.dump or (not match and not Config.no_dump):
                    for (xsym, xconf) in [(sym.origsym, Config.origfile),
                            (sym.newsym, Config.newfile)]:
                        xout = xconf.out
                        if not xsym.definition:
                            print('\n// Definition not found: %s %s' % \
                                    (xsym.name_ver, xsym.lib.libfile), file=xout)
                            continue
                        print('\n// Definitions mismatch: %s %s' % \
                                (xsym.name_ver, xsym.lib.libfile), file=xout)
                        pp = PrettyPrinter()
                        pp.run(xsym.definition)
                        for i in pp.nested():
                            print(i, file=xout)
                        print(pp.result(), file=xout)


def dump_symbols(commonver):
    """Dump the definitions of all common symbols, one version at a time."""

    class SymbolDump(object):
        """Collects the printed symbols of one library for one version."""

        def __init__(self, io_conf):
            self.io_conf = io_conf
            self.pp = PrettyPrinter()
            self.res = []

        def run(self, sym):
            r = self.pp.run(sym.definition)
            self.res.append('/* %s@%s */ %s' % (sym.name, sym.version, r))

        def finish(self, vername):
            out = self.io_conf.out
            print('\n// Symbol dump: version %s, library %s' % \
                    (vername, self.io_conf.filename), file=out)
            for i in self.pp.nested():
                print(i, file=out)
            print('', file=out)
            for i in self.res:
                print(i, file=out)

    for ver in commonver:
        d_orig = SymbolDump(Config.origfile)
        d_new = SymbolDump(Config.newfile)
        for symname in sorted(ver.names()):
            sym = ver.symbols[symname]
            if not sym.origsym.definition or not sym.newsym.definition:
                # XXX
                warn(Config.w_symbol, 'Missing symbol definition: %s@%s' % \
                        (symname, ver.name))
                continue
            d_orig.run(sym.origsym)
            d_new.run(sym.newsym)
        d_orig.finish(ver.name)
        d_new.finish(ver.name)


if __name__ == '__main__':
    Config.init()
    parser = optparse.OptionParser(usage="usage: %prog origlib newlib",
            version="%prog " + Config.version)
    parser.add_option('-v', '--verbose', action='count',
            help="verbose mode, may be specified several times")
    parser.add_option('--alias-prefix', action='append',
            help="name prefix to try for symbol alias lookup", metavar="STR")
    parser.add_option('--dump', action='store_true',
            help="dump symbol definitions")
    parser.add_option('--no-dump', action='store_true',
            help="disable dump for mismatched symbols")
    parser.add_option('--out-orig', action='store',
            help="result output file for original library", metavar="ORIGFILE")
    parser.add_option('--out-new', action='store',
            help="result output file for new library", metavar="NEWFILE")
    parser.add_option('--dwarfdump', action='store',
            help="path to dwarfdump executable", metavar="DWARFDUMP")
    parser.add_option('--objdump', action='store',
            help="path to objdump executable", metavar="OBJDUMP")
    parser.add_option('--exclude-ver', action='append', metavar="RE")
    parser.add_option('--include-ver', action='append', metavar="RE")
    parser.add_option('--exclude-sym', action='append', metavar="RE")
    parser.add_option('--include-sym', action='append', metavar="RE")
    parser.add_option('--no-exclude-sym-default', action='store_true',
            help="don't exclude special symbols like _init, _end, __bss_start")
    for opt in ['alias', 'cached', 'symbol']:
        parser.add_option("--w-" + opt,
                action="store_true", dest="w_" + opt)
        parser.add_option("--w-no-" + opt,
                action="store_false", dest="w_" + opt)
    (opts, args) = parser.parse_args()

    if len(args) != 2:
        parser.print_help()
        sys.exit(-1)
    if opts.dwarfdump:
        Config.dwarfdump = opts.dwarfdump
    if opts.objdump:
        Config.objdump = opts.objdump
    if opts.out_orig:
        Config.origfile.init(opts.out_orig)
    if opts.out_new:
        Config.newfile.init(opts.out_new)
    if opts.no_dump:
        Config.dump = False
        Config.no_dump = True
    if opts.dump:
        # --dump wins over --no-dump and implies at least one -v.
        Config.dump = True
        Config.no_dump = False
        Config.verbose = 1
    if opts.verbose:
        Config.verbose = opts.verbose
    if opts.alias_prefix:
        # Longest prefix first.
        Config.alias_prefixes = opts.alias_prefix
        Config.alias_prefixes.sort(key=lambda x: -len(x))
    for (k, v) in ({ '_sym': Config.symbol_filter,
            '_ver': Config.version_filter }).items():
        for a in [ 'exclude', 'include' ]:
            opt = getattr(opts, a + k)
            if opt:
                getattr(v, a).extend(opt)
    if not opts.no_exclude_sym_default:
        Config.symbol_filter.exclude.extend(Config.exclude_sym_default)
    Config.version_filter.compile()
    Config.symbol_filter.compile()
    for w in ['w_alias', 'w_cached', 'w_symbol']:
        # None means neither --w-X nor --w-no-X was given: keep the default.
        v = getattr(opts, w, None)
        if v is not None:
            setattr(Config, w, v)

    (Config.origfile.filename, Config.newfile.filename) = (args[0], args[1])

    origlib = Shlib(Config.origfile.filename)
    origlib.parse()
    newlib = Shlib(Config.newfile.filename)
    newlib.parse()

    commonver = common_symbols(origlib, newlib)
    if Config.dump:
        dump_symbols(commonver)
    cmp_symbols(commonver)
    if Config.verbose >= 4:
        print(Dwarf.cmpcache.stats.show('Cmp'))
        print(DwarfdumpParser.tagcache_stats.show('Dwarf tag'))

    sys.exit(App.result_code)