1 //===-- LVBinaryReader.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements the LVBinaryReader class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h" 14 #include "llvm/Support/Errc.h" 15 #include "llvm/Support/FormatAdapters.h" 16 #include "llvm/Support/FormatVariadic.h" 17 18 using namespace llvm; 19 using namespace llvm::logicalview; 20 21 #define DEBUG_TYPE "BinaryReader" 22 23 // Function names extracted from the object symbol table. 24 void LVSymbolTable::add(StringRef Name, LVScope *Function, 25 LVSectionIndex SectionIndex) { 26 std::string SymbolName(Name); 27 if (SymbolNames.find(SymbolName) == SymbolNames.end()) { 28 SymbolNames.emplace( 29 std::piecewise_construct, std::forward_as_tuple(SymbolName), 30 std::forward_as_tuple(Function, 0, SectionIndex, false)); 31 } else { 32 // Update a recorded entry with its logical scope and section index. 33 SymbolNames[SymbolName].Scope = Function; 34 if (SectionIndex) 35 SymbolNames[SymbolName].SectionIndex = SectionIndex; 36 } 37 38 if (Function && SymbolNames[SymbolName].IsComdat) 39 Function->setIsComdat(); 40 41 LLVM_DEBUG({ print(dbgs()); }); 42 } 43 44 void LVSymbolTable::add(StringRef Name, LVAddress Address, 45 LVSectionIndex SectionIndex, bool IsComdat) { 46 std::string SymbolName(Name); 47 if (SymbolNames.find(SymbolName) == SymbolNames.end()) 48 SymbolNames.emplace( 49 std::piecewise_construct, std::forward_as_tuple(SymbolName), 50 std::forward_as_tuple(nullptr, Address, SectionIndex, IsComdat)); 51 else 52 // Update a recorded symbol name with its logical scope. 53 SymbolNames[SymbolName].Address = Address; 54 55 LVScope *Function = SymbolNames[SymbolName].Scope; 56 if (Function && IsComdat) 57 Function->setIsComdat(); 58 LLVM_DEBUG({ print(dbgs()); }); 59 } 60 61 LVSectionIndex LVSymbolTable::update(LVScope *Function) { 62 LVSectionIndex SectionIndex = getReader().getDotTextSectionIndex(); 63 StringRef Name = Function->getLinkageName(); 64 if (Name.empty()) 65 Name = Function->getName(); 66 std::string SymbolName(Name); 67 68 if (SymbolName.empty() || (SymbolNames.find(SymbolName) == SymbolNames.end())) 69 return SectionIndex; 70 71 // Update a recorded entry with its logical scope, only if the scope has 72 // ranges. That is the case when in DWARF there are 2 DIEs connected via 73 // the DW_AT_specification. 74 if (Function->getHasRanges()) { 75 SymbolNames[SymbolName].Scope = Function; 76 SectionIndex = SymbolNames[SymbolName].SectionIndex; 77 } else { 78 SectionIndex = UndefinedSectionIndex; 79 } 80 81 if (SymbolNames[SymbolName].IsComdat) 82 Function->setIsComdat(); 83 84 LLVM_DEBUG({ print(dbgs()); }); 85 return SectionIndex; 86 } 87 88 const LVSymbolTableEntry &LVSymbolTable::getEntry(StringRef Name) { 89 static LVSymbolTableEntry Empty = LVSymbolTableEntry(); 90 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 91 return Iter != SymbolNames.end() ? Iter->second : Empty; 92 } 93 LVAddress LVSymbolTable::getAddress(StringRef Name) { 94 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 95 return Iter != SymbolNames.end() ? Iter->second.Address : 0; 96 } 97 LVSectionIndex LVSymbolTable::getIndex(StringRef Name) { 98 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 99 return Iter != SymbolNames.end() ? Iter->second.SectionIndex 100 : getReader().getDotTextSectionIndex(); 101 } 102 bool LVSymbolTable::getIsComdat(StringRef Name) { 103 LVSymbolNames::iterator Iter = SymbolNames.find(std::string(Name)); 104 return Iter != SymbolNames.end() ? Iter->second.IsComdat : false; 105 } 106 107 void LVSymbolTable::print(raw_ostream &OS) { 108 OS << "Symbol Table\n"; 109 for (LVSymbolNames::reference Entry : SymbolNames) { 110 LVSymbolTableEntry &SymbolName = Entry.second; 111 LVScope *Scope = SymbolName.Scope; 112 LVOffset Offset = Scope ? Scope->getOffset() : 0; 113 OS << "Index: " << hexValue(SymbolName.SectionIndex, 5) 114 << " Comdat: " << (SymbolName.IsComdat ? "Y" : "N") 115 << " Scope: " << hexValue(Offset) 116 << " Address: " << hexValue(SymbolName.Address) 117 << " Name: " << Entry.first << "\n"; 118 } 119 } 120 121 void LVBinaryReader::addToSymbolTable(StringRef Name, LVScope *Function, 122 LVSectionIndex SectionIndex) { 123 SymbolTable.add(Name, Function, SectionIndex); 124 } 125 void LVBinaryReader::addToSymbolTable(StringRef Name, LVAddress Address, 126 LVSectionIndex SectionIndex, 127 bool IsComdat) { 128 SymbolTable.add(Name, Address, SectionIndex, IsComdat); 129 } 130 LVSectionIndex LVBinaryReader::updateSymbolTable(LVScope *Function) { 131 return SymbolTable.update(Function); 132 } 133 134 const LVSymbolTableEntry &LVBinaryReader::getSymbolTableEntry(StringRef Name) { 135 return SymbolTable.getEntry(Name); 136 } 137 LVAddress LVBinaryReader::getSymbolTableAddress(StringRef Name) { 138 return SymbolTable.getAddress(Name); 139 } 140 LVSectionIndex LVBinaryReader::getSymbolTableIndex(StringRef Name) { 141 return SymbolTable.getIndex(Name); 142 } 143 bool LVBinaryReader::getSymbolTableIsComdat(StringRef Name) { 144 return SymbolTable.getIsComdat(Name); 145 } 146 147 void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) { 148 for (const object::SectionRef &Section : Obj.sections()) { 149 LLVM_DEBUG({ 150 Expected<StringRef> SectionNameOrErr = Section.getName(); 151 StringRef Name; 152 if (!SectionNameOrErr) 153 consumeError(SectionNameOrErr.takeError()); 154 else 155 Name = *SectionNameOrErr; 156 dbgs() << "Index: " << format_decimal(Section.getIndex(), 3) << ", " 157 << "Address: " << hexValue(Section.getAddress()) << ", " 158 << "Size: " << hexValue(Section.getSize()) << ", " 159 << "Name: " << Name << "\n"; 160 dbgs() << "isCompressed: " << Section.isCompressed() << ", " 161 << "isText: " << Section.isText() << ", " 162 << "isData: " << Section.isData() << ", " 163 << "isBSS: " << Section.isBSS() << ", " 164 << "isVirtual: " << Section.isVirtual() << "\n"; 165 dbgs() << "isBitcode: " << Section.isBitcode() << ", " 166 << "isStripped: " << Section.isStripped() << ", " 167 << "isBerkeleyText: " << Section.isBerkeleyText() << ", " 168 << "isBerkeleyData: " << Section.isBerkeleyData() << ", " 169 << "isDebugSection: " << Section.isDebugSection() << "\n"; 170 dbgs() << "\n"; 171 }); 172 173 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 174 continue; 175 176 // Record section information required for symbol resolution. 177 // Note: The section index returned by 'getIndex()' is one based. 178 Sections.emplace(Section.getIndex(), Section); 179 addSectionAddress(Section); 180 181 // Identify the ".text" section. 182 Expected<StringRef> SectionNameOrErr = Section.getName(); 183 if (!SectionNameOrErr) { 184 consumeError(SectionNameOrErr.takeError()); 185 continue; 186 } 187 if (*SectionNameOrErr == ".text" || *SectionNameOrErr == "CODE" || 188 *SectionNameOrErr == ".code") { 189 DotTextSectionIndex = Section.getIndex(); 190 // If the object is WebAssembly, update the address offset that 191 // will be added to DWARF DW_AT_* attributes. 192 if (Obj.isWasm()) 193 WasmCodeSectionOffset = Section.getAddress(); 194 } 195 } 196 197 // Process the symbol table. 198 mapRangeAddress(Obj); 199 200 LLVM_DEBUG({ 201 dbgs() << "\nSections Information:\n"; 202 for (LVSections::reference Entry : Sections) { 203 LVSectionIndex SectionIndex = Entry.first; 204 const object::SectionRef Section = Entry.second; 205 Expected<StringRef> SectionNameOrErr = Section.getName(); 206 if (!SectionNameOrErr) 207 consumeError(SectionNameOrErr.takeError()); 208 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 209 << " Name: " << *SectionNameOrErr << "\n" 210 << "Size: " << hexValue(Section.getSize()) << "\n" 211 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 212 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"; 213 } 214 dbgs() << "\nObject Section Information:\n"; 215 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 216 dbgs() << "[" << hexValue(Entry.first) << ":" 217 << hexValue(Entry.first + Entry.second.getSize()) 218 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 219 }); 220 } 221 222 void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) { 223 ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase(); 224 if (ImageBase) 225 ImageBaseAddress = ImageBase.get(); 226 227 LLVM_DEBUG({ 228 dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n"; 229 }); 230 231 uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT; 232 233 for (const object::SectionRef &Section : COFFObj.sections()) { 234 if (!Section.isText() || Section.isVirtual() || !Section.getSize()) 235 continue; 236 237 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 238 VirtualAddress = COFFSection->VirtualAddress; 239 bool IsComdat = (COFFSection->Characteristics & Flags) == Flags; 240 241 // Record section information required for symbol resolution. 242 // Note: The section index returned by 'getIndex()' is zero based. 243 Sections.emplace(Section.getIndex() + 1, Section); 244 addSectionAddress(Section); 245 246 // Additional initialization on the specific object format. 247 mapRangeAddress(COFFObj, Section, IsComdat); 248 } 249 250 LLVM_DEBUG({ 251 dbgs() << "\nSections Information:\n"; 252 for (LVSections::reference Entry : Sections) { 253 LVSectionIndex SectionIndex = Entry.first; 254 const object::SectionRef Section = Entry.second; 255 const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section); 256 Expected<StringRef> SectionNameOrErr = Section.getName(); 257 if (!SectionNameOrErr) 258 consumeError(SectionNameOrErr.takeError()); 259 dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3) 260 << " Name: " << *SectionNameOrErr << "\n" 261 << "Size: " << hexValue(Section.getSize()) << "\n" 262 << "VirtualAddress: " << hexValue(VirtualAddress) << "\n" 263 << "SectionAddress: " << hexValue(Section.getAddress()) << "\n" 264 << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData) 265 << "\n" 266 << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData) 267 << "\n"; 268 } 269 dbgs() << "\nObject Section Information:\n"; 270 for (LVSectionAddresses::const_reference Entry : SectionAddresses) 271 dbgs() << "[" << hexValue(Entry.first) << ":" 272 << hexValue(Entry.first + Entry.second.getSize()) 273 << "] Size: " << hexValue(Entry.second.getSize()) << "\n"; 274 }); 275 } 276 277 Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple, 278 StringRef TheFeatures) { 279 std::string TargetLookupError; 280 const Target *TheTarget = 281 TargetRegistry::lookupTarget(std::string(TheTriple), TargetLookupError); 282 if (!TheTarget) 283 return createStringError(errc::invalid_argument, TargetLookupError.c_str()); 284 285 // Register information. 286 MCRegisterInfo *RegisterInfo = TheTarget->createMCRegInfo(TheTriple); 287 if (!RegisterInfo) 288 return createStringError(errc::invalid_argument, 289 "no register info for target " + TheTriple); 290 MRI.reset(RegisterInfo); 291 292 // Assembler properties and features. 293 MCTargetOptions MCOptions; 294 MCAsmInfo *AsmInfo(TheTarget->createMCAsmInfo(*MRI, TheTriple, MCOptions)); 295 if (!AsmInfo) 296 return createStringError(errc::invalid_argument, 297 "no assembly info for target " + TheTriple); 298 MAI.reset(AsmInfo); 299 300 // Target subtargets. 301 StringRef CPU; 302 MCSubtargetInfo *SubtargetInfo( 303 TheTarget->createMCSubtargetInfo(TheTriple, CPU, TheFeatures)); 304 if (!SubtargetInfo) 305 return createStringError(errc::invalid_argument, 306 "no subtarget info for target " + TheTriple); 307 STI.reset(SubtargetInfo); 308 309 // Instructions Info. 310 MCInstrInfo *InstructionInfo(TheTarget->createMCInstrInfo()); 311 if (!InstructionInfo) 312 return createStringError(errc::invalid_argument, 313 "no instruction info for target " + TheTriple); 314 MII.reset(InstructionInfo); 315 316 MC = std::make_unique<MCContext>(Triple(TheTriple), MAI.get(), MRI.get(), 317 STI.get()); 318 319 // Assembler. 320 MCDisassembler *DisAsm(TheTarget->createMCDisassembler(*STI, *MC)); 321 if (!DisAsm) 322 return createStringError(errc::invalid_argument, 323 "no disassembler for target " + TheTriple); 324 MD.reset(DisAsm); 325 326 MCInstPrinter *InstructionPrinter(TheTarget->createMCInstPrinter( 327 Triple(TheTriple), AsmInfo->getAssemblerDialect(), *MAI, *MII, *MRI)); 328 if (!InstructionPrinter) 329 return createStringError(errc::invalid_argument, 330 "no target assembly language printer for target " + 331 TheTriple); 332 MIP.reset(InstructionPrinter); 333 InstructionPrinter->setPrintImmHex(true); 334 335 return Error::success(); 336 } 337 338 Expected<std::pair<uint64_t, object::SectionRef>> 339 LVBinaryReader::getSection(LVScope *Scope, LVAddress Address, 340 LVSectionIndex SectionIndex) { 341 // Return the 'text' section with the code for this logical scope. 342 // COFF: SectionIndex is zero. Use 'SectionAddresses' data. 343 // ELF: SectionIndex is the section index in the file. 344 if (SectionIndex) { 345 LVSections::iterator Iter = Sections.find(SectionIndex); 346 if (Iter == Sections.end()) { 347 return createStringError(errc::invalid_argument, 348 "invalid section index for: '%s'", 349 Scope->getName().str().c_str()); 350 } 351 const object::SectionRef Section = Iter->second; 352 return std::make_pair(Section.getAddress(), Section); 353 } 354 355 // Ensure a valid starting address for the public names. 356 LVSectionAddresses::const_iterator Iter = 357 SectionAddresses.upper_bound(Address); 358 if (Iter == SectionAddresses.begin()) 359 return createStringError(errc::invalid_argument, 360 "invalid section address for: '%s'", 361 Scope->getName().str().c_str()); 362 363 // Get section that contains the code for this function. 364 Iter = SectionAddresses.lower_bound(Address); 365 if (Iter != SectionAddresses.begin()) 366 --Iter; 367 return std::make_pair(Iter->first, Iter->second); 368 } 369 370 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 371 LVScope *Scope) { 372 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 373 ScopesWithRanges->addEntry(Scope); 374 } 375 376 void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex, 377 LVScope *Scope, LVAddress LowerAddress, 378 LVAddress UpperAddress) { 379 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 380 ScopesWithRanges->addEntry(Scope, LowerAddress, UpperAddress); 381 } 382 383 LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) { 384 // Check if we already have a mapping for this section index. 385 LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex); 386 if (IterSection == SectionRanges.end()) 387 IterSection = 388 SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first; 389 LVRange *Range = IterSection->second.get(); 390 assert(Range && "Range is null."); 391 return Range; 392 } 393 394 Error LVBinaryReader::createInstructions(LVScope *Scope, 395 LVSectionIndex SectionIndex, 396 const LVNameInfo &NameInfo) { 397 assert(Scope && "Scope is null."); 398 399 // Skip stripped functions. 400 if (Scope->getIsDiscarded()) 401 return Error::success(); 402 403 // Find associated address and size for the given function entry point. 404 LVAddress Address = NameInfo.first; 405 uint64_t Size = NameInfo.second; 406 407 LLVM_DEBUG({ 408 dbgs() << "\nPublic Name instructions: '" << Scope->getName() << "' / '" 409 << Scope->getLinkageName() << "'\n" 410 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 411 << hexValue(Address) << ":" << hexValue(Address + Size) << "]\n"; 412 }); 413 414 Expected<std::pair<uint64_t, const object::SectionRef>> SectionOrErr = 415 getSection(Scope, Address, SectionIndex); 416 if (!SectionOrErr) 417 return SectionOrErr.takeError(); 418 const object::SectionRef Section = (*SectionOrErr).second; 419 uint64_t SectionAddress = (*SectionOrErr).first; 420 421 Expected<StringRef> SectionContentsOrErr = Section.getContents(); 422 if (!SectionContentsOrErr) 423 return SectionOrErr.takeError(); 424 425 // There are cases where the section size is smaller than the [LowPC,HighPC] 426 // range; it causes us to decode invalid addresses. The recorded size in the 427 // logical scope is one less than the real size. 428 LLVM_DEBUG({ 429 dbgs() << " Size: " << hexValue(Size) 430 << ", Section Size: " << hexValue(Section.getSize()) << "\n"; 431 }); 432 Size = std::min(Size + 1, Section.getSize()); 433 434 ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(*SectionContentsOrErr); 435 uint64_t Offset = Address - SectionAddress; 436 uint8_t const *Begin = Bytes.data() + Offset; 437 uint8_t const *End = Bytes.data() + Offset + Size; 438 439 LLVM_DEBUG({ 440 Expected<StringRef> SectionNameOrErr = Section.getName(); 441 if (!SectionNameOrErr) 442 consumeError(SectionNameOrErr.takeError()); 443 else 444 dbgs() << "Section Index: " << hexValue(Section.getIndex()) << " [" 445 << hexValue((uint64_t)Section.getAddress()) << ":" 446 << hexValue((uint64_t)Section.getAddress() + Section.getSize(), 10) 447 << "] Name: '" << *SectionNameOrErr << "'\n" 448 << "Begin: " << hexValue((uint64_t)Begin) 449 << ", End: " << hexValue((uint64_t)End) << "\n"; 450 }); 451 452 // Address for first instruction line. 453 LVAddress FirstAddress = Address; 454 auto InstructionsSP = std::make_unique<LVLines>(); 455 LVLines &Instructions = *InstructionsSP; 456 DiscoveredLines.emplace_back(std::move(InstructionsSP)); 457 458 while (Begin < End) { 459 MCInst Instruction; 460 uint64_t BytesConsumed = 0; 461 SmallVector<char, 64> InsnStr; 462 raw_svector_ostream Annotations(InsnStr); 463 MCDisassembler::DecodeStatus const S = 464 MD->getInstruction(Instruction, BytesConsumed, 465 ArrayRef<uint8_t>(Begin, End), Address, outs()); 466 switch (S) { 467 case MCDisassembler::Fail: 468 LLVM_DEBUG({ dbgs() << "Invalid instruction\n"; }); 469 if (BytesConsumed == 0) 470 // Skip invalid bytes 471 BytesConsumed = 1; 472 break; 473 case MCDisassembler::SoftFail: 474 LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; }); 475 [[fallthrough]]; 476 case MCDisassembler::Success: { 477 std::string Buffer; 478 raw_string_ostream Stream(Buffer); 479 StringRef AnnotationsStr = Annotations.str(); 480 MIP->printInst(&Instruction, Address, AnnotationsStr, *STI, Stream); 481 LLVM_DEBUG({ 482 std::string BufferCodes; 483 raw_string_ostream StreamCodes(BufferCodes); 484 StreamCodes << format_bytes( 485 ArrayRef<uint8_t>(Begin, Begin + BytesConsumed), std::nullopt, 16, 486 16); 487 dbgs() << "[" << hexValue((uint64_t)Begin) << "] " 488 << "Size: " << format_decimal(BytesConsumed, 2) << " (" 489 << formatv("{0}", 490 fmt_align(StreamCodes.str(), AlignStyle::Left, 32)) 491 << ") " << hexValue((uint64_t)Address) << ": " << Stream.str() 492 << "\n"; 493 }); 494 // Here we add logical lines to the Instructions. Later on, 495 // the 'processLines()' function will move each created logical line 496 // to its enclosing logical scope, using the debug ranges information 497 // and they will be released when its scope parent is deleted. 498 LVLineAssembler *Line = createLineAssembler(); 499 Line->setAddress(Address); 500 Line->setName(StringRef(Stream.str()).trim()); 501 Instructions.push_back(Line); 502 break; 503 } 504 } 505 Address += BytesConsumed; 506 Begin += BytesConsumed; 507 } 508 509 LLVM_DEBUG({ 510 size_t Index = 0; 511 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 512 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 513 << "Address: " << hexValue(FirstAddress) 514 << format(" - Collected instructions lines: %d\n", 515 Instructions.size()); 516 for (const LVLine *Line : Instructions) 517 dbgs() << format_decimal(++Index, 5) << ": " 518 << hexValue(Line->getOffset()) << ", (" << Line->getName() 519 << ")\n"; 520 }); 521 522 // The scope in the assembler names is linked to its own instructions. 523 ScopeInstructions.add(SectionIndex, Scope, &Instructions); 524 AssemblerMappings.add(SectionIndex, FirstAddress, Scope); 525 526 return Error::success(); 527 } 528 529 Error LVBinaryReader::createInstructions(LVScope *Function, 530 LVSectionIndex SectionIndex) { 531 if (!options().getPrintInstructions()) 532 return Error::success(); 533 534 LVNameInfo Name = CompileUnit->findPublicName(Function); 535 if (Name.first != LVAddress(UINT64_MAX)) 536 return createInstructions(Function, SectionIndex, Name); 537 538 return Error::success(); 539 } 540 541 Error LVBinaryReader::createInstructions() { 542 if (!options().getPrintInstructions()) 543 return Error::success(); 544 545 LLVM_DEBUG({ 546 size_t Index = 1; 547 dbgs() << "\nPublic Names (Scope):\n"; 548 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 549 LVScope *Scope = Name.first; 550 const LVNameInfo &NameInfo = Name.second; 551 LVAddress Address = NameInfo.first; 552 uint64_t Size = NameInfo.second; 553 dbgs() << format_decimal(Index++, 5) << ": " 554 << "DIE Offset: " << hexValue(Scope->getOffset()) << " Range: [" 555 << hexValue(Address) << ":" << hexValue(Address + Size) << "] " 556 << "Name: '" << Scope->getName() << "' / '" 557 << Scope->getLinkageName() << "'\n"; 558 } 559 }); 560 561 // For each public name in the current compile unit, create the line 562 // records that represent the executable instructions. 563 for (LVPublicNames::const_reference Name : CompileUnit->getPublicNames()) { 564 LVScope *Scope = Name.first; 565 // The symbol table extracted from the object file always contains a 566 // non-empty name (linkage name). However, the logical scope does not 567 // guarantee to have a name for the linkage name (main is one case). 568 // For those cases, set the linkage name the same as the name. 569 if (!Scope->getLinkageNameIndex()) 570 Scope->setLinkageName(Scope->getName()); 571 LVSectionIndex SectionIndex = getSymbolTableIndex(Scope->getLinkageName()); 572 if (Error Err = createInstructions(Scope, SectionIndex, Name.second)) 573 return Err; 574 } 575 576 return Error::success(); 577 } 578 579 // During the traversal of the debug information sections, we created the 580 // logical lines representing the disassembled instructions from the text 581 // section and the logical lines representing the line records from the 582 // debug line section. Using the ranges associated with the logical scopes, 583 // we will allocate those logical lines to their logical scopes. 584 void LVBinaryReader::processLines(LVLines *DebugLines, 585 LVSectionIndex SectionIndex, 586 LVScope *Function) { 587 assert(DebugLines && "DebugLines is null."); 588 589 // Just return if this compilation unit does not have any line records 590 // and no instruction lines were created. 591 if (DebugLines->empty() && !options().getPrintInstructions()) 592 return; 593 594 // Merge the debug lines and instruction lines using their text address; 595 // the logical line representing the debug line record is followed by the 596 // line(s) representing the disassembled instructions, whose addresses are 597 // equal or greater that the line address and less than the address of the 598 // next debug line record. 599 LLVM_DEBUG({ 600 size_t Index = 1; 601 size_t PerLine = 4; 602 dbgs() << format("\nProcess debug lines: %d\n", DebugLines->size()); 603 for (const LVLine *Line : *DebugLines) { 604 dbgs() << format_decimal(Index, 5) << ": " << hexValue(Line->getOffset()) 605 << ", (" << Line->getLineNumber() << ")" 606 << ((Index % PerLine) ? " " : "\n"); 607 ++Index; 608 } 609 dbgs() << ((Index % PerLine) ? "\n" : ""); 610 }); 611 612 bool TraverseLines = true; 613 LVLines::iterator Iter = DebugLines->begin(); 614 while (TraverseLines && Iter != DebugLines->end()) { 615 uint64_t DebugAddress = (*Iter)->getAddress(); 616 617 // Get the function with an entry point that matches this line and 618 // its associated assembler entries. In the case of COMDAT, the input 619 // 'Function' is not null. Use it to find its address ranges. 620 LVScope *Scope = Function; 621 if (!Function) { 622 Scope = AssemblerMappings.find(SectionIndex, DebugAddress); 623 if (!Scope) { 624 ++Iter; 625 continue; 626 } 627 } 628 629 // Get the associated instructions for the found 'Scope'. 630 LVLines InstructionLines; 631 LVLines *Lines = ScopeInstructions.find(SectionIndex, Scope); 632 if (Lines) 633 InstructionLines = std::move(*Lines); 634 635 LLVM_DEBUG({ 636 size_t Index = 0; 637 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 638 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 639 << format("Process instruction lines: %d\n", 640 InstructionLines.size()); 641 for (const LVLine *Line : InstructionLines) 642 dbgs() << format_decimal(++Index, 5) << ": " 643 << hexValue(Line->getOffset()) << ", (" << Line->getName() 644 << ")\n"; 645 }); 646 647 // Continue with next debug line if there are not instructions lines. 648 if (InstructionLines.empty()) { 649 ++Iter; 650 continue; 651 } 652 653 for (LVLine *InstructionLine : InstructionLines) { 654 uint64_t InstructionAddress = InstructionLine->getAddress(); 655 LLVM_DEBUG({ 656 dbgs() << "Instruction address: " << hexValue(InstructionAddress) 657 << "\n"; 658 }); 659 if (TraverseLines) { 660 while (Iter != DebugLines->end()) { 661 DebugAddress = (*Iter)->getAddress(); 662 LLVM_DEBUG({ 663 bool IsDebug = (*Iter)->getIsLineDebug(); 664 dbgs() << "Line " << (IsDebug ? "dbg:" : "ins:") << " [" 665 << hexValue(DebugAddress) << "]"; 666 if (IsDebug) 667 dbgs() << format(" %d", (*Iter)->getLineNumber()); 668 dbgs() << "\n"; 669 }); 670 // Instruction address before debug line. 671 if (InstructionAddress < DebugAddress) { 672 LLVM_DEBUG({ 673 dbgs() << "Inserted instruction address: " 674 << hexValue(InstructionAddress) << " before line: " 675 << format("%d", (*Iter)->getLineNumber()) << " [" 676 << hexValue(DebugAddress) << "]\n"; 677 }); 678 Iter = DebugLines->insert(Iter, InstructionLine); 679 // The returned iterator points to the inserted instruction. 680 // Skip it and point to the line acting as reference. 681 ++Iter; 682 break; 683 } 684 ++Iter; 685 } 686 if (Iter == DebugLines->end()) { 687 // We have reached the end of the source lines and the current 688 // instruction line address is greater than the last source line. 689 TraverseLines = false; 690 DebugLines->push_back(InstructionLine); 691 } 692 } else { 693 DebugLines->push_back(InstructionLine); 694 } 695 } 696 } 697 698 LLVM_DEBUG({ 699 dbgs() << format("Lines after merge: %d\n", DebugLines->size()); 700 size_t Index = 0; 701 for (const LVLine *Line : *DebugLines) { 702 dbgs() << format_decimal(++Index, 5) << ": " 703 << hexValue(Line->getOffset()) << ", (" 704 << ((Line->getIsLineDebug()) 705 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 706 : Line->getName()) 707 << ")\n"; 708 } 709 }); 710 711 // If this compilation unit does not have line records, traverse its scopes 712 // and take any collected instruction lines as the working set in order 713 // to move them to their associated scope. 714 if (DebugLines->empty()) { 715 if (const LVScopes *Scopes = CompileUnit->getScopes()) 716 for (LVScope *Scope : *Scopes) { 717 LVLines *Lines = ScopeInstructions.find(Scope); 718 if (Lines) { 719 720 LLVM_DEBUG({ 721 size_t Index = 0; 722 dbgs() << "\nSectionIndex: " << format_decimal(SectionIndex, 3) 723 << " Scope DIE: " << hexValue(Scope->getOffset()) << "\n" 724 << format("Instruction lines: %d\n", Lines->size()); 725 for (const LVLine *Line : *Lines) 726 dbgs() << format_decimal(++Index, 5) << ": " 727 << hexValue(Line->getOffset()) << ", (" << Line->getName() 728 << ")\n"; 729 }); 730 731 if (Scope->getIsArtificial()) { 732 // Add the instruction lines to their artificial scope. 733 for (LVLine *Line : *Lines) 734 Scope->addElement(Line); 735 } else { 736 DebugLines->append(*Lines); 737 } 738 Lines->clear(); 739 } 740 } 741 } 742 743 LVRange *ScopesWithRanges = getSectionRanges(SectionIndex); 744 ScopesWithRanges->startSearch(); 745 746 // Process collected lines. 747 LVScope *Scope; 748 for (LVLine *Line : *DebugLines) { 749 // Using the current line address, get its associated lexical scope and 750 // add the line information to it. 751 Scope = ScopesWithRanges->getEntry(Line->getAddress()); 752 if (!Scope) { 753 // If missing scope, use the compile unit. 754 Scope = CompileUnit; 755 LLVM_DEBUG({ 756 dbgs() << "Adding line to CU: " << hexValue(Line->getOffset()) << ", (" 757 << ((Line->getIsLineDebug()) 758 ? Line->lineNumberAsStringStripped(/*ShowZero=*/true) 759 : Line->getName()) 760 << ")\n"; 761 }); 762 } 763 764 // Add line object to scope. 765 Scope->addElement(Line); 766 767 // Report any line zero. 768 if (options().getWarningLines() && Line->getIsLineDebug() && 769 !Line->getLineNumber()) 770 CompileUnit->addLineZero(Line); 771 772 // Some compilers generate ranges in the compile unit; other compilers 773 // only DW_AT_low_pc/DW_AT_high_pc. In order to correctly map global 774 // variables, we need to generate the map ranges for the compile unit. 775 // If we use the ranges stored at the scope level, there are cases where 776 // the address referenced by a symbol location, is not in the enclosing 777 // scope, but in an outer one. By using the ranges stored in the compile 778 // unit, we can catch all those addresses. 779 if (Line->getIsLineDebug()) 780 CompileUnit->addMapping(Line, SectionIndex); 781 782 // Resolve any given pattern. 783 patterns().resolvePatternMatch(Line); 784 } 785 786 ScopesWithRanges->endSearch(); 787 } 788 789 void LVBinaryReader::processLines(LVLines *DebugLines, 790 LVSectionIndex SectionIndex) { 791 assert(DebugLines && "DebugLines is null."); 792 if (DebugLines->empty() && !ScopeInstructions.findMap(SectionIndex)) 793 return; 794 795 // If the Compile Unit does not contain comdat functions, use the whole 796 // set of debug lines, as the addresses don't have conflicts. 797 if (!CompileUnit->getHasComdatScopes()) { 798 processLines(DebugLines, SectionIndex, nullptr); 799 return; 800 } 801 802 // Find the indexes for the lines whose address is zero. 803 std::vector<size_t> AddressZero; 804 LVLines::iterator It = 805 std::find_if(std::begin(*DebugLines), std::end(*DebugLines), 806 [](LVLine *Line) { return !Line->getAddress(); }); 807 while (It != std::end(*DebugLines)) { 808 AddressZero.emplace_back(std::distance(std::begin(*DebugLines), It)); 809 It = std::find_if(std::next(It), std::end(*DebugLines), 810 [](LVLine *Line) { return !Line->getAddress(); }); 811 } 812 813 // If the set of debug lines does not contain any line with address zero, 814 // use the whole set. It means we are dealing with an initialization 815 // section from a fully linked binary. 816 if (AddressZero.empty()) { 817 processLines(DebugLines, SectionIndex, nullptr); 818 return; 819 } 820 821 // The Compile unit contains comdat functions. Traverse the collected 822 // debug lines and identify logical groups based on their start and 823 // address. Each group starts with a zero address. 824 // Begin, End, Address, IsDone. 825 using LVBucket = std::tuple<size_t, size_t, LVAddress, bool>; 826 std::vector<LVBucket> Buckets; 827 828 LVAddress Address; 829 size_t Begin = 0; 830 size_t End = 0; 831 size_t Index = 0; 832 for (Index = 0; Index < AddressZero.size() - 1; ++Index) { 833 Begin = AddressZero[Index]; 834 End = AddressZero[Index + 1] - 1; 835 Address = (*DebugLines)[End]->getAddress(); 836 Buckets.emplace_back(Begin, End, Address, false); 837 } 838 839 // Add the last bucket. 840 if (Index) { 841 Begin = AddressZero[Index]; 842 End = DebugLines->size() - 1; 843 Address = (*DebugLines)[End]->getAddress(); 844 Buckets.emplace_back(Begin, End, Address, false); 845 } 846 847 LLVM_DEBUG({ 848 dbgs() << "\nDebug Lines buckets: " << Buckets.size() << "\n"; 849 for (LVBucket &Bucket : Buckets) { 850 dbgs() << "Begin: " << format_decimal(std::get<0>(Bucket), 5) << ", " 851 << "End: " << format_decimal(std::get<1>(Bucket), 5) << ", " 852 << "Address: " << hexValue(std::get<2>(Bucket)) << "\n"; 853 } 854 }); 855 856 // Traverse the sections and buckets looking for matches on the section 857 // sizes. In the unlikely event of different buckets with the same size 858 // process them in order and mark them as done. 859 LVLines Group; 860 for (LVSections::reference Entry : Sections) { 861 LVSectionIndex SectionIndex = Entry.first; 862 const object::SectionRef Section = Entry.second; 863 uint64_t Size = Section.getSize(); 864 LLVM_DEBUG({ 865 dbgs() << "\nSection Index: " << format_decimal(SectionIndex, 3) 866 << " , Section Size: " << hexValue(Section.getSize()) 867 << " , Section Address: " << hexValue(Section.getAddress()) 868 << "\n"; 869 }); 870 871 for (LVBucket &Bucket : Buckets) { 872 if (std::get<3>(Bucket)) 873 // Already done for previous section. 874 continue; 875 if (Size == std::get<2>(Bucket)) { 876 // We have a match on the section size. 877 Group.clear(); 878 LVLines::iterator IterStart = DebugLines->begin() + std::get<0>(Bucket); 879 LVLines::iterator IterEnd = 880 DebugLines->begin() + std::get<1>(Bucket) + 1; 881 for (LVLines::iterator Iter = IterStart; Iter < IterEnd; ++Iter) 882 Group.push_back(*Iter); 883 processLines(&Group, SectionIndex, /*Function=*/nullptr); 884 std::get<3>(Bucket) = true; 885 break; 886 } 887 } 888 } 889 } 890 891 // Traverse the scopes for the given 'Function' looking for any inlined 892 // scopes with inlined lines, which are found in 'CUInlineeLines'. 893 void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex, 894 LVScope *Function) { 895 SmallVector<LVInlineeLine::iterator> InlineeIters; 896 std::function<void(LVScope * Parent)> FindInlinedScopes = 897 [&](LVScope *Parent) { 898 if (const LVScopes *Scopes = Parent->getScopes()) 899 for (LVScope *Scope : *Scopes) { 900 LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope); 901 if (Iter != CUInlineeLines.end()) 902 InlineeIters.push_back(Iter); 903 FindInlinedScopes(Scope); 904 } 905 }; 906 907 // Find all inlined scopes for the given 'Function'. 908 FindInlinedScopes(Function); 909 for (LVInlineeLine::iterator InlineeIter : InlineeIters) { 910 LVScope *Scope = InlineeIter->first; 911 addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex); 912 913 // TODO: Convert this into a reference. 914 LVLines *InlineeLines = InlineeIter->second.get(); 915 LLVM_DEBUG({ 916 dbgs() << "Inlined lines for: " << Scope->getName() << "\n"; 917 for (const LVLine *Line : *InlineeLines) 918 dbgs() << "[" << hexValue(Line->getAddress()) << "] " 919 << Line->getLineNumber() << "\n"; 920 dbgs() << format("Debug lines: %d\n", CULines.size()); 921 for (const LVLine *Line : CULines) 922 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 923 << Line->getLineNumber() << ")\n"; 924 ; 925 }); 926 927 // The inlined lines must be merged using its address, in order to keep 928 // the real order of the instructions. The inlined lines are mixed with 929 // the other non-inlined lines. 930 if (InlineeLines->size()) { 931 // First address of inlinee code. 932 uint64_t InlineeStart = (InlineeLines->front())->getAddress(); 933 LVLines::iterator Iter = std::find_if( 934 CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool { 935 return Item->getAddress() == InlineeStart; 936 }); 937 if (Iter != CULines.end()) { 938 // 'Iter' points to the line where the inlined function is called. 939 // Emulate the DW_AT_call_line attribute. 940 Scope->setCallLineNumber((*Iter)->getLineNumber()); 941 // Mark the referenced line as the start of the inlined function. 942 // Skip the first line during the insertion, as the address and 943 // line number as the same. Otherwise we have to erase and insert. 944 (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber()); 945 ++Iter; 946 CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end()); 947 } 948 } 949 950 // Remove this set of lines from the container; each inlined function 951 // creates an unique set of lines. Remove only the created container. 952 CUInlineeLines.erase(InlineeIter); 953 InlineeLines->clear(); 954 } 955 LLVM_DEBUG({ 956 dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n"; 957 dbgs() << format("Debug lines: %d\n", CULines.size()); 958 for (const LVLine *Line : CULines) 959 dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", (" 960 << Line->getLineNumber() << ")\n"; 961 ; 962 }); 963 } 964 965 void LVBinaryReader::print(raw_ostream &OS) const { 966 OS << "LVBinaryReader\n"; 967 LLVM_DEBUG(dbgs() << "PrintReader\n"); 968 } 969