1 //===- DwarfTransformer.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include <thread> 10 #include <unordered_set> 11 12 #include "llvm/DebugInfo/DIContext.h" 13 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 14 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 15 #include "llvm/Support/Error.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" 20 #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 21 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 22 #include "llvm/DebugInfo/GSYM/GsymReader.h" 23 #include "llvm/DebugInfo/GSYM/InlineInfo.h" 24 #include "llvm/DebugInfo/GSYM/OutputAggregator.h" 25 26 #include <optional> 27 28 using namespace llvm; 29 using namespace gsym; 30 31 struct llvm::gsym::CUInfo { 32 const DWARFDebugLine::LineTable *LineTable; 33 const char *CompDir; 34 std::vector<uint32_t> FileCache; 35 uint64_t Language = 0; 36 uint8_t AddrSize = 0; 37 38 CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { 39 LineTable = DICtx.getLineTableForUnit(CU); 40 CompDir = CU->getCompilationDir(); 41 FileCache.clear(); 42 if (LineTable) 43 FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); 44 DWARFDie Die = CU->getUnitDIE(); 45 Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); 46 AddrSize = CU->getAddressByteSize(); 47 } 48 49 /// Return true if Addr is the highest address for a given compile unit. The 50 /// highest address is encoded as -1, of all ones in the address. These high 51 /// addresses are used by some linkers to indicate that a function has been 52 /// dead stripped or didn't end up in the linked executable. 53 bool isHighestAddress(uint64_t Addr) const { 54 if (AddrSize == 4) 55 return Addr == UINT32_MAX; 56 else if (AddrSize == 8) 57 return Addr == UINT64_MAX; 58 return false; 59 } 60 61 /// Convert a DWARF compile unit file index into a GSYM global file index. 62 /// 63 /// Each compile unit in DWARF has its own file table in the line table 64 /// prologue. GSYM has a single large file table that applies to all files 65 /// from all of the info in a GSYM file. This function converts between the 66 /// two and caches and DWARF CU file index that has already been converted so 67 /// the first client that asks for a compile unit file index will end up 68 /// doing the conversion, and subsequent clients will get the cached GSYM 69 /// index. 70 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym, 71 uint32_t DwarfFileIdx) { 72 if (!LineTable || DwarfFileIdx >= FileCache.size()) 73 return std::nullopt; 74 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; 75 if (GsymFileIdx != UINT32_MAX) 76 return GsymFileIdx; 77 std::string File; 78 if (LineTable->getFileNameByIndex( 79 DwarfFileIdx, CompDir, 80 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) 81 GsymFileIdx = Gsym.insertFile(File); 82 else 83 GsymFileIdx = 0; 84 return GsymFileIdx; 85 } 86 }; 87 88 89 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { 90 if (DWARFDie SpecDie = 91 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { 92 if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) 93 return SpecParent; 94 } 95 if (DWARFDie AbstDie = 96 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { 97 if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) 98 return AbstParent; 99 } 100 101 // We never want to follow parent for inlined subroutine - that would 102 // give us information about where the function is inlined, not what 103 // function is inlined 104 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) 105 return DWARFDie(); 106 107 DWARFDie ParentDie = Die.getParent(); 108 if (!ParentDie) 109 return DWARFDie(); 110 111 switch (ParentDie.getTag()) { 112 case dwarf::DW_TAG_namespace: 113 case dwarf::DW_TAG_structure_type: 114 case dwarf::DW_TAG_union_type: 115 case dwarf::DW_TAG_class_type: 116 case dwarf::DW_TAG_subprogram: 117 return ParentDie; // Found parent decl context DIE 118 case dwarf::DW_TAG_lexical_block: 119 return GetParentDeclContextDIE(ParentDie); 120 default: 121 break; 122 } 123 124 return DWARFDie(); 125 } 126 127 /// Get the GsymCreator string table offset for the qualified name for the 128 /// DIE passed in. This function will avoid making copies of any strings in 129 /// the GsymCreator when possible. We don't need to copy a string when the 130 /// string comes from our .debug_str section or is an inlined string in the 131 /// .debug_info. If we create a qualified name string in this function by 132 /// combining multiple strings in the DWARF string table or info, we will make 133 /// a copy of the string when we add it to the string table. 134 static std::optional<uint32_t> 135 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { 136 // If the dwarf has mangled name, use mangled name 137 if (auto LinkageName = Die.getLinkageName()) { 138 // We have seen cases were linkage name is actually empty. 139 if (strlen(LinkageName) > 0) 140 return Gsym.insertString(LinkageName, /* Copy */ false); 141 } 142 143 StringRef ShortName(Die.getName(DINameKind::ShortName)); 144 if (ShortName.empty()) 145 return std::nullopt; 146 147 // For C++ and ObjC, prepend names of all parent declaration contexts 148 if (!(Language == dwarf::DW_LANG_C_plus_plus || 149 Language == dwarf::DW_LANG_C_plus_plus_03 || 150 Language == dwarf::DW_LANG_C_plus_plus_11 || 151 Language == dwarf::DW_LANG_C_plus_plus_14 || 152 Language == dwarf::DW_LANG_ObjC_plus_plus || 153 // This should not be needed for C, but we see C++ code marked as C 154 // in some binaries. This should hurt, so let's do it for C as well 155 Language == dwarf::DW_LANG_C)) 156 return Gsym.insertString(ShortName, /* Copy */ false); 157 158 // Some GCC optimizations create functions with names ending with .isra.<num> 159 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name 160 // If it looks like it could be the case, don't add any prefix 161 if (ShortName.starts_with("_Z") && 162 (ShortName.contains(".isra.") || ShortName.contains(".part."))) 163 return Gsym.insertString(ShortName, /* Copy */ false); 164 165 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); 166 if (ParentDeclCtxDie) { 167 std::string Name = ShortName.str(); 168 while (ParentDeclCtxDie) { 169 StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); 170 if (!ParentName.empty()) { 171 // "lambda" names are wrapped in < >. Replace with { } 172 // to be consistent with demangled names and not to confuse with 173 // templates 174 if (ParentName.front() == '<' && ParentName.back() == '>') 175 Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + 176 "::" + Name; 177 else 178 Name = ParentName.str() + "::" + Name; 179 } 180 ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); 181 } 182 // Copy the name since we created a new name in a std::string. 183 return Gsym.insertString(Name, /* Copy */ true); 184 } 185 // Don't copy the name since it exists in the DWARF object file. 186 return Gsym.insertString(ShortName, /* Copy */ false); 187 } 188 189 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { 190 bool CheckChildren = true; 191 switch (Die.getTag()) { 192 case dwarf::DW_TAG_subprogram: 193 // Don't look into functions within functions. 194 CheckChildren = Depth == 0; 195 break; 196 case dwarf::DW_TAG_inlined_subroutine: 197 return true; 198 default: 199 break; 200 } 201 if (!CheckChildren) 202 return false; 203 for (DWARFDie ChildDie : Die.children()) { 204 if (hasInlineInfo(ChildDie, Depth + 1)) 205 return true; 206 } 207 return false; 208 } 209 210 static AddressRanges 211 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) { 212 AddressRanges Ranges; 213 for (const DWARFAddressRange &DwarfRange : DwarfRanges) { 214 if (DwarfRange.LowPC < DwarfRange.HighPC) 215 Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC}); 216 } 217 return Ranges; 218 } 219 220 static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out, 221 CUInfo &CUI, DWARFDie Die, uint32_t Depth, 222 FunctionInfo &FI, InlineInfo &Parent, 223 const AddressRanges &AllParentRanges, 224 bool &WarnIfEmpty) { 225 if (!hasInlineInfo(Die, Depth)) 226 return; 227 228 dwarf::Tag Tag = Die.getTag(); 229 if (Tag == dwarf::DW_TAG_inlined_subroutine) { 230 // create new InlineInfo and append to parent.children 231 InlineInfo II; 232 AddressRanges AllInlineRanges; 233 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 234 if (RangesOrError) { 235 AllInlineRanges = ConvertDWARFRanges(RangesOrError.get()); 236 uint32_t EmptyCount = 0; 237 for (const AddressRange &InlineRange : AllInlineRanges) { 238 // Check for empty inline range in case inline function was outlined 239 // or has not code 240 if (InlineRange.empty()) { 241 ++EmptyCount; 242 } else { 243 if (Parent.Ranges.contains(InlineRange)) { 244 II.Ranges.insert(InlineRange); 245 } else { 246 // Only warn if the current inline range is not within any of all 247 // of the parent ranges. If we have a DW_TAG_subpgram with multiple 248 // ranges we will emit a FunctionInfo for each range of that 249 // function that only emits information within the current range, 250 // so we only want to emit an error if the DWARF has issues, not 251 // when a range currently just isn't in the range we are currently 252 // parsing for. 253 if (AllParentRanges.contains(InlineRange)) { 254 WarnIfEmpty = false; 255 } else 256 Out.Report("Function DIE has uncontained address range", 257 [&](raw_ostream &OS) { 258 OS << "error: inlined function DIE at " 259 << HEX32(Die.getOffset()) << " has a range [" 260 << HEX64(InlineRange.start()) << " - " 261 << HEX64(InlineRange.end()) 262 << ") that isn't contained in " 263 << "any parent address ranges, this inline range " 264 "will be " 265 "removed.\n"; 266 }); 267 } 268 } 269 } 270 // If we have all empty ranges for the inlines, then don't warn if we 271 // have an empty InlineInfo at the top level as all inline functions 272 // were elided. 273 if (EmptyCount == AllInlineRanges.size()) 274 WarnIfEmpty = false; 275 } 276 if (II.Ranges.empty()) 277 return; 278 279 if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) 280 II.Name = *NameIndex; 281 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 282 Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX); 283 std::optional<uint32_t> OptGSymFileIdx = 284 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx); 285 if (OptGSymFileIdx) { 286 II.CallFile = OptGSymFileIdx.value(); 287 II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); 288 // parse all children and append to parent 289 for (DWARFDie ChildDie : Die.children()) 290 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, II, 291 AllInlineRanges, WarnIfEmpty); 292 Parent.Children.emplace_back(std::move(II)); 293 } else 294 Out.Report( 295 "Inlined function die has invlaid file index in DW_AT_call_file", 296 [&](raw_ostream &OS) { 297 OS << "error: inlined function DIE at " << HEX32(Die.getOffset()) 298 << " has an invalid file index " << DwarfFileIdx 299 << " in its DW_AT_call_file attribute, this inline entry and " 300 "all " 301 << "children will be removed.\n"; 302 }); 303 return; 304 } 305 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { 306 // skip this Die and just recurse down 307 for (DWARFDie ChildDie : Die.children()) 308 parseInlineInfo(Gsym, Out, CUI, ChildDie, Depth + 1, FI, Parent, 309 AllParentRanges, WarnIfEmpty); 310 } 311 } 312 313 static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI, 314 DWARFDie Die, GsymCreator &Gsym, 315 FunctionInfo &FI) { 316 std::vector<uint32_t> RowVector; 317 const uint64_t StartAddress = FI.startAddress(); 318 const uint64_t EndAddress = FI.endAddress(); 319 const uint64_t RangeSize = EndAddress - StartAddress; 320 const object::SectionedAddress SecAddress{ 321 StartAddress, object::SectionedAddress::UndefSection}; 322 323 324 if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { 325 // If we have a DW_TAG_subprogram but no line entries, fall back to using 326 // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. 327 std::string FilePath = Die.getDeclFile( 328 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); 329 if (FilePath.empty()) { 330 // If we had a DW_AT_decl_file, but got no file then we need to emit a 331 // warning. 332 Out.Report("Invalid file index in DW_AT_decl_file", [&](raw_ostream &OS) { 333 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 334 Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); 335 OS << "error: function DIE at " << HEX32(Die.getOffset()) 336 << " has an invalid file index " << DwarfFileIdx 337 << " in its DW_AT_decl_file attribute, unable to create a single " 338 << "line entry from the DW_AT_decl_file/DW_AT_decl_line " 339 << "attributes.\n"; 340 }); 341 return; 342 } 343 if (auto Line = 344 dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { 345 LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line); 346 FI.OptLineTable = LineTable(); 347 FI.OptLineTable->push(LE); 348 } 349 return; 350 } 351 352 FI.OptLineTable = LineTable(); 353 DWARFDebugLine::Row PrevRow; 354 for (uint32_t RowIndex : RowVector) { 355 // Take file number and line/column from the row. 356 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; 357 std::optional<uint32_t> OptFileIdx = 358 CUI.DWARFToGSYMFileIndex(Gsym, Row.File); 359 if (!OptFileIdx) { 360 Out.Report( 361 "Invalid file index in DWARF line table", [&](raw_ostream &OS) { 362 OS << "error: function DIE at " << HEX32(Die.getOffset()) << " has " 363 << "a line entry with invalid DWARF file index, this entry will " 364 << "be removed:\n"; 365 Row.dumpTableHeader(OS, /*Indent=*/0); 366 Row.dump(OS); 367 OS << "\n"; 368 }); 369 continue; 370 } 371 const uint32_t FileIdx = OptFileIdx.value(); 372 uint64_t RowAddress = Row.Address.Address; 373 // Watch out for a RowAddress that is in the middle of a line table entry 374 // in the DWARF. If we pass an address in between two line table entries 375 // we will get a RowIndex for the previous valid line table row which won't 376 // be contained in our function. This is usually a bug in the DWARF due to 377 // linker problems or LTO or other DWARF re-linking so it is worth emitting 378 // an error, but not worth stopping the creation of the GSYM. 379 if (!FI.Range.contains(RowAddress)) { 380 if (RowAddress < FI.Range.start()) { 381 Out.Report("Start address lies between valid Row table entries", 382 [&](raw_ostream &OS) { 383 OS << "error: DIE has a start address whose LowPC is " 384 "between the " 385 "line table Row[" 386 << RowIndex << "] with address " << HEX64(RowAddress) 387 << " and the next one.\n"; 388 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 389 }); 390 RowAddress = FI.Range.start(); 391 } else { 392 continue; 393 } 394 } 395 396 LineEntry LE(RowAddress, FileIdx, Row.Line); 397 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { 398 // We have seen full duplicate line tables for functions in some 399 // DWARF files. Watch for those here by checking the last 400 // row was the function's end address (HighPC) and that the 401 // current line table entry's address is the same as the first 402 // line entry we already have in our "function_info.Lines". If 403 // so break out after printing a warning. 404 auto FirstLE = FI.OptLineTable->first(); 405 if (FirstLE && *FirstLE == LE) 406 // if (Log && !Gsym.isQuiet()) { TODO <-- This looks weird 407 Out.Report("Duplicate line table detected", [&](raw_ostream &OS) { 408 OS << "warning: duplicate line table detected for DIE:\n"; 409 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 410 }); 411 else 412 Out.Report("Non-monotonically increasing addresses", 413 [&](raw_ostream &OS) { 414 OS << "error: line table has addresses that do not " 415 << "monotonically increase:\n"; 416 for (uint32_t RowIndex2 : RowVector) 417 CUI.LineTable->Rows[RowIndex2].dump(OS); 418 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 419 }); 420 break; 421 } 422 423 // Skip multiple line entries for the same file and line. 424 auto LastLE = FI.OptLineTable->last(); 425 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) 426 continue; 427 // Only push a row if it isn't an end sequence. End sequence markers are 428 // included for the last address in a function or the last contiguous 429 // address in a sequence. 430 if (Row.EndSequence) { 431 // End sequence means that the next line entry could have a lower address 432 // that the previous entries. So we clear the previous row so we don't 433 // trigger the line table error about address that do not monotonically 434 // increase. 435 PrevRow = DWARFDebugLine::Row(); 436 } else { 437 FI.OptLineTable->push(LE); 438 PrevRow = Row; 439 } 440 } 441 // If not line table rows were added, clear the line table so we don't encode 442 // on in the GSYM file. 443 if (FI.OptLineTable->empty()) 444 FI.OptLineTable = std::nullopt; 445 } 446 447 void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI, 448 DWARFDie Die) { 449 switch (Die.getTag()) { 450 case dwarf::DW_TAG_subprogram: { 451 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 452 if (!RangesOrError) { 453 consumeError(RangesOrError.takeError()); 454 break; 455 } 456 const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 457 if (Ranges.empty()) 458 break; 459 auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); 460 if (!NameIndex) { 461 Out.Report("Function has no name", [&](raw_ostream &OS) { 462 OS << "error: function at " << HEX64(Die.getOffset()) 463 << " has no name\n "; 464 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 465 }); 466 break; 467 } 468 // All ranges for the subprogram DIE in case it has multiple. We need to 469 // pass this down into parseInlineInfo so we don't warn about inline 470 // ranges that are not in the current subrange of a function when they 471 // actually are in another subgrange. We do this because when a function 472 // has discontiguos ranges, we create multiple function entries with only 473 // the info for that range contained inside of it. 474 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges); 475 476 // Create a function_info for each range 477 for (const DWARFAddressRange &Range : Ranges) { 478 // The low PC must be less than the high PC. Many linkers don't remove 479 // DWARF for functions that don't get linked into the final executable. 480 // If both the high and low pc have relocations, linkers will often set 481 // the address values for both to the same value to indicate the function 482 // has been remove. Other linkers have been known to set the one or both 483 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 484 // byte addresses to indicate the function isn't valid. The check below 485 // tries to watch for these cases and abort if it runs into them. 486 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) 487 break; 488 489 // Many linkers can't remove DWARF and might set the LowPC to zero. Since 490 // high PC can be an offset from the low PC in more recent DWARF versions 491 // we need to watch for a zero'ed low pc which we do using ValidTextRanges 492 // below. 493 if (!Gsym.IsValidTextAddress(Range.LowPC)) { 494 // We expect zero and -1 to be invalid addresses in DWARF depending 495 // on the linker of the DWARF. This indicates a function was stripped 496 // and the debug info wasn't able to be stripped from the DWARF. If 497 // the LowPC isn't zero or -1, then we should emit an error. 498 if (Range.LowPC != 0) { 499 if (!Gsym.isQuiet()) { 500 // Unexpected invalid address, emit a warning 501 Out.Report("Address range starts outside executable section", 502 [&](raw_ostream &OS) { 503 OS << "warning: DIE has an address range whose " 504 "start address " 505 "is not in any executable sections (" 506 << *Gsym.GetValidTextRanges() 507 << ") and will not be processed:\n"; 508 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 509 }); 510 } 511 } 512 break; 513 } 514 515 FunctionInfo FI; 516 FI.Range = {Range.LowPC, Range.HighPC}; 517 FI.Name = *NameIndex; 518 if (CUI.LineTable) 519 convertFunctionLineTable(Out, CUI, Die, Gsym, FI); 520 521 if (hasInlineInfo(Die, 0)) { 522 FI.Inline = InlineInfo(); 523 FI.Inline->Name = *NameIndex; 524 FI.Inline->Ranges.insert(FI.Range); 525 bool WarnIfEmpty = true; 526 parseInlineInfo(Gsym, Out, CUI, Die, 0, FI, *FI.Inline, 527 AllSubprogramRanges, WarnIfEmpty); 528 // Make sure we at least got some valid inline info other than just 529 // the top level function. If we didn't then remove the inline info 530 // from the function info. We have seen cases where LTO tries to modify 531 // the DWARF for functions and it messes up the address ranges for 532 // the inline functions so it is no longer valid. 533 // 534 // By checking if there are any valid children on the top level inline 535 // information object, we will know if we got anything valid from the 536 // debug info. 537 if (FI.Inline->Children.empty()) { 538 if (WarnIfEmpty && !Gsym.isQuiet()) 539 Out.Report("DIE contains inline functions with no valid ranges", 540 [&](raw_ostream &OS) { 541 OS << "warning: DIE contains inline function " 542 "information that has no valid ranges, removing " 543 "inline information:\n"; 544 Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); 545 }); 546 FI.Inline = std::nullopt; 547 } 548 } 549 Gsym.addFunctionInfo(std::move(FI)); 550 } 551 } break; 552 default: 553 break; 554 } 555 for (DWARFDie ChildDie : Die.children()) 556 handleDie(Out, CUI, ChildDie); 557 } 558 559 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) { 560 size_t NumBefore = Gsym.getNumFunctionInfos(); 561 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { 562 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); 563 if (DwarfUnit.getDWOId()) { 564 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); 565 if (!DWOCU->isDWOUnit()) 566 Out.Report( 567 "warning: Unable to retrieve DWO .debug_info section for some " 568 "object files. (Remove the --quiet flag for full output)", 569 [&](raw_ostream &OS) { 570 std::string DWOName = dwarf::toString( 571 DwarfUnit.getUnitDIE().find( 572 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 573 ""); 574 OS << "warning: Unable to retrieve DWO .debug_info section for " 575 << DWOName << "\n"; 576 }); 577 else { 578 ReturnDie = DWOCU->getUnitDIE(false); 579 } 580 } 581 return ReturnDie; 582 }; 583 if (NumThreads == 1) { 584 // Parse all DWARF data from this thread, use the same string/file table 585 // for everything 586 for (const auto &CU : DICtx.compile_units()) { 587 DWARFDie Die = getDie(*CU); 588 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 589 handleDie(Out, CUI, Die); 590 } 591 } else { 592 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up 593 // front before we start accessing any DIEs since there might be 594 // cross compile unit references in the DWARF. If we don't do this we can 595 // end up crashing. 596 597 // We need to call getAbbreviations sequentially first so that getUnitDIE() 598 // only works with its local data. 599 for (const auto &CU : DICtx.compile_units()) 600 CU->getAbbreviations(); 601 602 // Now parse all DIEs in case we have cross compile unit references in a 603 // thread pool. 604 DefaultThreadPool pool(hardware_concurrency(NumThreads)); 605 for (const auto &CU : DICtx.compile_units()) 606 pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); 607 pool.wait(); 608 609 // Now convert all DWARF to GSYM in a thread pool. 610 std::mutex LogMutex; 611 for (const auto &CU : DICtx.compile_units()) { 612 DWARFDie Die = getDie(*CU); 613 if (Die) { 614 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 615 pool.async([this, CUI, &LogMutex, &Out, Die]() mutable { 616 std::string storage; 617 raw_string_ostream StrStream(storage); 618 OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr); 619 handleDie(ThreadOut, CUI, Die); 620 // Print ThreadLogStorage lines into an actual stream under a lock 621 std::lock_guard<std::mutex> guard(LogMutex); 622 if (Out.GetOS()) { 623 StrStream.flush(); 624 Out << storage; 625 } 626 Out.Merge(ThreadOut); 627 }); 628 } 629 } 630 pool.wait(); 631 } 632 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; 633 Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; 634 return Error::success(); 635 } 636 637 llvm::Error DwarfTransformer::verify(StringRef GsymPath, 638 OutputAggregator &Out) { 639 Out << "Verifying GSYM file \"" << GsymPath << "\":\n"; 640 641 auto Gsym = GsymReader::openFile(GsymPath); 642 if (!Gsym) 643 return Gsym.takeError(); 644 645 auto NumAddrs = Gsym->getNumAddresses(); 646 DILineInfoSpecifier DLIS( 647 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 648 DILineInfoSpecifier::FunctionNameKind::LinkageName); 649 std::string gsymFilename; 650 for (uint32_t I = 0; I < NumAddrs; ++I) { 651 auto FuncAddr = Gsym->getAddress(I); 652 if (!FuncAddr) 653 return createStringError(std::errc::invalid_argument, 654 "failed to extract address[%i]", I); 655 656 auto FI = Gsym->getFunctionInfo(*FuncAddr); 657 if (!FI) 658 return createStringError( 659 std::errc::invalid_argument, 660 "failed to extract function info for address 0x%" PRIu64, *FuncAddr); 661 662 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { 663 const object::SectionedAddress SectAddr{ 664 Addr, object::SectionedAddress::UndefSection}; 665 auto LR = Gsym->lookup(Addr); 666 if (!LR) 667 return LR.takeError(); 668 669 auto DwarfInlineInfos = 670 DICtx.getInliningInfoForAddress(SectAddr, DLIS); 671 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); 672 if (NumDwarfInlineInfos == 0) { 673 DwarfInlineInfos.addFrame( 674 DICtx.getLineInfoForAddress(SectAddr, DLIS)); 675 } 676 677 // Check for 1 entry that has no file and line info 678 if (NumDwarfInlineInfos == 1 && 679 DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { 680 DwarfInlineInfos = DIInliningInfo(); 681 NumDwarfInlineInfos = 0; 682 } 683 if (NumDwarfInlineInfos > 0 && 684 NumDwarfInlineInfos != LR->Locations.size()) { 685 if (Out.GetOS()) { 686 raw_ostream &Log = *Out.GetOS(); 687 Log << "error: address " << HEX64(Addr) << " has " 688 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " 689 << LR->Locations.size() << "\n"; 690 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; 691 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { 692 const auto &dii = DwarfInlineInfos.getFrame(Idx); 693 Log << " [" << Idx << "]: " << dii.FunctionName << " @ " 694 << dii.FileName << ':' << dii.Line << '\n'; 695 } 696 Log << " " << LR->Locations.size() << " GSYM frames:\n"; 697 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 698 ++Idx) { 699 const auto &gii = LR->Locations[Idx]; 700 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir 701 << '/' << gii.Base << ':' << gii.Line << '\n'; 702 } 703 DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); 704 Gsym->dump(Log, *FI); 705 } 706 continue; 707 } 708 709 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 710 ++Idx) { 711 const auto &gii = LR->Locations[Idx]; 712 if (Idx < NumDwarfInlineInfos) { 713 const auto &dii = DwarfInlineInfos.getFrame(Idx); 714 gsymFilename = LR->getSourceFile(Idx); 715 // Verify function name 716 if (dii.FunctionName.find(gii.Name.str()) != 0) 717 Out << "error: address " << HEX64(Addr) << " DWARF function \"" 718 << dii.FunctionName.c_str() 719 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; 720 721 // Verify source file path 722 if (dii.FileName != gsymFilename) 723 Out << "error: address " << HEX64(Addr) << " DWARF path \"" 724 << dii.FileName.c_str() << "\" doesn't match GSYM path \"" 725 << gsymFilename.c_str() << "\"\n"; 726 // Verify source file line 727 if (dii.Line != gii.Line) 728 Out << "error: address " << HEX64(Addr) << " DWARF line " 729 << dii.Line << " != GSYM line " << gii.Line << "\n"; 730 } 731 } 732 } 733 } 734 return Error::success(); 735 } 736