1 //===- DwarfTransformer.cpp -----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include <thread> 10 #include <unordered_set> 11 12 #include "llvm/DebugInfo/DIContext.h" 13 #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" 14 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 15 #include "llvm/Support/Error.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/raw_ostream.h" 18 19 #include "llvm/DebugInfo/GSYM/DwarfTransformer.h" 20 #include "llvm/DebugInfo/GSYM/FunctionInfo.h" 21 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 22 #include "llvm/DebugInfo/GSYM/GsymReader.h" 23 #include "llvm/DebugInfo/GSYM/InlineInfo.h" 24 #include <optional> 25 26 using namespace llvm; 27 using namespace gsym; 28 29 struct llvm::gsym::CUInfo { 30 const DWARFDebugLine::LineTable *LineTable; 31 const char *CompDir; 32 std::vector<uint32_t> FileCache; 33 uint64_t Language = 0; 34 uint8_t AddrSize = 0; 35 36 CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { 37 LineTable = DICtx.getLineTableForUnit(CU); 38 CompDir = CU->getCompilationDir(); 39 FileCache.clear(); 40 if (LineTable) 41 FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); 42 DWARFDie Die = CU->getUnitDIE(); 43 Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); 44 AddrSize = CU->getAddressByteSize(); 45 } 46 47 /// Return true if Addr is the highest address for a given compile unit. The 48 /// highest address is encoded as -1, of all ones in the address. These high 49 /// addresses are used by some linkers to indicate that a function has been 50 /// dead stripped or didn't end up in the linked executable. 51 bool isHighestAddress(uint64_t Addr) const { 52 if (AddrSize == 4) 53 return Addr == UINT32_MAX; 54 else if (AddrSize == 8) 55 return Addr == UINT64_MAX; 56 return false; 57 } 58 59 /// Convert a DWARF compile unit file index into a GSYM global file index. 60 /// 61 /// Each compile unit in DWARF has its own file table in the line table 62 /// prologue. GSYM has a single large file table that applies to all files 63 /// from all of the info in a GSYM file. This function converts between the 64 /// two and caches and DWARF CU file index that has already been converted so 65 /// the first client that asks for a compile unit file index will end up 66 /// doing the conversion, and subsequent clients will get the cached GSYM 67 /// index. 68 std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym, 69 uint32_t DwarfFileIdx) { 70 if (!LineTable || DwarfFileIdx >= FileCache.size()) 71 return std::nullopt; 72 uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; 73 if (GsymFileIdx != UINT32_MAX) 74 return GsymFileIdx; 75 std::string File; 76 if (LineTable->getFileNameByIndex( 77 DwarfFileIdx, CompDir, 78 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) 79 GsymFileIdx = Gsym.insertFile(File); 80 else 81 GsymFileIdx = 0; 82 return GsymFileIdx; 83 } 84 }; 85 86 87 static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { 88 if (DWARFDie SpecDie = 89 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { 90 if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) 91 return SpecParent; 92 } 93 if (DWARFDie AbstDie = 94 Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { 95 if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) 96 return AbstParent; 97 } 98 99 // We never want to follow parent for inlined subroutine - that would 100 // give us information about where the function is inlined, not what 101 // function is inlined 102 if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) 103 return DWARFDie(); 104 105 DWARFDie ParentDie = Die.getParent(); 106 if (!ParentDie) 107 return DWARFDie(); 108 109 switch (ParentDie.getTag()) { 110 case dwarf::DW_TAG_namespace: 111 case dwarf::DW_TAG_structure_type: 112 case dwarf::DW_TAG_union_type: 113 case dwarf::DW_TAG_class_type: 114 case dwarf::DW_TAG_subprogram: 115 return ParentDie; // Found parent decl context DIE 116 case dwarf::DW_TAG_lexical_block: 117 return GetParentDeclContextDIE(ParentDie); 118 default: 119 break; 120 } 121 122 return DWARFDie(); 123 } 124 125 /// Get the GsymCreator string table offset for the qualified name for the 126 /// DIE passed in. This function will avoid making copies of any strings in 127 /// the GsymCreator when possible. We don't need to copy a string when the 128 /// string comes from our .debug_str section or is an inlined string in the 129 /// .debug_info. If we create a qualified name string in this function by 130 /// combining multiple strings in the DWARF string table or info, we will make 131 /// a copy of the string when we add it to the string table. 132 static std::optional<uint32_t> 133 getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) { 134 // If the dwarf has mangled name, use mangled name 135 if (auto LinkageName = Die.getLinkageName()) { 136 // We have seen cases were linkage name is actually empty. 137 if (strlen(LinkageName) > 0) 138 return Gsym.insertString(LinkageName, /* Copy */ false); 139 } 140 141 StringRef ShortName(Die.getName(DINameKind::ShortName)); 142 if (ShortName.empty()) 143 return std::nullopt; 144 145 // For C++ and ObjC, prepend names of all parent declaration contexts 146 if (!(Language == dwarf::DW_LANG_C_plus_plus || 147 Language == dwarf::DW_LANG_C_plus_plus_03 || 148 Language == dwarf::DW_LANG_C_plus_plus_11 || 149 Language == dwarf::DW_LANG_C_plus_plus_14 || 150 Language == dwarf::DW_LANG_ObjC_plus_plus || 151 // This should not be needed for C, but we see C++ code marked as C 152 // in some binaries. This should hurt, so let's do it for C as well 153 Language == dwarf::DW_LANG_C)) 154 return Gsym.insertString(ShortName, /* Copy */ false); 155 156 // Some GCC optimizations create functions with names ending with .isra.<num> 157 // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name 158 // If it looks like it could be the case, don't add any prefix 159 if (ShortName.starts_with("_Z") && 160 (ShortName.contains(".isra.") || ShortName.contains(".part."))) 161 return Gsym.insertString(ShortName, /* Copy */ false); 162 163 DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); 164 if (ParentDeclCtxDie) { 165 std::string Name = ShortName.str(); 166 while (ParentDeclCtxDie) { 167 StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); 168 if (!ParentName.empty()) { 169 // "lambda" names are wrapped in < >. Replace with { } 170 // to be consistent with demangled names and not to confuse with 171 // templates 172 if (ParentName.front() == '<' && ParentName.back() == '>') 173 Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + 174 "::" + Name; 175 else 176 Name = ParentName.str() + "::" + Name; 177 } 178 ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); 179 } 180 // Copy the name since we created a new name in a std::string. 181 return Gsym.insertString(Name, /* Copy */ true); 182 } 183 // Don't copy the name since it exists in the DWARF object file. 184 return Gsym.insertString(ShortName, /* Copy */ false); 185 } 186 187 static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { 188 bool CheckChildren = true; 189 switch (Die.getTag()) { 190 case dwarf::DW_TAG_subprogram: 191 // Don't look into functions within functions. 192 CheckChildren = Depth == 0; 193 break; 194 case dwarf::DW_TAG_inlined_subroutine: 195 return true; 196 default: 197 break; 198 } 199 if (!CheckChildren) 200 return false; 201 for (DWARFDie ChildDie : Die.children()) { 202 if (hasInlineInfo(ChildDie, Depth + 1)) 203 return true; 204 } 205 return false; 206 } 207 208 static AddressRanges 209 ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) { 210 AddressRanges Ranges; 211 for (const DWARFAddressRange &DwarfRange : DwarfRanges) { 212 if (DwarfRange.LowPC < DwarfRange.HighPC) 213 Ranges.insert({DwarfRange.LowPC, DwarfRange.HighPC}); 214 } 215 return Ranges; 216 } 217 218 static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI, 219 DWARFDie Die, uint32_t Depth, FunctionInfo &FI, 220 InlineInfo &Parent, 221 const AddressRanges &AllParentRanges, 222 bool &WarnIfEmpty) { 223 if (!hasInlineInfo(Die, Depth)) 224 return; 225 226 dwarf::Tag Tag = Die.getTag(); 227 if (Tag == dwarf::DW_TAG_inlined_subroutine) { 228 // create new InlineInfo and append to parent.children 229 InlineInfo II; 230 AddressRanges AllInlineRanges; 231 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 232 if (RangesOrError) { 233 AllInlineRanges = ConvertDWARFRanges(RangesOrError.get()); 234 uint32_t EmptyCount = 0; 235 for (const AddressRange &InlineRange : AllInlineRanges) { 236 // Check for empty inline range in case inline function was outlined 237 // or has not code 238 if (InlineRange.empty()) { 239 ++EmptyCount; 240 } else { 241 if (Parent.Ranges.contains(InlineRange)) { 242 II.Ranges.insert(InlineRange); 243 } else { 244 // Only warn if the current inline range is not within any of all 245 // of the parent ranges. If we have a DW_TAG_subpgram with multiple 246 // ranges we will emit a FunctionInfo for each range of that 247 // function that only emits information within the current range, 248 // so we only want to emit an error if the DWARF has issues, not 249 // when a range currently just isn't in the range we are currently 250 // parsing for. 251 if (AllParentRanges.contains(InlineRange)) { 252 WarnIfEmpty = false; 253 } else if (Log) { 254 *Log << "error: inlined function DIE at " 255 << HEX32(Die.getOffset()) << " has a range [" 256 << HEX64(InlineRange.start()) << " - " 257 << HEX64(InlineRange.end()) << ") that isn't contained in " 258 << "any parent address ranges, this inline range will be " 259 "removed.\n"; 260 } 261 } 262 } 263 } 264 // If we have all empty ranges for the inlines, then don't warn if we 265 // have an empty InlineInfo at the top level as all inline functions 266 // were elided. 267 if (EmptyCount == AllInlineRanges.size()) 268 WarnIfEmpty = false; 269 } 270 if (II.Ranges.empty()) 271 return; 272 273 if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) 274 II.Name = *NameIndex; 275 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 276 Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX); 277 std::optional<uint32_t> OptGSymFileIdx = 278 CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx); 279 if (OptGSymFileIdx) { 280 II.CallFile = OptGSymFileIdx.value(); 281 II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); 282 // parse all children and append to parent 283 for (DWARFDie ChildDie : Die.children()) 284 parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II, 285 AllInlineRanges, WarnIfEmpty); 286 Parent.Children.emplace_back(std::move(II)); 287 } else if (Log) { 288 *Log << "error: inlined function DIE at " << HEX32(Die.getOffset()) 289 << " has an invalid file index " << DwarfFileIdx 290 << " in its DW_AT_call_file attribute, this inline entry and all " 291 << "children will be removed.\n"; 292 } 293 return; 294 } 295 if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { 296 // skip this Die and just recurse down 297 for (DWARFDie ChildDie : Die.children()) 298 parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, Parent, 299 AllParentRanges, WarnIfEmpty); 300 } 301 } 302 303 static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI, 304 DWARFDie Die, GsymCreator &Gsym, 305 FunctionInfo &FI) { 306 std::vector<uint32_t> RowVector; 307 const uint64_t StartAddress = FI.startAddress(); 308 const uint64_t EndAddress = FI.endAddress(); 309 const uint64_t RangeSize = EndAddress - StartAddress; 310 const object::SectionedAddress SecAddress{ 311 StartAddress, object::SectionedAddress::UndefSection}; 312 313 314 if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { 315 // If we have a DW_TAG_subprogram but no line entries, fall back to using 316 // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. 317 std::string FilePath = Die.getDeclFile( 318 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath); 319 if (FilePath.empty()) { 320 // If we had a DW_AT_decl_file, but got no file then we need to emit a 321 // warning. 322 if (Log) { 323 const uint64_t DwarfFileIdx = dwarf::toUnsigned( 324 Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX); 325 *Log << "error: function DIE at " << HEX32(Die.getOffset()) 326 << " has an invalid file index " << DwarfFileIdx 327 << " in its DW_AT_decl_file attribute, unable to create a single " 328 << "line entry from the DW_AT_decl_file/DW_AT_decl_line " 329 << "attributes.\n"; 330 } 331 return; 332 } 333 if (auto Line = 334 dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { 335 LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line); 336 FI.OptLineTable = LineTable(); 337 FI.OptLineTable->push(LE); 338 } 339 return; 340 } 341 342 FI.OptLineTable = LineTable(); 343 DWARFDebugLine::Row PrevRow; 344 for (uint32_t RowIndex : RowVector) { 345 // Take file number and line/column from the row. 346 const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; 347 std::optional<uint32_t> OptFileIdx = 348 CUI.DWARFToGSYMFileIndex(Gsym, Row.File); 349 if (!OptFileIdx) { 350 if (Log) { 351 *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has " 352 << "a line entry with invalid DWARF file index, this entry will " 353 << "be removed:\n"; 354 Row.dumpTableHeader(*Log, /*Indent=*/0); 355 Row.dump(*Log); 356 *Log << "\n"; 357 } 358 continue; 359 } 360 const uint32_t FileIdx = OptFileIdx.value(); 361 uint64_t RowAddress = Row.Address.Address; 362 // Watch out for a RowAddress that is in the middle of a line table entry 363 // in the DWARF. If we pass an address in between two line table entries 364 // we will get a RowIndex for the previous valid line table row which won't 365 // be contained in our function. This is usually a bug in the DWARF due to 366 // linker problems or LTO or other DWARF re-linking so it is worth emitting 367 // an error, but not worth stopping the creation of the GSYM. 368 if (!FI.Range.contains(RowAddress)) { 369 if (RowAddress < FI.Range.start()) { 370 if (Log) { 371 *Log << "error: DIE has a start address whose LowPC is between the " 372 "line table Row[" << RowIndex << "] with address " 373 << HEX64(RowAddress) << " and the next one.\n"; 374 Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE()); 375 } 376 RowAddress = FI.Range.start(); 377 } else { 378 continue; 379 } 380 } 381 382 LineEntry LE(RowAddress, FileIdx, Row.Line); 383 if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { 384 // We have seen full duplicate line tables for functions in some 385 // DWARF files. Watch for those here by checking the last 386 // row was the function's end address (HighPC) and that the 387 // current line table entry's address is the same as the first 388 // line entry we already have in our "function_info.Lines". If 389 // so break out after printing a warning. 390 auto FirstLE = FI.OptLineTable->first(); 391 if (FirstLE && *FirstLE == LE) { 392 if (Log && !Gsym.isQuiet()) { 393 *Log << "warning: duplicate line table detected for DIE:\n"; 394 Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE()); 395 } 396 } else { 397 if (Log) { 398 *Log << "error: line table has addresses that do not " 399 << "monotonically increase:\n"; 400 for (uint32_t RowIndex2 : RowVector) 401 CUI.LineTable->Rows[RowIndex2].dump(*Log); 402 Die.dump(*Log, 0, DIDumpOptions::getForSingleDIE()); 403 } 404 } 405 break; 406 } 407 408 // Skip multiple line entries for the same file and line. 409 auto LastLE = FI.OptLineTable->last(); 410 if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) 411 continue; 412 // Only push a row if it isn't an end sequence. End sequence markers are 413 // included for the last address in a function or the last contiguous 414 // address in a sequence. 415 if (Row.EndSequence) { 416 // End sequence means that the next line entry could have a lower address 417 // that the previous entries. So we clear the previous row so we don't 418 // trigger the line table error about address that do not monotonically 419 // increase. 420 PrevRow = DWARFDebugLine::Row(); 421 } else { 422 FI.OptLineTable->push(LE); 423 PrevRow = Row; 424 } 425 } 426 // If not line table rows were added, clear the line table so we don't encode 427 // on in the GSYM file. 428 if (FI.OptLineTable->empty()) 429 FI.OptLineTable = std::nullopt; 430 } 431 432 void DwarfTransformer::handleDie(raw_ostream *OS, CUInfo &CUI, DWARFDie Die) { 433 switch (Die.getTag()) { 434 case dwarf::DW_TAG_subprogram: { 435 Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); 436 if (!RangesOrError) { 437 consumeError(RangesOrError.takeError()); 438 break; 439 } 440 const DWARFAddressRangesVector &Ranges = RangesOrError.get(); 441 if (Ranges.empty()) 442 break; 443 auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); 444 if (!NameIndex) { 445 if (OS) { 446 *OS << "error: function at " << HEX64(Die.getOffset()) 447 << " has no name\n "; 448 Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); 449 } 450 break; 451 } 452 // All ranges for the subprogram DIE in case it has multiple. We need to 453 // pass this down into parseInlineInfo so we don't warn about inline 454 // ranges that are not in the current subrange of a function when they 455 // actually are in another subgrange. We do this because when a function 456 // has discontiguos ranges, we create multiple function entries with only 457 // the info for that range contained inside of it. 458 AddressRanges AllSubprogramRanges = ConvertDWARFRanges(Ranges); 459 460 // Create a function_info for each range 461 for (const DWARFAddressRange &Range : Ranges) { 462 // The low PC must be less than the high PC. Many linkers don't remove 463 // DWARF for functions that don't get linked into the final executable. 464 // If both the high and low pc have relocations, linkers will often set 465 // the address values for both to the same value to indicate the function 466 // has been remove. Other linkers have been known to set the one or both 467 // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 468 // byte addresses to indicate the function isn't valid. The check below 469 // tries to watch for these cases and abort if it runs into them. 470 if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) 471 break; 472 473 // Many linkers can't remove DWARF and might set the LowPC to zero. Since 474 // high PC can be an offset from the low PC in more recent DWARF versions 475 // we need to watch for a zero'ed low pc which we do using 476 // ValidTextRanges below. 477 if (!Gsym.IsValidTextAddress(Range.LowPC)) { 478 // We expect zero and -1 to be invalid addresses in DWARF depending 479 // on the linker of the DWARF. This indicates a function was stripped 480 // and the debug info wasn't able to be stripped from the DWARF. If 481 // the LowPC isn't zero or -1, then we should emit an error. 482 if (Range.LowPC != 0) { 483 if (!Gsym.isQuiet()) { 484 // Unexpected invalid address, emit a warning 485 if (OS) { 486 *OS << "warning: DIE has an address range whose start address " 487 "is not in any executable sections (" 488 << *Gsym.GetValidTextRanges() 489 << ") and will not be processed:\n"; 490 Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); 491 } 492 } 493 } 494 break; 495 } 496 497 FunctionInfo FI; 498 FI.Range = {Range.LowPC, Range.HighPC}; 499 FI.Name = *NameIndex; 500 if (CUI.LineTable) 501 convertFunctionLineTable(OS, CUI, Die, Gsym, FI); 502 503 if (hasInlineInfo(Die, 0)) { 504 FI.Inline = InlineInfo(); 505 FI.Inline->Name = *NameIndex; 506 FI.Inline->Ranges.insert(FI.Range); 507 bool WarnIfEmpty = true; 508 parseInlineInfo(Gsym, OS, CUI, Die, 0, FI, *FI.Inline, 509 AllSubprogramRanges, WarnIfEmpty); 510 // Make sure we at least got some valid inline info other than just 511 // the top level function. If we didn't then remove the inline info 512 // from the function info. We have seen cases where LTO tries to modify 513 // the DWARF for functions and it messes up the address ranges for 514 // the inline functions so it is no longer valid. 515 // 516 // By checking if there are any valid children on the top level inline 517 // information object, we will know if we got anything valid from the 518 // debug info. 519 if (FI.Inline->Children.empty()) { 520 if (WarnIfEmpty && OS && !Gsym.isQuiet()) { 521 *OS << "warning: DIE contains inline function information that has " 522 "no valid ranges, removing inline information:\n"; 523 Die.dump(*OS, 0, DIDumpOptions::getForSingleDIE()); 524 } 525 FI.Inline = std::nullopt; 526 } 527 } 528 Gsym.addFunctionInfo(std::move(FI)); 529 } 530 } break; 531 default: 532 break; 533 } 534 for (DWARFDie ChildDie : Die.children()) 535 handleDie(OS, CUI, ChildDie); 536 } 537 538 Error DwarfTransformer::convert(uint32_t NumThreads, raw_ostream *OS) { 539 size_t NumBefore = Gsym.getNumFunctionInfos(); 540 auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie { 541 DWARFDie ReturnDie = DwarfUnit.getUnitDIE(false); 542 if (DwarfUnit.getDWOId()) { 543 DWARFUnit *DWOCU = DwarfUnit.getNonSkeletonUnitDIE(false).getDwarfUnit(); 544 if (OS && !DWOCU->isDWOUnit()) { 545 std::string DWOName = dwarf::toString( 546 DwarfUnit.getUnitDIE().find( 547 {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}), 548 ""); 549 *OS << "warning: Unable to retrieve DWO .debug_info section for " 550 << DWOName << "\n"; 551 } else { 552 ReturnDie = DWOCU->getUnitDIE(false); 553 } 554 } 555 return ReturnDie; 556 }; 557 if (NumThreads == 1) { 558 // Parse all DWARF data from this thread, use the same string/file table 559 // for everything 560 for (const auto &CU : DICtx.compile_units()) { 561 DWARFDie Die = getDie(*CU); 562 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 563 handleDie(OS, CUI, Die); 564 } 565 } else { 566 // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up 567 // front before we start accessing any DIEs since there might be 568 // cross compile unit references in the DWARF. If we don't do this we can 569 // end up crashing. 570 571 // We need to call getAbbreviations sequentially first so that getUnitDIE() 572 // only works with its local data. 573 for (const auto &CU : DICtx.compile_units()) 574 CU->getAbbreviations(); 575 576 // Now parse all DIEs in case we have cross compile unit references in a 577 // thread pool. 578 ThreadPool pool(hardware_concurrency(NumThreads)); 579 for (const auto &CU : DICtx.compile_units()) 580 pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); 581 pool.wait(); 582 583 // Now convert all DWARF to GSYM in a thread pool. 584 std::mutex LogMutex; 585 for (const auto &CU : DICtx.compile_units()) { 586 DWARFDie Die = getDie(*CU); 587 if (Die) { 588 CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); 589 pool.async([this, CUI, &LogMutex, OS, Die]() mutable { 590 std::string ThreadLogStorage; 591 raw_string_ostream ThreadOS(ThreadLogStorage); 592 handleDie(OS ? &ThreadOS: nullptr, CUI, Die); 593 ThreadOS.flush(); 594 if (OS && !ThreadLogStorage.empty()) { 595 // Print ThreadLogStorage lines into an actual stream under a lock 596 std::lock_guard<std::mutex> guard(LogMutex); 597 *OS << ThreadLogStorage; 598 } 599 }); 600 } 601 } 602 pool.wait(); 603 } 604 size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; 605 if (OS) 606 *OS << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; 607 return Error::success(); 608 } 609 610 llvm::Error DwarfTransformer::verify(StringRef GsymPath, raw_ostream &Log) { 611 Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; 612 613 auto Gsym = GsymReader::openFile(GsymPath); 614 if (!Gsym) 615 return Gsym.takeError(); 616 617 auto NumAddrs = Gsym->getNumAddresses(); 618 DILineInfoSpecifier DLIS( 619 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, 620 DILineInfoSpecifier::FunctionNameKind::LinkageName); 621 std::string gsymFilename; 622 for (uint32_t I = 0; I < NumAddrs; ++I) { 623 auto FuncAddr = Gsym->getAddress(I); 624 if (!FuncAddr) 625 return createStringError(std::errc::invalid_argument, 626 "failed to extract address[%i]", I); 627 628 auto FI = Gsym->getFunctionInfo(*FuncAddr); 629 if (!FI) 630 return createStringError(std::errc::invalid_argument, 631 "failed to extract function info for address 0x%" 632 PRIu64, *FuncAddr); 633 634 for (auto Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { 635 const object::SectionedAddress SectAddr{ 636 Addr, object::SectionedAddress::UndefSection}; 637 auto LR = Gsym->lookup(Addr); 638 if (!LR) 639 return LR.takeError(); 640 641 auto DwarfInlineInfos = 642 DICtx.getInliningInfoForAddress(SectAddr, DLIS); 643 uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); 644 if (NumDwarfInlineInfos == 0) { 645 DwarfInlineInfos.addFrame( 646 DICtx.getLineInfoForAddress(SectAddr, DLIS)); 647 } 648 649 // Check for 1 entry that has no file and line info 650 if (NumDwarfInlineInfos == 1 && 651 DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { 652 DwarfInlineInfos = DIInliningInfo(); 653 NumDwarfInlineInfos = 0; 654 } 655 if (NumDwarfInlineInfos > 0 && 656 NumDwarfInlineInfos != LR->Locations.size()) { 657 Log << "error: address " << HEX64(Addr) << " has " 658 << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " 659 << LR->Locations.size() << "\n"; 660 Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; 661 for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { 662 const auto &dii = DwarfInlineInfos.getFrame(Idx); 663 Log << " [" << Idx << "]: " << dii.FunctionName << " @ " 664 << dii.FileName << ':' << dii.Line << '\n'; 665 } 666 Log << " " << LR->Locations.size() << " GSYM frames:\n"; 667 for (size_t Idx = 0, count = LR->Locations.size(); 668 Idx < count; ++Idx) { 669 const auto &gii = LR->Locations[Idx]; 670 Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir 671 << '/' << gii.Base << ':' << gii.Line << '\n'; 672 } 673 DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); 674 Gsym->dump(Log, *FI); 675 continue; 676 } 677 678 for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; 679 ++Idx) { 680 const auto &gii = LR->Locations[Idx]; 681 if (Idx < NumDwarfInlineInfos) { 682 const auto &dii = DwarfInlineInfos.getFrame(Idx); 683 gsymFilename = LR->getSourceFile(Idx); 684 // Verify function name 685 if (dii.FunctionName.find(gii.Name.str()) != 0) 686 Log << "error: address " << HEX64(Addr) << " DWARF function \"" 687 << dii.FunctionName.c_str() 688 << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; 689 // Verify source file path 690 if (dii.FileName != gsymFilename) 691 Log << "error: address " << HEX64(Addr) << " DWARF path \"" 692 << dii.FileName.c_str() << "\" doesn't match GSYM path \"" 693 << gsymFilename.c_str() << "\"\n"; 694 // Verify source file line 695 if (dii.Line != gii.Line) 696 Log << "error: address " << HEX64(Addr) << " DWARF line " 697 << dii.Line << " != GSYM line " << gii.Line << "\n"; 698 } 699 } 700 } 701 } 702 return Error::success(); 703 } 704