1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This utility works much like "addr2line". It is able of transforming 10 // tuples (module name, module offset) to code locations (function name, 11 // file, line number, column number). It is targeted for compiler-rt tools 12 // (especially AddressSanitizer and ThreadSanitizer) that can use it 13 // to symbolize stack traces in their error reports. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Opts.inc" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Config/config.h" 21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h" 22 #include "llvm/DebugInfo/Symbolize/Markup.h" 23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" 24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 25 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 26 #include "llvm/Debuginfod/DIFetcher.h" 27 #include "llvm/Debuginfod/Debuginfod.h" 28 #include "llvm/Debuginfod/HTTPClient.h" 29 #include "llvm/Option/Arg.h" 30 #include "llvm/Option/ArgList.h" 31 #include "llvm/Option/Option.h" 32 #include "llvm/Support/COM.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/InitLLVM.h" 37 #include "llvm/Support/Path.h" 38 #include "llvm/Support/StringSaver.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <algorithm> 41 #include <cstdio> 42 #include <cstring> 43 #include <string> 44 45 using namespace llvm; 46 using namespace symbolize; 47 48 namespace { 49 enum ID { 50 OPT_INVALID = 0, // This is not an option ID. 51 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ 52 HELPTEXT, METAVAR, VALUES) \ 53 OPT_##ID, 54 #include "Opts.inc" 55 #undef OPTION 56 }; 57 58 #define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; 59 #include "Opts.inc" 60 #undef PREFIX 61 62 const opt::OptTable::Info InfoTable[] = { 63 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ 64 HELPTEXT, METAVAR, VALUES) \ 65 { \ 66 PREFIX, NAME, HELPTEXT, \ 67 METAVAR, OPT_##ID, opt::Option::KIND##Class, \ 68 PARAM, FLAGS, OPT_##GROUP, \ 69 OPT_##ALIAS, ALIASARGS, VALUES}, 70 #include "Opts.inc" 71 #undef OPTION 72 }; 73 74 class SymbolizerOptTable : public opt::OptTable { 75 public: 76 SymbolizerOptTable() : OptTable(InfoTable) { 77 setGroupedShortOptions(true); 78 } 79 }; 80 } // namespace 81 82 template <typename T> 83 static void print(const Request &Request, Expected<T> &ResOrErr, 84 DIPrinter &Printer) { 85 if (ResOrErr) { 86 // No error, print the result. 87 Printer.print(Request, *ResOrErr); 88 return; 89 } 90 91 // Handle the error. 92 bool PrintEmpty = true; 93 handleAllErrors(std::move(ResOrErr.takeError()), 94 [&](const ErrorInfoBase &EI) { 95 PrintEmpty = Printer.printError( 96 Request, EI, "LLVMSymbolizer: error reading file: "); 97 }); 98 99 if (PrintEmpty) 100 Printer.print(Request, T()); 101 } 102 103 enum class OutputStyle { LLVM, GNU, JSON }; 104 105 enum class Command { 106 Code, 107 Data, 108 Frame, 109 }; 110 111 static void enableDebuginfod(LLVMSymbolizer &Symbolizer) { 112 static bool IsEnabled = false; 113 if (IsEnabled) 114 return; 115 IsEnabled = true; 116 // Look up symbols using the debuginfod client. 117 Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>()); 118 // The HTTPClient must be initialized for use by the debuginfod client. 119 HTTPClient::initialize(); 120 } 121 122 static SmallVector<uint8_t> parseBuildID(StringRef Str) { 123 std::string Bytes; 124 if (!tryGetFromHex(Str, Bytes)) 125 return {}; 126 ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()), 127 Bytes.size()); 128 return SmallVector<uint8_t>(BuildID.begin(), BuildID.end()); 129 } 130 131 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, 132 StringRef InputString, Command &Cmd, 133 std::string &ModuleName, 134 SmallVectorImpl<uint8_t> &BuildID, 135 uint64_t &ModuleOffset) { 136 const char kDelimiters[] = " \n\r"; 137 ModuleName = ""; 138 if (InputString.consume_front("CODE ")) { 139 Cmd = Command::Code; 140 } else if (InputString.consume_front("DATA ")) { 141 Cmd = Command::Data; 142 } else if (InputString.consume_front("FRAME ")) { 143 Cmd = Command::Frame; 144 } else { 145 // If no cmd, assume it's CODE. 146 Cmd = Command::Code; 147 } 148 149 const char *Pos; 150 // Skip delimiters and parse input filename (if needed). 151 if (BinaryName.empty() && BuildID.empty()) { 152 bool HasFilePrefix = false; 153 bool HasBuildIDPrefix = false; 154 while (true) { 155 if (InputString.consume_front("FILE:")) { 156 if (HasFilePrefix) 157 return false; 158 HasFilePrefix = true; 159 continue; 160 } 161 if (InputString.consume_front("BUILDID:")) { 162 if (HasBuildIDPrefix) 163 return false; 164 HasBuildIDPrefix = true; 165 continue; 166 } 167 break; 168 } 169 if (HasFilePrefix && HasBuildIDPrefix) 170 return false; 171 172 Pos = InputString.data(); 173 Pos += strspn(Pos, kDelimiters); 174 if (*Pos == '"' || *Pos == '\'') { 175 char Quote = *Pos; 176 Pos++; 177 const char *End = strchr(Pos, Quote); 178 if (!End) 179 return false; 180 ModuleName = std::string(Pos, End - Pos); 181 Pos = End + 1; 182 } else { 183 int NameLength = strcspn(Pos, kDelimiters); 184 ModuleName = std::string(Pos, NameLength); 185 Pos += NameLength; 186 } 187 if (HasBuildIDPrefix) { 188 BuildID = parseBuildID(ModuleName); 189 if (BuildID.empty()) 190 return false; 191 ModuleName.clear(); 192 } 193 } else { 194 Pos = InputString.data(); 195 ModuleName = BinaryName.str(); 196 } 197 // Skip delimiters and parse module offset. 198 Pos += strspn(Pos, kDelimiters); 199 int OffsetLength = strcspn(Pos, kDelimiters); 200 StringRef Offset(Pos, OffsetLength); 201 // GNU addr2line assumes the offset is hexadecimal and allows a redundant 202 // "0x" or "0X" prefix; do the same for compatibility. 203 if (IsAddr2Line) 204 Offset.consume_front("0x") || Offset.consume_front("0X"); 205 return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); 206 } 207 208 template <typename T> 209 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, 210 uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, 211 OutputStyle Style, LLVMSymbolizer &Symbolizer, 212 DIPrinter &Printer) { 213 uint64_t AdjustedOffset = Offset - AdjustVMA; 214 object::SectionedAddress Address = {AdjustedOffset, 215 object::SectionedAddress::UndefSection}; 216 if (Cmd == Command::Data) { 217 Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); 218 print({ModuleName, Offset}, ResOrErr, Printer); 219 } else if (Cmd == Command::Frame) { 220 Expected<std::vector<DILocal>> ResOrErr = 221 Symbolizer.symbolizeFrame(ModuleSpec, Address); 222 print({ModuleName, Offset}, ResOrErr, Printer); 223 } else if (ShouldInline) { 224 Expected<DIInliningInfo> ResOrErr = 225 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); 226 print({ModuleName, Offset}, ResOrErr, Printer); 227 } else if (Style == OutputStyle::GNU) { 228 // With PrintFunctions == FunctionNameKind::LinkageName (default) 229 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode() 230 // may override the name of an inlined function with the name of the topmost 231 // caller function in the inlining chain. This contradicts the existing 232 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only 233 // the topmost function, which suits our needs better. 234 Expected<DIInliningInfo> ResOrErr = 235 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); 236 Expected<DILineInfo> Res0OrErr = 237 !ResOrErr 238 ? Expected<DILineInfo>(ResOrErr.takeError()) 239 : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo() 240 : ResOrErr->getFrame(0)); 241 print({ModuleName, Offset}, Res0OrErr, Printer); 242 } else { 243 Expected<DILineInfo> ResOrErr = 244 Symbolizer.symbolizeCode(ModuleSpec, Address); 245 print({ModuleName, Offset}, ResOrErr, Printer); 246 } 247 Symbolizer.pruneCache(); 248 } 249 250 static void symbolizeInput(const opt::InputArgList &Args, 251 ArrayRef<uint8_t> IncomingBuildID, 252 uint64_t AdjustVMA, bool IsAddr2Line, 253 OutputStyle Style, StringRef InputString, 254 LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { 255 Command Cmd; 256 std::string ModuleName; 257 SmallVector<uint8_t> BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); 258 uint64_t Offset = 0; 259 if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, 260 StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { 261 Printer.printInvalidCommand({ModuleName, None}, InputString); 262 return; 263 } 264 bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); 265 if (!BuildID.empty()) { 266 assert(ModuleName.empty()); 267 if (!Args.hasArg(OPT_no_debuginfod)) 268 enableDebuginfod(Symbolizer); 269 std::string BuildIDStr = toHex(BuildID); 270 executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, 271 Style, Symbolizer, Printer); 272 } else { 273 executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, 274 Style, Symbolizer, Printer); 275 } 276 } 277 278 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, 279 raw_ostream &OS) { 280 const char HelpText[] = " [options] addresses..."; 281 Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(), 282 ToolName.str().c_str()); 283 // TODO Replace this with OptTable API once it adds extrahelp support. 284 OS << "\nPass @FILE as argument to read options from FILE.\n"; 285 } 286 287 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line, 288 StringSaver &Saver, 289 SymbolizerOptTable &Tbl) { 290 StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer"; 291 // The environment variable specifies initial options which can be overridden 292 // by commnad line options. 293 Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" 294 : "LLVM_SYMBOLIZER_OPTS"); 295 bool HasError = false; 296 opt::InputArgList Args = 297 Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { 298 errs() << ("error: " + Msg + "\n"); 299 HasError = true; 300 }); 301 if (HasError) 302 exit(1); 303 if (Args.hasArg(OPT_help)) { 304 printHelp(ToolName, Tbl, outs()); 305 exit(0); 306 } 307 if (Args.hasArg(OPT_version)) { 308 outs() << ToolName << '\n'; 309 cl::PrintVersionMessage(); 310 exit(0); 311 } 312 313 return Args; 314 } 315 316 template <typename T> 317 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) { 318 if (const opt::Arg *A = Args.getLastArg(ID)) { 319 StringRef V(A->getValue()); 320 if (!llvm::to_integer(V, Value, 0)) { 321 errs() << A->getSpelling() + 322 ": expected a non-negative integer, but got '" + V + "'"; 323 exit(1); 324 } 325 } else { 326 Value = 0; 327 } 328 } 329 330 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, 331 bool IsAddr2Line) { 332 if (Args.hasArg(OPT_functions)) 333 return FunctionNameKind::LinkageName; 334 if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ)) 335 return StringSwitch<FunctionNameKind>(A->getValue()) 336 .Case("none", FunctionNameKind::None) 337 .Case("short", FunctionNameKind::ShortName) 338 .Default(FunctionNameKind::LinkageName); 339 return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; 340 } 341 342 static Optional<bool> parseColorArg(const opt::InputArgList &Args) { 343 if (Args.hasArg(OPT_color)) 344 return true; 345 if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ)) 346 return StringSwitch<Optional<bool>>(A->getValue()) 347 .Case("always", true) 348 .Case("never", false) 349 .Case("auto", None); 350 return None; 351 } 352 353 static SmallVector<uint8_t> parseBuildIDArg(const opt::InputArgList &Args, 354 int ID) { 355 const opt::Arg *A = Args.getLastArg(ID); 356 if (!A) 357 return {}; 358 359 StringRef V(A->getValue()); 360 SmallVector<uint8_t> BuildID = parseBuildID(V); 361 if (BuildID.empty()) { 362 errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n"; 363 exit(1); 364 } 365 return BuildID; 366 } 367 368 // Symbolize markup from stdin and write the result to stdout. 369 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) { 370 MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args)); 371 std::string InputString; 372 while (std::getline(std::cin, InputString)) { 373 InputString += '\n'; 374 Filter.filter(InputString); 375 } 376 Filter.finish(); 377 } 378 379 ExitOnError ExitOnErr; 380 381 int main(int argc, char **argv) { 382 InitLLVM X(argc, argv); 383 sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); 384 385 bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line"); 386 BumpPtrAllocator A; 387 StringSaver Saver(A); 388 SymbolizerOptTable Tbl; 389 opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl); 390 391 LLVMSymbolizer::Options Opts; 392 uint64_t AdjustVMA; 393 PrinterConfig Config; 394 parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA); 395 if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) { 396 Opts.PathStyle = 397 A->getOption().matches(OPT_basenames) 398 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly 399 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath; 400 } else { 401 Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; 402 } 403 Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ); 404 Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str(); 405 Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line); 406 Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str(); 407 Opts.FallbackDebugPath = 408 Args.getLastArgValue(OPT_fallback_debug_path_EQ).str(); 409 Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line); 410 parseIntArg(Args, OPT_print_source_context_lines_EQ, 411 Config.SourceContextLines); 412 Opts.RelativeAddresses = Args.hasArg(OPT_relative_address); 413 Opts.UntagAddresses = 414 Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line); 415 Opts.UseDIA = Args.hasArg(OPT_use_dia); 416 #if !defined(LLVM_ENABLE_DIA_SDK) 417 if (Opts.UseDIA) { 418 WithColor::warning() << "DIA not available; using native PDB reader\n"; 419 Opts.UseDIA = false; 420 } 421 #endif 422 Opts.UseSymbolTable = true; 423 if (Args.hasArg(OPT_cache_size_EQ)) 424 parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize); 425 Config.PrintAddress = Args.hasArg(OPT_addresses); 426 Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None; 427 Config.Pretty = Args.hasArg(OPT_pretty_print); 428 Config.Verbose = Args.hasArg(OPT_verbose); 429 430 for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) { 431 StringRef Hint(A->getValue()); 432 if (sys::path::extension(Hint) == ".dSYM") { 433 Opts.DsymHints.emplace_back(Hint); 434 } else { 435 errs() << "Warning: invalid dSYM hint: \"" << Hint 436 << "\" (must have the '.dSYM' extension).\n"; 437 } 438 } 439 440 LLVMSymbolizer Symbolizer(Opts); 441 442 // A debuginfod lookup could succeed if a HTTP client is available and at 443 // least one backing URL is configured. 444 bool ShouldUseDebuginfodByDefault = 445 HTTPClient::isAvailable() && 446 !ExitOnErr(getDefaultDebuginfodUrls()).empty(); 447 if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, 448 ShouldUseDebuginfodByDefault)) 449 enableDebuginfod(Symbolizer); 450 451 if (Args.hasArg(OPT_filter_markup)) { 452 filterMarkup(Args, Symbolizer); 453 return 0; 454 } 455 456 auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM; 457 if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) { 458 if (strcmp(A->getValue(), "GNU") == 0) 459 Style = OutputStyle::GNU; 460 else if (strcmp(A->getValue(), "JSON") == 0) 461 Style = OutputStyle::JSON; 462 else 463 Style = OutputStyle::LLVM; 464 } 465 466 if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { 467 errs() << "error: cannot specify both --build-id and --obj\n"; 468 return EXIT_FAILURE; 469 } 470 SmallVector<uint8_t> BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); 471 472 std::unique_ptr<DIPrinter> Printer; 473 if (Style == OutputStyle::GNU) 474 Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config); 475 else if (Style == OutputStyle::JSON) 476 Printer = std::make_unique<JSONPrinter>(outs(), Config); 477 else 478 Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config); 479 480 std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT); 481 if (InputAddresses.empty()) { 482 const int kMaxInputStringLength = 1024; 483 char InputString[kMaxInputStringLength]; 484 485 while (fgets(InputString, sizeof(InputString), stdin)) { 486 // Strip newline characters. 487 std::string StrippedInputString(InputString); 488 llvm::erase_if(StrippedInputString, 489 [](char c) { return c == '\r' || c == '\n'; }); 490 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, 491 StrippedInputString, Symbolizer, *Printer); 492 outs().flush(); 493 } 494 } else { 495 Printer->listBegin(); 496 for (StringRef Address : InputAddresses) 497 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, 498 Symbolizer, *Printer); 499 Printer->listEnd(); 500 } 501 502 return 0; 503 } 504