1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This utility works much like "addr2line". It is able of transforming 10 // tuples (module name, module offset) to code locations (function name, 11 // file, line number, column number). It is targeted for compiler-rt tools 12 // (especially AddressSanitizer and ThreadSanitizer) that can use it 13 // to symbolize stack traces in their error reports. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include "Opts.inc" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/Config/config.h" 21 #include "llvm/DebugInfo/Symbolize/DIPrinter.h" 22 #include "llvm/DebugInfo/Symbolize/Markup.h" 23 #include "llvm/DebugInfo/Symbolize/MarkupFilter.h" 24 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 25 #include "llvm/DebugInfo/Symbolize/Symbolize.h" 26 #include "llvm/Debuginfod/BuildIDFetcher.h" 27 #include "llvm/Debuginfod/Debuginfod.h" 28 #include "llvm/Debuginfod/HTTPClient.h" 29 #include "llvm/Option/Arg.h" 30 #include "llvm/Option/ArgList.h" 31 #include "llvm/Option/Option.h" 32 #include "llvm/Support/COM.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/FileSystem.h" 36 #include "llvm/Support/InitLLVM.h" 37 #include "llvm/Support/Path.h" 38 #include "llvm/Support/StringSaver.h" 39 #include "llvm/Support/WithColor.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <algorithm> 42 #include <cstdio> 43 #include <cstring> 44 #include <iostream> 45 #include <string> 46 47 using namespace llvm; 48 using namespace symbolize; 49 50 namespace { 51 enum ID { 52 OPT_INVALID = 0, // This is not an option ID. 53 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ 54 HELPTEXT, METAVAR, VALUES) \ 55 OPT_##ID, 56 #include "Opts.inc" 57 #undef OPTION 58 }; 59 60 #define PREFIX(NAME, VALUE) \ 61 static constexpr StringLiteral NAME##_init[] = VALUE; \ 62 static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \ 63 std::size(NAME##_init) - 1); 64 #include "Opts.inc" 65 #undef PREFIX 66 67 static constexpr opt::OptTable::Info InfoTable[] = { 68 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ 69 HELPTEXT, METAVAR, VALUES) \ 70 { \ 71 PREFIX, NAME, HELPTEXT, \ 72 METAVAR, OPT_##ID, opt::Option::KIND##Class, \ 73 PARAM, FLAGS, OPT_##GROUP, \ 74 OPT_##ALIAS, ALIASARGS, VALUES}, 75 #include "Opts.inc" 76 #undef OPTION 77 }; 78 79 class SymbolizerOptTable : public opt::GenericOptTable { 80 public: 81 SymbolizerOptTable() : GenericOptTable(InfoTable) { 82 setGroupedShortOptions(true); 83 } 84 }; 85 } // namespace 86 87 static std::string ToolName; 88 89 static void printError(const ErrorInfoBase &EI, StringRef Path) { 90 WithColor::error(errs(), ToolName); 91 if (!EI.isA<FileError>()) 92 errs() << "'" << Path << "': "; 93 EI.log(errs()); 94 errs() << '\n'; 95 } 96 97 template <typename T> 98 static void print(const Request &Request, Expected<T> &ResOrErr, 99 DIPrinter &Printer) { 100 if (ResOrErr) { 101 // No error, print the result. 102 Printer.print(Request, *ResOrErr); 103 return; 104 } 105 106 // Handle the error. 107 bool PrintEmpty = true; 108 handleAllErrors(std::move(ResOrErr.takeError()), 109 [&](const ErrorInfoBase &EI) { 110 PrintEmpty = Printer.printError(Request, EI); 111 }); 112 113 if (PrintEmpty) 114 Printer.print(Request, T()); 115 } 116 117 enum class OutputStyle { LLVM, GNU, JSON }; 118 119 enum class Command { 120 Code, 121 Data, 122 Frame, 123 }; 124 125 static void enableDebuginfod(LLVMSymbolizer &Symbolizer, 126 const opt::ArgList &Args) { 127 static bool IsEnabled = false; 128 if (IsEnabled) 129 return; 130 IsEnabled = true; 131 // Look up symbols using the debuginfod client. 132 Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>( 133 Args.getAllArgValues(OPT_debug_file_directory_EQ))); 134 // The HTTPClient must be initialized for use by the debuginfod client. 135 HTTPClient::initialize(); 136 } 137 138 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, 139 StringRef InputString, Command &Cmd, 140 std::string &ModuleName, object::BuildID &BuildID, 141 uint64_t &ModuleOffset) { 142 const char kDelimiters[] = " \n\r"; 143 ModuleName = ""; 144 if (InputString.consume_front("CODE ")) { 145 Cmd = Command::Code; 146 } else if (InputString.consume_front("DATA ")) { 147 Cmd = Command::Data; 148 } else if (InputString.consume_front("FRAME ")) { 149 Cmd = Command::Frame; 150 } else { 151 // If no cmd, assume it's CODE. 152 Cmd = Command::Code; 153 } 154 155 const char *Pos; 156 // Skip delimiters and parse input filename (if needed). 157 if (BinaryName.empty() && BuildID.empty()) { 158 bool HasFilePrefix = false; 159 bool HasBuildIDPrefix = false; 160 while (true) { 161 if (InputString.consume_front("FILE:")) { 162 if (HasFilePrefix) 163 return false; 164 HasFilePrefix = true; 165 continue; 166 } 167 if (InputString.consume_front("BUILDID:")) { 168 if (HasBuildIDPrefix) 169 return false; 170 HasBuildIDPrefix = true; 171 continue; 172 } 173 break; 174 } 175 if (HasFilePrefix && HasBuildIDPrefix) 176 return false; 177 178 Pos = InputString.data(); 179 Pos += strspn(Pos, kDelimiters); 180 if (*Pos == '"' || *Pos == '\'') { 181 char Quote = *Pos; 182 Pos++; 183 const char *End = strchr(Pos, Quote); 184 if (!End) 185 return false; 186 ModuleName = std::string(Pos, End - Pos); 187 Pos = End + 1; 188 } else { 189 int NameLength = strcspn(Pos, kDelimiters); 190 ModuleName = std::string(Pos, NameLength); 191 Pos += NameLength; 192 } 193 if (HasBuildIDPrefix) { 194 BuildID = parseBuildID(ModuleName); 195 if (BuildID.empty()) 196 return false; 197 ModuleName.clear(); 198 } 199 } else { 200 Pos = InputString.data(); 201 ModuleName = BinaryName.str(); 202 } 203 // Skip delimiters and parse module offset. 204 Pos += strspn(Pos, kDelimiters); 205 int OffsetLength = strcspn(Pos, kDelimiters); 206 StringRef Offset(Pos, OffsetLength); 207 // GNU addr2line assumes the offset is hexadecimal and allows a redundant 208 // "0x" or "0X" prefix; do the same for compatibility. 209 if (IsAddr2Line) 210 Offset.consume_front("0x") || Offset.consume_front("0X"); 211 return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); 212 } 213 214 template <typename T> 215 void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, 216 uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, 217 OutputStyle Style, LLVMSymbolizer &Symbolizer, 218 DIPrinter &Printer) { 219 uint64_t AdjustedOffset = Offset - AdjustVMA; 220 object::SectionedAddress Address = {AdjustedOffset, 221 object::SectionedAddress::UndefSection}; 222 Request SymRequest = {ModuleName, Offset}; 223 if (Cmd == Command::Data) { 224 Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); 225 print(SymRequest, ResOrErr, Printer); 226 } else if (Cmd == Command::Frame) { 227 Expected<std::vector<DILocal>> ResOrErr = 228 Symbolizer.symbolizeFrame(ModuleSpec, Address); 229 print(SymRequest, ResOrErr, Printer); 230 } else if (ShouldInline) { 231 Expected<DIInliningInfo> ResOrErr = 232 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); 233 print(SymRequest, ResOrErr, Printer); 234 } else if (Style == OutputStyle::GNU) { 235 // With PrintFunctions == FunctionNameKind::LinkageName (default) 236 // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode() 237 // may override the name of an inlined function with the name of the topmost 238 // caller function in the inlining chain. This contradicts the existing 239 // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only 240 // the topmost function, which suits our needs better. 241 Expected<DIInliningInfo> ResOrErr = 242 Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); 243 Expected<DILineInfo> Res0OrErr = 244 !ResOrErr 245 ? Expected<DILineInfo>(ResOrErr.takeError()) 246 : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo() 247 : ResOrErr->getFrame(0)); 248 print(SymRequest, Res0OrErr, Printer); 249 } else { 250 Expected<DILineInfo> ResOrErr = 251 Symbolizer.symbolizeCode(ModuleSpec, Address); 252 print(SymRequest, ResOrErr, Printer); 253 } 254 Symbolizer.pruneCache(); 255 } 256 257 static void symbolizeInput(const opt::InputArgList &Args, 258 object::BuildIDRef IncomingBuildID, 259 uint64_t AdjustVMA, bool IsAddr2Line, 260 OutputStyle Style, StringRef InputString, 261 LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { 262 Command Cmd; 263 std::string ModuleName; 264 object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); 265 uint64_t Offset = 0; 266 if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, 267 StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { 268 Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString); 269 return; 270 } 271 bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); 272 if (!BuildID.empty()) { 273 assert(ModuleName.empty()); 274 if (!Args.hasArg(OPT_no_debuginfod)) 275 enableDebuginfod(Symbolizer, Args); 276 std::string BuildIDStr = toHex(BuildID); 277 executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, 278 Style, Symbolizer, Printer); 279 } else { 280 executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, 281 Style, Symbolizer, Printer); 282 } 283 } 284 285 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, 286 raw_ostream &OS) { 287 const char HelpText[] = " [options] addresses..."; 288 Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(), 289 ToolName.str().c_str()); 290 // TODO Replace this with OptTable API once it adds extrahelp support. 291 OS << "\nPass @FILE as argument to read options from FILE.\n"; 292 } 293 294 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line, 295 StringSaver &Saver, 296 SymbolizerOptTable &Tbl) { 297 StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer"; 298 // The environment variable specifies initial options which can be overridden 299 // by commnad line options. 300 Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS" 301 : "LLVM_SYMBOLIZER_OPTS"); 302 bool HasError = false; 303 opt::InputArgList Args = 304 Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) { 305 errs() << ("error: " + Msg + "\n"); 306 HasError = true; 307 }); 308 if (HasError) 309 exit(1); 310 if (Args.hasArg(OPT_help)) { 311 printHelp(ToolName, Tbl, outs()); 312 exit(0); 313 } 314 if (Args.hasArg(OPT_version)) { 315 outs() << ToolName << '\n'; 316 cl::PrintVersionMessage(); 317 exit(0); 318 } 319 320 return Args; 321 } 322 323 template <typename T> 324 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) { 325 if (const opt::Arg *A = Args.getLastArg(ID)) { 326 StringRef V(A->getValue()); 327 if (!llvm::to_integer(V, Value, 0)) { 328 errs() << A->getSpelling() + 329 ": expected a non-negative integer, but got '" + V + "'"; 330 exit(1); 331 } 332 } else { 333 Value = 0; 334 } 335 } 336 337 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args, 338 bool IsAddr2Line) { 339 if (Args.hasArg(OPT_functions)) 340 return FunctionNameKind::LinkageName; 341 if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ)) 342 return StringSwitch<FunctionNameKind>(A->getValue()) 343 .Case("none", FunctionNameKind::None) 344 .Case("short", FunctionNameKind::ShortName) 345 .Default(FunctionNameKind::LinkageName); 346 return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; 347 } 348 349 static std::optional<bool> parseColorArg(const opt::InputArgList &Args) { 350 if (Args.hasArg(OPT_color)) 351 return true; 352 if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ)) 353 return StringSwitch<std::optional<bool>>(A->getValue()) 354 .Case("always", true) 355 .Case("never", false) 356 .Case("auto", std::nullopt); 357 return std::nullopt; 358 } 359 360 static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) { 361 const opt::Arg *A = Args.getLastArg(ID); 362 if (!A) 363 return {}; 364 365 StringRef V(A->getValue()); 366 object::BuildID BuildID = parseBuildID(V); 367 if (BuildID.empty()) { 368 errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n"; 369 exit(1); 370 } 371 return BuildID; 372 } 373 374 // Symbolize markup from stdin and write the result to stdout. 375 static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) { 376 MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args)); 377 std::string InputString; 378 while (std::getline(std::cin, InputString)) { 379 InputString += '\n'; 380 Filter.filter(InputString); 381 } 382 Filter.finish(); 383 } 384 385 ExitOnError ExitOnErr; 386 387 int main(int argc, char **argv) { 388 InitLLVM X(argc, argv); 389 sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded); 390 391 ToolName = argv[0]; 392 bool IsAddr2Line = sys::path::stem(ToolName).contains("addr2line"); 393 BumpPtrAllocator A; 394 StringSaver Saver(A); 395 SymbolizerOptTable Tbl; 396 opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl); 397 398 LLVMSymbolizer::Options Opts; 399 uint64_t AdjustVMA; 400 PrinterConfig Config; 401 parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA); 402 if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) { 403 Opts.PathStyle = 404 A->getOption().matches(OPT_basenames) 405 ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly 406 : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath; 407 } else { 408 Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath; 409 } 410 Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ); 411 Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str(); 412 Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line); 413 Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str(); 414 Opts.FallbackDebugPath = 415 Args.getLastArgValue(OPT_fallback_debug_path_EQ).str(); 416 Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line); 417 parseIntArg(Args, OPT_print_source_context_lines_EQ, 418 Config.SourceContextLines); 419 Opts.RelativeAddresses = Args.hasArg(OPT_relative_address); 420 Opts.UntagAddresses = 421 Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line); 422 Opts.UseDIA = Args.hasArg(OPT_use_dia); 423 #if !defined(LLVM_ENABLE_DIA_SDK) 424 if (Opts.UseDIA) { 425 WithColor::warning() << "DIA not available; using native PDB reader\n"; 426 Opts.UseDIA = false; 427 } 428 #endif 429 Opts.UseSymbolTable = true; 430 if (Args.hasArg(OPT_cache_size_EQ)) 431 parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize); 432 Config.PrintAddress = Args.hasArg(OPT_addresses); 433 Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None; 434 Config.Pretty = Args.hasArg(OPT_pretty_print); 435 Config.Verbose = Args.hasArg(OPT_verbose); 436 437 for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) { 438 StringRef Hint(A->getValue()); 439 if (sys::path::extension(Hint) == ".dSYM") { 440 Opts.DsymHints.emplace_back(Hint); 441 } else { 442 errs() << "Warning: invalid dSYM hint: \"" << Hint 443 << "\" (must have the '.dSYM' extension).\n"; 444 } 445 } 446 447 LLVMSymbolizer Symbolizer(Opts); 448 449 if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod())) 450 enableDebuginfod(Symbolizer, Args); 451 452 if (Args.hasArg(OPT_filter_markup)) { 453 filterMarkup(Args, Symbolizer); 454 return 0; 455 } 456 457 auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM; 458 if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) { 459 if (strcmp(A->getValue(), "GNU") == 0) 460 Style = OutputStyle::GNU; 461 else if (strcmp(A->getValue(), "JSON") == 0) 462 Style = OutputStyle::JSON; 463 else 464 Style = OutputStyle::LLVM; 465 } 466 467 if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { 468 errs() << "error: cannot specify both --build-id and --obj\n"; 469 return EXIT_FAILURE; 470 } 471 object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); 472 473 std::unique_ptr<DIPrinter> Printer; 474 if (Style == OutputStyle::GNU) 475 Printer = std::make_unique<GNUPrinter>(outs(), printError, Config); 476 else if (Style == OutputStyle::JSON) 477 Printer = std::make_unique<JSONPrinter>(outs(), Config); 478 else 479 Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config); 480 481 // When an input file is specified, exit immediately if the file cannot be 482 // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the 483 // cached file handle. 484 if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) { 485 auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue()); 486 if (!Status) { 487 Request SymRequest = {Arg->getValue(), 0}; 488 handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) { 489 Printer->printError(SymRequest, EI); 490 }); 491 return EXIT_FAILURE; 492 } 493 } 494 495 std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT); 496 if (InputAddresses.empty()) { 497 const int kMaxInputStringLength = 1024; 498 char InputString[kMaxInputStringLength]; 499 500 while (fgets(InputString, sizeof(InputString), stdin)) { 501 // Strip newline characters. 502 std::string StrippedInputString(InputString); 503 llvm::erase_if(StrippedInputString, 504 [](char c) { return c == '\r' || c == '\n'; }); 505 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, 506 StrippedInputString, Symbolizer, *Printer); 507 outs().flush(); 508 } 509 } else { 510 Printer->listBegin(); 511 for (StringRef Address : InputAddresses) 512 symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, 513 Symbolizer, *Printer); 514 Printer->listEnd(); 515 } 516 517 return 0; 518 } 519