xref: /freebsd/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 //===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This utility works much like "addr2line". It is capable of transforming
10 // tuples (module name, module offset) to code locations (function name,
11 // file, line number, column number). It is targeted for compiler-rt tools
12 // (especially AddressSanitizer and ThreadSanitizer) that can use it
13 // to symbolize stack traces in their error reports.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Opts.inc"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Config/config.h"
20 #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
21 #include "llvm/DebugInfo/Symbolize/Symbolize.h"
22 #include "llvm/Debuginfod/DIFetcher.h"
23 #include "llvm/Debuginfod/HTTPClient.h"
24 #include "llvm/Option/Arg.h"
25 #include "llvm/Option/ArgList.h"
26 #include "llvm/Option/Option.h"
27 #include "llvm/Support/COM.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/FileSystem.h"
31 #include "llvm/Support/InitLLVM.h"
32 #include "llvm/Support/Path.h"
33 #include "llvm/Support/StringSaver.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <algorithm>
36 #include <cstdio>
37 #include <cstring>
38 #include <string>
39 
40 using namespace llvm;
41 using namespace symbolize;
42 
namespace {
// Option identifiers. Each OPTION(...) record pulled in from Opts.inc expands
// to one OPT_<ID> enumerator, following the OPT_INVALID placeholder.
enum ID {
  OPT_INVALID = 0, // This is not an option ID.
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  OPT_##ID,
#include "Opts.inc"
#undef OPTION
};

// Each PREFIX(NAME, VALUE) record in Opts.inc becomes a constant array of
// C strings named NAME; the OPTION records below refer to these arrays by
// name through their PREFIX field.
#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
#include "Opts.inc"
#undef PREFIX

// One opt::OptTable::Info entry per OPTION(...) record in Opts.inc. The
// initializer order below must match the field order of opt::OptTable::Info.
const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
               HELPTEXT, METAVAR, VALUES)                                      \
  {                                                                            \
      PREFIX,      NAME,      HELPTEXT,                                        \
      METAVAR,     OPT_##ID,  opt::Option::KIND##Class,                        \
      PARAM,       FLAGS,     OPT_##GROUP,                                     \
      OPT_##ALIAS, ALIASARGS, VALUES},
#include "Opts.inc"
#undef OPTION
};

// Option table for this tool, built from InfoTable.
class SymbolizerOptTable : public opt::OptTable {
public:
  SymbolizerOptTable() : OptTable(InfoTable) {
    // NOTE(review): presumably this lets several single-letter flags be
    // combined into one argument (e.g. "-ia") - confirm against OptTable.
    setGroupedShortOptions(true);
  }
};
} // namespace
76 
77 template <typename T>
78 static void print(const Request &Request, Expected<T> &ResOrErr,
79                   DIPrinter &Printer) {
80   if (ResOrErr) {
81     // No error, print the result.
82     Printer.print(Request, *ResOrErr);
83     return;
84   }
85 
86   // Handle the error.
87   bool PrintEmpty = true;
88   handleAllErrors(std::move(ResOrErr.takeError()),
89                   [&](const ErrorInfoBase &EI) {
90                     PrintEmpty = Printer.printError(
91                         Request, EI, "LLVMSymbolizer: error reading file: ");
92                   });
93 
94   if (PrintEmpty)
95     Printer.print(Request, T());
96 }
97 
// Output format for symbolization results. main() instantiates LLVMPrinter,
// GNUPrinter or JSONPrinter for the LLVM, GNU and JSON values respectively.
enum class OutputStyle { LLVM, GNU, JSON };
99 
// Kind of lookup requested by one input line. parseCommand() selects it from
// an optional "CODE ", "DATA " or "FRAME " prefix and falls back to Code when
// no prefix is present.
enum class Command {
  Code,  // Symbolized as a code address by symbolizeInput().
  Data,  // Symbolized with LLVMSymbolizer::symbolizeData().
  Frame, // Symbolized with LLVMSymbolizer::symbolizeFrame().
};
105 
106 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
107                          StringRef InputString, Command &Cmd,
108                          std::string &ModuleName, uint64_t &ModuleOffset) {
109   const char kDelimiters[] = " \n\r";
110   ModuleName = "";
111   if (InputString.consume_front("CODE ")) {
112     Cmd = Command::Code;
113   } else if (InputString.consume_front("DATA ")) {
114     Cmd = Command::Data;
115   } else if (InputString.consume_front("FRAME ")) {
116     Cmd = Command::Frame;
117   } else {
118     // If no cmd, assume it's CODE.
119     Cmd = Command::Code;
120   }
121   const char *Pos = InputString.data();
122   // Skip delimiters and parse input filename (if needed).
123   if (BinaryName.empty()) {
124     Pos += strspn(Pos, kDelimiters);
125     if (*Pos == '"' || *Pos == '\'') {
126       char Quote = *Pos;
127       Pos++;
128       const char *End = strchr(Pos, Quote);
129       if (!End)
130         return false;
131       ModuleName = std::string(Pos, End - Pos);
132       Pos = End + 1;
133     } else {
134       int NameLength = strcspn(Pos, kDelimiters);
135       ModuleName = std::string(Pos, NameLength);
136       Pos += NameLength;
137     }
138   } else {
139     ModuleName = BinaryName.str();
140   }
141   // Skip delimiters and parse module offset.
142   Pos += strspn(Pos, kDelimiters);
143   int OffsetLength = strcspn(Pos, kDelimiters);
144   StringRef Offset(Pos, OffsetLength);
145   // GNU addr2line assumes the offset is hexadecimal and allows a redundant
146   // "0x" or "0X" prefix; do the same for compatibility.
147   if (IsAddr2Line)
148     Offset.consume_front("0x") || Offset.consume_front("0X");
149   return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset);
150 }
151 
152 static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA,
153                            bool IsAddr2Line, OutputStyle Style,
154                            StringRef InputString, LLVMSymbolizer &Symbolizer,
155                            DIPrinter &Printer) {
156   Command Cmd;
157   std::string ModuleName;
158   uint64_t Offset = 0;
159   if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,
160                     StringRef(InputString), Cmd, ModuleName, Offset)) {
161     Printer.printInvalidCommand({ModuleName, None}, InputString);
162     return;
163   }
164 
165   uint64_t AdjustedOffset = Offset - AdjustVMA;
166   if (Cmd == Command::Data) {
167     Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(
168         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
169     print({ModuleName, Offset}, ResOrErr, Printer);
170   } else if (Cmd == Command::Frame) {
171     Expected<std::vector<DILocal>> ResOrErr = Symbolizer.symbolizeFrame(
172         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
173     print({ModuleName, Offset}, ResOrErr, Printer);
174   } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) {
175     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
176         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
177     print({ModuleName, Offset}, ResOrErr, Printer);
178   } else if (Style == OutputStyle::GNU) {
179     // With PrintFunctions == FunctionNameKind::LinkageName (default)
180     // and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
181     // may override the name of an inlined function with the name of the topmost
182     // caller function in the inlining chain. This contradicts the existing
183     // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only
184     // the topmost function, which suits our needs better.
185     Expected<DIInliningInfo> ResOrErr = Symbolizer.symbolizeInlinedCode(
186         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
187     Expected<DILineInfo> Res0OrErr =
188         !ResOrErr
189             ? Expected<DILineInfo>(ResOrErr.takeError())
190             : ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
191                                                     : ResOrErr->getFrame(0));
192     print({ModuleName, Offset}, Res0OrErr, Printer);
193   } else {
194     Expected<DILineInfo> ResOrErr = Symbolizer.symbolizeCode(
195         ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection});
196     print({ModuleName, Offset}, ResOrErr, Printer);
197   }
198 }
199 
200 static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,
201                       raw_ostream &OS) {
202   const char HelpText[] = " [options] addresses...";
203   Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),
204                 ToolName.str().c_str());
205   // TODO Replace this with OptTable API once it adds extrahelp support.
206   OS << "\nPass @FILE as argument to read options from FILE.\n";
207 }
208 
209 static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,
210                                       StringSaver &Saver,
211                                       SymbolizerOptTable &Tbl) {
212   StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";
213   // The environment variable specifies initial options which can be overridden
214   // by commnad line options.
215   Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"
216                                                    : "LLVM_SYMBOLIZER_OPTS");
217   bool HasError = false;
218   opt::InputArgList Args =
219       Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
220         errs() << ("error: " + Msg + "\n");
221         HasError = true;
222       });
223   if (HasError)
224     exit(1);
225   if (Args.hasArg(OPT_help)) {
226     printHelp(ToolName, Tbl, outs());
227     exit(0);
228   }
229   if (Args.hasArg(OPT_version)) {
230     outs() << ToolName << '\n';
231     cl::PrintVersionMessage();
232     exit(0);
233   }
234 
235   return Args;
236 }
237 
238 template <typename T>
239 static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {
240   if (const opt::Arg *A = Args.getLastArg(ID)) {
241     StringRef V(A->getValue());
242     if (!llvm::to_integer(V, Value, 0)) {
243       errs() << A->getSpelling() +
244                     ": expected a non-negative integer, but got '" + V + "'";
245       exit(1);
246     }
247   } else {
248     Value = 0;
249   }
250 }
251 
252 static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,
253                                                   bool IsAddr2Line) {
254   if (Args.hasArg(OPT_functions))
255     return FunctionNameKind::LinkageName;
256   if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))
257     return StringSwitch<FunctionNameKind>(A->getValue())
258         .Case("none", FunctionNameKind::None)
259         .Case("short", FunctionNameKind::ShortName)
260         .Default(FunctionNameKind::LinkageName);
261   return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;
262 }
263 
264 int main(int argc, char **argv) {
265   InitLLVM X(argc, argv);
266   sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
267 
268   bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
269   BumpPtrAllocator A;
270   StringSaver Saver(A);
271   SymbolizerOptTable Tbl;
272   opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);
273 
274   LLVMSymbolizer::Options Opts;
275   uint64_t AdjustVMA;
276   PrinterConfig Config;
277   parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);
278   if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {
279     Opts.PathStyle =
280         A->getOption().matches(OPT_basenames)
281             ? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly
282             : DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;
283   } else {
284     Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;
285   }
286   Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);
287   Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();
288   Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);
289   Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();
290   Opts.FallbackDebugPath =
291       Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();
292   Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);
293   parseIntArg(Args, OPT_print_source_context_lines_EQ,
294               Config.SourceContextLines);
295   Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);
296   Opts.UntagAddresses =
297       Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);
298   Opts.UseDIA = Args.hasArg(OPT_use_dia);
299 #if !defined(LLVM_ENABLE_DIA_SDK)
300   if (Opts.UseDIA) {
301     WithColor::warning() << "DIA not available; using native PDB reader\n";
302     Opts.UseDIA = false;
303   }
304 #endif
305   Opts.UseSymbolTable = true;
306   Config.PrintAddress = Args.hasArg(OPT_addresses);
307   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
308   Config.Pretty = Args.hasArg(OPT_pretty_print);
309   Config.Verbose = Args.hasArg(OPT_verbose);
310 
311   for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {
312     StringRef Hint(A->getValue());
313     if (sys::path::extension(Hint) == ".dSYM") {
314       Opts.DsymHints.emplace_back(Hint);
315     } else {
316       errs() << "Warning: invalid dSYM hint: \"" << Hint
317              << "\" (must have the '.dSYM' extension).\n";
318     }
319   }
320 
321   auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;
322   if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {
323     if (strcmp(A->getValue(), "GNU") == 0)
324       Style = OutputStyle::GNU;
325     else if (strcmp(A->getValue(), "JSON") == 0)
326       Style = OutputStyle::JSON;
327     else
328       Style = OutputStyle::LLVM;
329   }
330 
331   LLVMSymbolizer Symbolizer(Opts);
332 
333   // Look up symbols using the debuginfod client.
334   Symbolizer.addDIFetcher(std::make_unique<DebuginfodDIFetcher>());
335   // The HTTPClient must be initialized for use by the debuginfod client.
336   HTTPClient::initialize();
337 
338   std::unique_ptr<DIPrinter> Printer;
339   if (Style == OutputStyle::GNU)
340     Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
341   else if (Style == OutputStyle::JSON)
342     Printer = std::make_unique<JSONPrinter>(outs(), Config);
343   else
344     Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
345 
346   std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
347   if (InputAddresses.empty()) {
348     const int kMaxInputStringLength = 1024;
349     char InputString[kMaxInputStringLength];
350 
351     while (fgets(InputString, sizeof(InputString), stdin)) {
352       // Strip newline characters.
353       std::string StrippedInputString(InputString);
354       llvm::erase_if(StrippedInputString,
355                      [](char c) { return c == '\r' || c == '\n'; });
356       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString,
357                      Symbolizer, *Printer);
358       outs().flush();
359     }
360   } else {
361     Printer->listBegin();
362     for (StringRef Address : InputAddresses)
363       symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer,
364                      *Printer);
365     Printer->listEnd();
366   }
367 
368   return 0;
369 }
370