xref: /freebsd/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h (revision e1c4c8dd8d2d10b6104f06856a77bd5b4813a801)
1 //===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbolizer is used by sanitizers to map instruction address to a location in
10 // source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
11 // defined in the program, or (if they are missing) tries to find and
12 // launch "llvm-symbolizer" commandline tool in a separate process and
13 // communicate with it.
14 //
15 // Generally we should try to avoid calling system library functions during
16 // symbolization (and use their replacements from sanitizer_libc.h instead).
17 //===----------------------------------------------------------------------===//
18 #ifndef SANITIZER_SYMBOLIZER_H
19 #define SANITIZER_SYMBOLIZER_H
20 
21 #include "sanitizer_common.h"
22 #include "sanitizer_mutex.h"
23 #include "sanitizer_vector.h"
24 
25 namespace __sanitizer {
26 
27 struct AddressInfo {
28   // Owns all the string members. Storage for them is
29   // (de)allocated using sanitizer internal allocator.
30   uptr address;
31 
32   char *module;
33   uptr module_offset;
34   ModuleArch module_arch;
35   u8 uuid[kModuleUUIDSize];
36   uptr uuid_size;
37 
38   static const uptr kUnknown = ~(uptr)0;
39   char *function;
40   uptr function_offset;
41 
42   char *file;
43   int line;
44   int column;
45 
46   AddressInfo();
47   // Deletes all strings and resets all fields.
48   void Clear();
49   void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
50   void FillModuleInfo(const LoadedModule &mod);
51   uptr module_base() const { return address - module_offset; }
52 };
53 
// Linked list of symbolized frames; each node describes one frame through its
// AddressInfo.
struct SymbolizedStack {
  // Next frame in the list.
  SymbolizedStack *next;
  // Symbolization data for this frame.
  AddressInfo info;
  // Creates a node for |addr|. The constructor is private, so code outside
  // this struct has to go through this factory.
  static SymbolizedStack *New(uptr addr);
  // Deletes current, and all subsequent frames in the linked list.
  // The object cannot be accessed after the call to this function.
  void ClearAll();

 private:
  SymbolizedStack();
};
66 
67 class SymbolizedStackHolder {
68   SymbolizedStack *Stack;
69 
70   void clear() {
71     if (Stack)
72       Stack->ClearAll();
73   }
74 
75  public:
76   explicit SymbolizedStackHolder(SymbolizedStack *Stack = nullptr)
77       : Stack(Stack) {}
78   ~SymbolizedStackHolder() { clear(); }
79   void reset(SymbolizedStack *S = nullptr) {
80     if (Stack != S)
81       clear();
82     Stack = S;
83   }
84   const SymbolizedStack *get() const { return Stack; }
85 };
86 
// For now, DataInfo is used to describe a global variable.
struct DataInfo {
  // Owns all the string members. Storage for them is
  // (de)allocated using sanitizer internal allocator.
  char *module;
  uptr module_offset;
  ModuleArch module_arch;

  // NOTE(review): file/line presumably give the source location of the
  // described object and start/size its address range -- confirm against the
  // code that fills this struct.
  char *file;
  uptr line;
  char *name;
  uptr start;
  uptr size;

  DataInfo();
  // Defined out of line.
  // NOTE(review): presumably frees the owned strings and resets the fields,
  // like AddressInfo::Clear() -- confirm in the implementation.
  void Clear();
};
104 
105 struct LocalInfo {
106   char *function_name = nullptr;
107   char *name = nullptr;
108   char *decl_file = nullptr;
109   unsigned decl_line = 0;
110 
111   bool has_frame_offset = false;
112   bool has_size = false;
113   bool has_tag_offset = false;
114 
115   sptr frame_offset;
116   uptr size;
117   uptr tag_offset;
118 
119   void Clear();
120 };
121 
// Output of Symbolizer::SymbolizeFrame(): module identification plus the list
// of local variables (LocalInfo) for a frame.
struct FrameInfo {
  // No default initializers and no constructor are declared here, so these
  // three fields start out with whatever the creating code gives them.
  char *module;
  uptr module_offset;
  ModuleArch module_arch;

  InternalMmapVector<LocalInfo> locals;
  // Defined out of line.
  void Clear();
};
130 
131 class SymbolizerTool;
132 
// Front end used by sanitizers to turn addresses into module / function /
// source information. Obtained through GetOrInit(); the constructor is
// private.
class Symbolizer final {
 public:
  /// Initialize and return platform-specific implementation of symbolizer
  /// (if it wasn't already initialized).
  static Symbolizer *GetOrInit();
  static void LateInitialize();
  // Returns a list of symbolized frames for a given address (containing
  // all inlined functions, if necessary).
  SymbolizedStack *SymbolizePC(uptr address);
  // Fill |info| for |address|; the bool result reports success.
  bool SymbolizeData(uptr address, DataInfo *info);
  bool SymbolizeFrame(uptr address, FrameInfo *info);

  // The module names Symbolizer returns are stable and unique for every given
  // module.  It is safe to store and compare them as pointers.
  bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
                                   uptr *module_address);
  // Convenience wrapper around GetModuleNameAndOffsetForPC() that discards the
  // offset. Returns nullptr when the lookup fails.
  const char *GetModuleNameForPc(uptr pc) {
    const char *module_name = nullptr;
    uptr unused;
    if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
      return module_name;
    return nullptr;
  }

  // Release internal caches (if any).
  void Flush();
  // Attempts to demangle the provided C++ mangled name. Never returns nullptr.
  const char *Demangle(const char *name);

  // Allow user to install hooks that would be called before/after Symbolizer
  // does the actual file/line info fetching. Specific sanitizers may need this
  // to distinguish system library calls made in user code from calls made
  // during in-process symbolization.
  typedef void (*StartSymbolizationHook)();
  typedef void (*EndSymbolizationHook)();
  // May be called at most once.
  void AddHooks(StartSymbolizationHook start_hook,
                EndSymbolizationHook end_hook);

  void RefreshModules();
  const LoadedModule *FindModuleForAddress(uptr address);

  void InvalidateModuleList();

  const ListOfModules &GetRefreshedListOfModules();

 private:
  // GetModuleNameAndOffsetForPC has to return a string to the caller.
  // Since the corresponding module might get unloaded later, we should create
  // our owned copies of the strings that we can safely return.
  // ModuleNameOwner does not provide any synchronization, thus calls to
  // its method should be protected by |mu_|.
  class ModuleNameOwner {
   public:
    // |synchronized_by| is the mutex callers are expected to hold; it is only
    // stored here. Storage for kInitialCapacity names is reserved up front.
    explicit ModuleNameOwner(Mutex *synchronized_by)
        : last_match_(nullptr), mu_(synchronized_by) {
      storage_.reserve(kInitialCapacity);
    }
    const char *GetOwnedCopy(const char *str);

   private:
    static const uptr kInitialCapacity = 1000;
    InternalMmapVector<const char*> storage_;
    const char *last_match_;

    Mutex *mu_;
  } module_names_;

  /// Platform-specific function for creating a Symbolizer object.
  static Symbolizer *PlatformInit();

  bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
                                         uptr *module_offset,
                                         ModuleArch *module_arch);
  ListOfModules modules_;
  ListOfModules fallback_modules_;
  // If stale, need to reload the modules before looking up addresses.
  bool modules_fresh_;

  // Platform-specific default demangler, returns nullptr on failure.
  const char *PlatformDemangle(const char *name);

  static Symbolizer *symbolizer_;
  static StaticSpinMutex init_mu_;

  // Mutex locked from public methods of |Symbolizer|, so that the internals
  // (including individual symbolizer tools and platform-specific methods) are
  // always synchronized.
  Mutex mu_;

  IntrusiveList<SymbolizerTool> tools_;

  explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);

  static LowLevelAllocator symbolizer_allocator_;

  // Hooks installed through AddHooks().
  StartSymbolizationHook start_hook_;
  EndSymbolizationHook end_hook_;
  // Scope object with out-of-line constructor and destructor.
  // NOTE(review): presumably invokes the start/end hooks and restores errno
  // around a symbolization request -- confirm in the implementation.
  class SymbolizerScope {
   public:
    explicit SymbolizerScope(const Symbolizer *sym);
    ~SymbolizerScope();
   private:
    const Symbolizer *sym_;
    int errno_;  // Backup of errno in case the symbolizer changes the value.
  };
};
240 
// SANITIZER_WINDOWS is defined to 0 or 1 on every platform (see
// sanitizer_platform.h), so it has to be tested by value: #ifdef would be
// true everywhere and expose this Windows-only declaration on all targets.
#if SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif
244 
245 }  // namespace __sanitizer
246 
247 #endif  // SANITIZER_SYMBOLIZER_H
248