xref: /freebsd/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 //===-- sanitizer_symbolizer.h ----------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Symbolizer is used by sanitizers to map instruction address to a location in
10 // source code at run-time. Symbolizer either uses __sanitizer_symbolize_*
11 // defined in the program, or (if they are missing) tries to find and
12 // launch "llvm-symbolizer" commandline tool in a separate process and
13 // communicate with it.
14 //
15 // Generally we should try to avoid calling system library functions during
16 // symbolization (and use their replacements from sanitizer_libc.h instead).
17 //===----------------------------------------------------------------------===//
18 #ifndef SANITIZER_SYMBOLIZER_H
19 #define SANITIZER_SYMBOLIZER_H
20 
21 #include "sanitizer_common.h"
22 #include "sanitizer_mutex.h"
23 #include "sanitizer_vector.h"
24 
25 namespace __sanitizer {
26 
27 struct AddressInfo {
28   // Owns all the string members. Storage for them is
29   // (de)allocated using sanitizer internal allocator.
30   uptr address;
31 
32   char *module;
33   uptr module_offset;
34   ModuleArch module_arch;
35   u8 uuid[kModuleUUIDSize];
36   uptr uuid_size;
37 
38   static const uptr kUnknown = ~(uptr)0;
39   char *function;
40   uptr function_offset;
41 
42   char *file;
43   int line;
44   int column;
45 
46   AddressInfo();
47   // Deletes all strings and resets all fields.
48   void Clear();
49   void FillModuleInfo(const char *mod_name, uptr mod_offset, ModuleArch arch);
50   void FillModuleInfo(const LoadedModule &mod);
51   uptr module_base() const { return address - module_offset; }
52 };
53 
54 // Linked list of symbolized frames (each frame is described by AddressInfo).
55 struct SymbolizedStack {
56   SymbolizedStack *next;
57   AddressInfo info;
58   static SymbolizedStack *New(uptr addr);
59   // Deletes current, and all subsequent frames in the linked list.
60   // The object cannot be accessed after the call to this function.
61   void ClearAll();
62 
63  private:
64   SymbolizedStack();
65 };
66 
67 // For now, DataInfo is used to describe global variable.
68 struct DataInfo {
69   // Owns all the string members. Storage for them is
70   // (de)allocated using sanitizer internal allocator.
71   char *module;
72   uptr module_offset;
73   ModuleArch module_arch;
74 
75   char *file;
76   uptr line;
77   char *name;
78   uptr start;
79   uptr size;
80 
81   DataInfo();
82   void Clear();
83 };
84 
85 struct LocalInfo {
86   char *function_name = nullptr;
87   char *name = nullptr;
88   char *decl_file = nullptr;
89   unsigned decl_line = 0;
90 
91   bool has_frame_offset = false;
92   bool has_size = false;
93   bool has_tag_offset = false;
94 
95   sptr frame_offset;
96   uptr size;
97   uptr tag_offset;
98 
99   void Clear();
100 };
101 
102 struct FrameInfo {
103   char *module;
104   uptr module_offset;
105   ModuleArch module_arch;
106 
107   InternalMmapVector<LocalInfo> locals;
108   void Clear();
109 };
110 
111 class SymbolizerTool;
112 
113 class Symbolizer final {
114  public:
115   /// Initialize and return platform-specific implementation of symbolizer
116   /// (if it wasn't already initialized).
117   static Symbolizer *GetOrInit();
118   static void LateInitialize();
119   // Returns a list of symbolized frames for a given address (containing
120   // all inlined functions, if necessary).
121   SymbolizedStack *SymbolizePC(uptr address);
122   bool SymbolizeData(uptr address, DataInfo *info);
123   bool SymbolizeFrame(uptr address, FrameInfo *info);
124 
125   // The module names Symbolizer returns are stable and unique for every given
126   // module.  It is safe to store and compare them as pointers.
127   bool GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
128                                    uptr *module_address);
129   const char *GetModuleNameForPc(uptr pc) {
130     const char *module_name = nullptr;
131     uptr unused;
132     if (GetModuleNameAndOffsetForPC(pc, &module_name, &unused))
133       return module_name;
134     return nullptr;
135   }
136 
137   // Release internal caches (if any).
138   void Flush();
139   // Attempts to demangle the provided C++ mangled name.
140   const char *Demangle(const char *name);
141 
142   // Allow user to install hooks that would be called before/after Symbolizer
143   // does the actual file/line info fetching. Specific sanitizers may need this
144   // to distinguish system library calls made in user code from calls made
145   // during in-process symbolization.
146   typedef void (*StartSymbolizationHook)();
147   typedef void (*EndSymbolizationHook)();
148   // May be called at most once.
149   void AddHooks(StartSymbolizationHook start_hook,
150                 EndSymbolizationHook end_hook);
151 
152   void RefreshModules();
153   const LoadedModule *FindModuleForAddress(uptr address);
154 
155   void InvalidateModuleList();
156 
157  private:
158   // GetModuleNameAndOffsetForPC has to return a string to the caller.
159   // Since the corresponding module might get unloaded later, we should create
160   // our owned copies of the strings that we can safely return.
161   // ModuleNameOwner does not provide any synchronization, thus calls to
162   // its method should be protected by |mu_|.
163   class ModuleNameOwner {
164    public:
165     explicit ModuleNameOwner(Mutex *synchronized_by)
166         : last_match_(nullptr), mu_(synchronized_by) {
167       storage_.reserve(kInitialCapacity);
168     }
169     const char *GetOwnedCopy(const char *str);
170 
171    private:
172     static const uptr kInitialCapacity = 1000;
173     InternalMmapVector<const char*> storage_;
174     const char *last_match_;
175 
176     Mutex *mu_;
177   } module_names_;
178 
179   /// Platform-specific function for creating a Symbolizer object.
180   static Symbolizer *PlatformInit();
181 
182   bool FindModuleNameAndOffsetForAddress(uptr address, const char **module_name,
183                                          uptr *module_offset,
184                                          ModuleArch *module_arch);
185   ListOfModules modules_;
186   ListOfModules fallback_modules_;
187   // If stale, need to reload the modules before looking up addresses.
188   bool modules_fresh_;
189 
190   // Platform-specific default demangler, must not return nullptr.
191   const char *PlatformDemangle(const char *name);
192 
193   static Symbolizer *symbolizer_;
194   static StaticSpinMutex init_mu_;
195 
196   // Mutex locked from public methods of |Symbolizer|, so that the internals
197   // (including individual symbolizer tools and platform-specific methods) are
198   // always synchronized.
199   Mutex mu_;
200 
201   IntrusiveList<SymbolizerTool> tools_;
202 
203   explicit Symbolizer(IntrusiveList<SymbolizerTool> tools);
204 
205   static LowLevelAllocator symbolizer_allocator_;
206 
207   StartSymbolizationHook start_hook_;
208   EndSymbolizationHook end_hook_;
209   class SymbolizerScope {
210    public:
211     explicit SymbolizerScope(const Symbolizer *sym);
212     ~SymbolizerScope();
213    private:
214     const Symbolizer *sym_;
215   };
216 };
217 
// NOTE(review): SANITIZER_WINDOWS is expected to always be defined (to 0 or 1)
// by the sanitizer platform headers -- confirm. Under that convention #ifdef
// is true on every platform, so the macro's value is tested instead, making
// the declaration visible only when targeting Windows.
#if SANITIZER_WINDOWS
void InitializeDbgHelpIfNeeded();
#endif
221 
222 }  // namespace __sanitizer
223 
224 #endif  // SANITIZER_SYMBOLIZER_H
225