xref: /freebsd/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is shared between AddressSanitizer and ThreadSanitizer
10 // run-time libraries.
11 //===----------------------------------------------------------------------===//
12 
13 #include "sanitizer_allocator_internal.h"
14 #include "sanitizer_internal_defs.h"
15 #include "sanitizer_platform.h"
16 #include "sanitizer_symbolizer_internal.h"
17 
18 namespace __sanitizer {
19 
GetOrInit()20 Symbolizer *Symbolizer::GetOrInit() {
21   SpinMutexLock l(&init_mu_);
22   if (symbolizer_)
23     return symbolizer_;
24   symbolizer_ = PlatformInit();
25   CHECK(symbolizer_);
26   return symbolizer_;
27 }
28 
29 // See sanitizer_symbolizer_markup.cpp.
30 #if !SANITIZER_SYMBOLIZER_MARKUP
31 
ExtractToken(const char * str,const char * delims,char ** result)32 const char *ExtractToken(const char *str, const char *delims, char **result) {
33   uptr prefix_len = internal_strcspn(str, delims);
34   *result = (char *)InternalAlloc(prefix_len + 1);
35   internal_memcpy(*result, str, prefix_len);
36   (*result)[prefix_len] = '\0';
37   const char *prefix_end = str + prefix_len;
38   if (*prefix_end != '\0')
39     prefix_end++;
40   return prefix_end;
41 }
42 
ExtractInt(const char * str,const char * delims,int * result)43 const char *ExtractInt(const char *str, const char *delims, int *result) {
44   char *buff = nullptr;
45   const char *ret = ExtractToken(str, delims, &buff);
46   if (buff) {
47     *result = (int)internal_atoll(buff);
48   }
49   InternalFree(buff);
50   return ret;
51 }
52 
ExtractUptr(const char * str,const char * delims,uptr * result)53 const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
54   char *buff = nullptr;
55   const char *ret = ExtractToken(str, delims, &buff);
56   if (buff) {
57     *result = (uptr)internal_atoll(buff);
58   }
59   InternalFree(buff);
60   return ret;
61 }
62 
ExtractSptr(const char * str,const char * delims,sptr * result)63 const char *ExtractSptr(const char *str, const char *delims, sptr *result) {
64   char *buff = nullptr;
65   const char *ret = ExtractToken(str, delims, &buff);
66   if (buff) {
67     *result = (sptr)internal_atoll(buff);
68   }
69   InternalFree(buff);
70   return ret;
71 }
72 
ExtractTokenUpToDelimiter(const char * str,const char * delimiter,char ** result)73 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
74                                       char **result) {
75   const char *found_delimiter = internal_strstr(str, delimiter);
76   uptr prefix_len =
77       found_delimiter ? found_delimiter - str : internal_strlen(str);
78   *result = (char *)InternalAlloc(prefix_len + 1);
79   internal_memcpy(*result, str, prefix_len);
80   (*result)[prefix_len] = '\0';
81   const char *prefix_end = str + prefix_len;
82   if (*prefix_end != '\0')
83     prefix_end += internal_strlen(delimiter);
84   return prefix_end;
85 }
86 
SymbolizePC(uptr addr)87 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
88   Lock l(&mu_);
89   SymbolizedStack *res = SymbolizedStack::New(addr);
90   auto *mod = FindModuleForAddress(addr);
91   if (!mod)
92     return res;
93   // Always fill data about module name and offset.
94   res->info.FillModuleInfo(*mod);
95   for (auto &tool : tools_) {
96     SymbolizerScope sym_scope(this);
97     if (tool.SymbolizePC(addr, res)) {
98       return res;
99     }
100   }
101   return res;
102 }
103 
SymbolizeData(uptr addr,DataInfo * info)104 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
105   Lock l(&mu_);
106   const char *module_name = nullptr;
107   uptr module_offset;
108   ModuleArch arch;
109   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
110                                          &arch))
111     return false;
112   info->Clear();
113   info->module = internal_strdup(module_name);
114   info->module_offset = module_offset;
115   info->module_arch = arch;
116   for (auto &tool : tools_) {
117     SymbolizerScope sym_scope(this);
118     if (tool.SymbolizeData(addr, info)) {
119       return true;
120     }
121   }
122   return false;
123 }
124 
SymbolizeFrame(uptr addr,FrameInfo * info)125 bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
126   Lock l(&mu_);
127   const char *module_name = nullptr;
128   if (!FindModuleNameAndOffsetForAddress(
129           addr, &module_name, &info->module_offset, &info->module_arch))
130     return false;
131   info->module = internal_strdup(module_name);
132   for (auto &tool : tools_) {
133     SymbolizerScope sym_scope(this);
134     if (tool.SymbolizeFrame(addr, info)) {
135       return true;
136     }
137   }
138   return false;
139 }
140 
GetModuleNameAndOffsetForPC(uptr pc,const char ** module_name,uptr * module_address)141 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
142                                              uptr *module_address) {
143   Lock l(&mu_);
144   const char *internal_module_name = nullptr;
145   ModuleArch arch;
146   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
147                                          module_address, &arch))
148     return false;
149 
150   if (module_name)
151     *module_name = module_names_.GetOwnedCopy(internal_module_name);
152   return true;
153 }
154 
Flush()155 void Symbolizer::Flush() {
156   Lock l(&mu_);
157   for (auto &tool : tools_) {
158     SymbolizerScope sym_scope(this);
159     tool.Flush();
160   }
161 }
162 
Demangle(const char * name)163 const char *Symbolizer::Demangle(const char *name) {
164   CHECK(name);
165   Lock l(&mu_);
166   for (auto &tool : tools_) {
167     SymbolizerScope sym_scope(this);
168     if (const char *demangled = tool.Demangle(name))
169       return demangled;
170   }
171   if (const char *demangled = PlatformDemangle(name))
172     return demangled;
173   return name;
174 }
175 
FindModuleNameAndOffsetForAddress(uptr address,const char ** module_name,uptr * module_offset,ModuleArch * module_arch)176 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
177                                                    const char **module_name,
178                                                    uptr *module_offset,
179                                                    ModuleArch *module_arch) {
180   const LoadedModule *module = FindModuleForAddress(address);
181   if (!module)
182     return false;
183   *module_name = module->full_name();
184   *module_offset = address - module->base_address();
185   *module_arch = module->arch();
186   return true;
187 }
188 
RefreshModules()189 void Symbolizer::RefreshModules() {
190   modules_.init();
191   fallback_modules_.fallbackInit();
192   RAW_CHECK(modules_.size() > 0);
193   modules_fresh_ = true;
194 }
195 
GetRefreshedListOfModules()196 const ListOfModules &Symbolizer::GetRefreshedListOfModules() {
197   if (!modules_fresh_)
198     RefreshModules();
199 
200   return modules_;
201 }
202 
SearchForModule(const ListOfModules & modules,uptr address)203 static const LoadedModule *SearchForModule(const ListOfModules &modules,
204                                            uptr address) {
205   for (uptr i = 0; i < modules.size(); i++) {
206     if (modules[i].containsAddress(address)) {
207       return &modules[i];
208     }
209   }
210   return nullptr;
211 }
212 
FindModuleForAddress(uptr address)213 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
214   bool modules_were_reloaded = false;
215   if (!modules_fresh_) {
216     RefreshModules();
217     modules_were_reloaded = true;
218   }
219   const LoadedModule *module = SearchForModule(modules_, address);
220   if (module)
221     return module;
222 
223   // dlopen/dlclose interceptors invalidate the module list, but when
224   // interception is disabled, we need to retry if the lookup fails in
225   // case the module list changed.
226 #  if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
227   if (!modules_were_reloaded) {
228     RefreshModules();
229     module = SearchForModule(modules_, address);
230     if (module)
231       return module;
232   }
233 #  endif
234 
235   if (fallback_modules_.size()) {
236     module = SearchForModule(fallback_modules_, address);
237   }
238   return module;
239 }
240 
241 // For now we assume the following protocol:
242 // For each request of the form
243 //   <module_name> <module_offset>
244 // passed to STDIN, external symbolizer prints to STDOUT response:
245 //   <function_name>
246 //   <file_name>:<line_number>:<column_number>
247 //   <function_name>
248 //   <file_name>:<line_number>:<column_number>
249 //   ...
250 //   <empty line>
251 class LLVMSymbolizerProcess final : public SymbolizerProcess {
252  public:
LLVMSymbolizerProcess(const char * path)253   explicit LLVMSymbolizerProcess(const char *path)
254       : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {}
255 
256  private:
ReachedEndOfOutput(const char * buffer,uptr length) const257   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
258     // Empty line marks the end of llvm-symbolizer output.
259     return length >= 2 && buffer[length - 1] == '\n' &&
260            buffer[length - 2] == '\n';
261   }
262 
263   // When adding a new architecture, don't forget to also update
264   // script/asan_symbolize.py and sanitizer_common.h.
GetArgV(const char * path_to_binary,const char * (& argv)[kArgVMax]) const265   void GetArgV(const char *path_to_binary,
266                const char *(&argv)[kArgVMax]) const override {
267 #  if defined(__x86_64h__)
268     const char *const kSymbolizerArch = "--default-arch=x86_64h";
269 #  elif defined(__x86_64__)
270     const char *const kSymbolizerArch = "--default-arch=x86_64";
271 #  elif defined(__i386__)
272     const char *const kSymbolizerArch = "--default-arch=i386";
273 #  elif SANITIZER_LOONGARCH64
274     const char *const kSymbolizerArch = "--default-arch=loongarch64";
275 #  elif SANITIZER_RISCV64
276     const char *const kSymbolizerArch = "--default-arch=riscv64";
277 #  elif defined(__aarch64__)
278     const char *const kSymbolizerArch = "--default-arch=arm64";
279 #  elif defined(__arm__)
280     const char *const kSymbolizerArch = "--default-arch=arm";
281 #  elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
282     const char *const kSymbolizerArch = "--default-arch=powerpc64";
283 #  elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
284     const char *const kSymbolizerArch = "--default-arch=powerpc64le";
285 #  elif defined(__s390x__)
286     const char *const kSymbolizerArch = "--default-arch=s390x";
287 #  elif defined(__s390__)
288     const char *const kSymbolizerArch = "--default-arch=s390";
289 #  else
290     const char *const kSymbolizerArch = "--default-arch=unknown";
291 #  endif
292 
293     const char *const demangle_flag =
294         common_flags()->demangle ? "--demangle" : "--no-demangle";
295     const char *const inline_flag =
296         common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines";
297     int i = 0;
298     argv[i++] = path_to_binary;
299     argv[i++] = demangle_flag;
300     argv[i++] = inline_flag;
301     argv[i++] = kSymbolizerArch;
302     argv[i++] = nullptr;
303     CHECK_LE(i, kArgVMax);
304   }
305 };
306 
LLVMSymbolizer(const char * path,LowLevelAllocator * allocator)307 LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
308     : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
309 
310 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
311 // Windows, so extract tokens from the right hand side first. The column info is
312 // also optional.
ParseFileLineInfo(AddressInfo * info,const char * str)313 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
314   char *file_line_info = nullptr;
315   str = ExtractToken(str, "\n", &file_line_info);
316   CHECK(file_line_info);
317 
318   if (uptr size = internal_strlen(file_line_info)) {
319     char *back = file_line_info + size - 1;
320     for (int i = 0; i < 2; ++i) {
321       while (back > file_line_info && IsDigit(*back)) --back;
322       if (*back != ':' || !IsDigit(back[1]))
323         break;
324       info->column = info->line;
325       info->line = internal_atoll(back + 1);
326       // Truncate the string at the colon to keep only filename.
327       *back = '\0';
328       --back;
329     }
330     ExtractToken(file_line_info, "", &info->file);
331   }
332 
333   InternalFree(file_line_info);
334   return str;
335 }
336 
337 // Parses one or more two-line strings in the following format:
338 //   <function_name>
339 //   <file_name>:<line_number>[:<column_number>]
340 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
341 // them use the same output format.
ParseSymbolizePCOutput(const char * str,SymbolizedStack * res)342 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
343   bool top_frame = true;
344   SymbolizedStack *last = res;
345   while (true) {
346     char *function_name = nullptr;
347     str = ExtractToken(str, "\n", &function_name);
348     CHECK(function_name);
349     if (function_name[0] == '\0') {
350       // There are no more frames.
351       InternalFree(function_name);
352       break;
353     }
354     SymbolizedStack *cur;
355     if (top_frame) {
356       cur = res;
357       top_frame = false;
358     } else {
359       cur = SymbolizedStack::New(res->info.address);
360       cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
361                                res->info.module_arch);
362       last->next = cur;
363       last = cur;
364     }
365 
366     AddressInfo *info = &cur->info;
367     info->function = function_name;
368     str = ParseFileLineInfo(info, str);
369 
370     // Functions and filenames can be "??", in which case we write 0
371     // to address info to mark that names are unknown.
372     if (0 == internal_strcmp(info->function, "??")) {
373       InternalFree(info->function);
374       info->function = 0;
375     }
376     if (info->file && 0 == internal_strcmp(info->file, "??")) {
377       InternalFree(info->file);
378       info->file = 0;
379     }
380   }
381 }
382 
383 // Parses a two- or three-line string in the following format:
384 //   <symbol_name>
385 //   <start_address> <size>
386 //   <filename>:<column>
387 // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support
388 // for symbolizing the third line in D123538, but we support the older two-line
389 // information as well.
ParseSymbolizeDataOutput(const char * str,DataInfo * info)390 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
391   str = ExtractToken(str, "\n", &info->name);
392   str = ExtractUptr(str, " ", &info->start);
393   str = ExtractUptr(str, "\n", &info->size);
394   // Note: If the third line isn't present, these calls will set info.{file,
395   // line} to empty strings.
396   str = ExtractToken(str, ":", &info->file);
397   str = ExtractUptr(str, "\n", &info->line);
398 }
399 
ParseSymbolizeFrameOutput(const char * str,InternalMmapVector<LocalInfo> * locals)400 void ParseSymbolizeFrameOutput(const char *str,
401                                InternalMmapVector<LocalInfo> *locals) {
402   if (internal_strncmp(str, "??", 2) == 0)
403     return;
404 
405   while (*str) {
406     LocalInfo local;
407     str = ExtractToken(str, "\n", &local.function_name);
408     str = ExtractToken(str, "\n", &local.name);
409 
410     AddressInfo addr;
411     str = ParseFileLineInfo(&addr, str);
412     local.decl_file = addr.file;
413     local.decl_line = addr.line;
414 
415     local.has_frame_offset = internal_strncmp(str, "??", 2) != 0;
416     str = ExtractSptr(str, " ", &local.frame_offset);
417 
418     local.has_size = internal_strncmp(str, "??", 2) != 0;
419     str = ExtractUptr(str, " ", &local.size);
420 
421     local.has_tag_offset = internal_strncmp(str, "??", 2) != 0;
422     str = ExtractUptr(str, "\n", &local.tag_offset);
423 
424     locals->push_back(local);
425   }
426 }
427 
SymbolizePC(uptr addr,SymbolizedStack * stack)428 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
429   AddressInfo *info = &stack->info;
430   const char *buf = FormatAndSendCommand(
431       "CODE", info->module, info->module_offset, info->module_arch);
432   if (!buf)
433     return false;
434   ParseSymbolizePCOutput(buf, stack);
435   return true;
436 }
437 
SymbolizeData(uptr addr,DataInfo * info)438 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
439   const char *buf = FormatAndSendCommand(
440       "DATA", info->module, info->module_offset, info->module_arch);
441   if (!buf)
442     return false;
443   ParseSymbolizeDataOutput(buf, info);
444   info->start += (addr - info->module_offset);  // Add the base address.
445   return true;
446 }
447 
SymbolizeFrame(uptr addr,FrameInfo * info)448 bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) {
449   const char *buf = FormatAndSendCommand(
450       "FRAME", info->module, info->module_offset, info->module_arch);
451   if (!buf)
452     return false;
453   ParseSymbolizeFrameOutput(buf, &info->locals);
454   return true;
455 }
456 
FormatAndSendCommand(const char * command_prefix,const char * module_name,uptr module_offset,ModuleArch arch)457 const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix,
458                                                  const char *module_name,
459                                                  uptr module_offset,
460                                                  ModuleArch arch) {
461   CHECK(module_name);
462   int size_needed = 0;
463   if (arch == kModuleArchUnknown)
464     size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n",
465                                     command_prefix, module_name, module_offset);
466   else
467     size_needed = internal_snprintf(
468         buffer_, kBufferSize, "%s \"%s:%s\" 0x%zx\n", command_prefix,
469         module_name, ModuleArchToString(arch), module_offset);
470 
471   if (size_needed >= static_cast<int>(kBufferSize)) {
472     Report("WARNING: Command buffer too small");
473     return nullptr;
474   }
475 
476   return symbolizer_process_->SendCommand(buffer_);
477 }
478 
SymbolizerProcess(const char * path,bool use_posix_spawn)479 SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn)
480     : path_(path),
481       input_fd_(kInvalidFd),
482       output_fd_(kInvalidFd),
483       times_restarted_(0),
484       failed_to_start_(false),
485       reported_invalid_path_(false),
486       use_posix_spawn_(use_posix_spawn) {
487   CHECK(path_);
488   CHECK_NE(path_[0], '\0');
489 }
490 
IsSameModule(const char * path)491 static bool IsSameModule(const char *path) {
492   if (const char *ProcessName = GetProcessName()) {
493     if (const char *SymbolizerName = StripModuleName(path)) {
494       return !internal_strcmp(ProcessName, SymbolizerName);
495     }
496   }
497   return false;
498 }
499 
SendCommand(const char * command)500 const char *SymbolizerProcess::SendCommand(const char *command) {
501   if (failed_to_start_)
502     return nullptr;
503   if (IsSameModule(path_)) {
504     Report("WARNING: Symbolizer was blocked from starting itself!\n");
505     failed_to_start_ = true;
506     return nullptr;
507   }
508   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
509     // Start or restart symbolizer if we failed to send command to it.
510     if (const char *res = SendCommandImpl(command))
511       return res;
512     Restart();
513   }
514   if (!failed_to_start_) {
515     Report("WARNING: Failed to use and restart external symbolizer!\n");
516     failed_to_start_ = true;
517   }
518   return nullptr;
519 }
520 
SendCommandImpl(const char * command)521 const char *SymbolizerProcess::SendCommandImpl(const char *command) {
522   if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
523     return nullptr;
524   if (!WriteToSymbolizer(command, internal_strlen(command)))
525     return nullptr;
526   if (!ReadFromSymbolizer())
527     return nullptr;
528   return buffer_.data();
529 }
530 
Restart()531 bool SymbolizerProcess::Restart() {
532   if (input_fd_ != kInvalidFd)
533     CloseFile(input_fd_);
534   if (output_fd_ != kInvalidFd)
535     CloseFile(output_fd_);
536   return StartSymbolizerSubprocess();
537 }
538 
ReadFromSymbolizer()539 bool SymbolizerProcess::ReadFromSymbolizer() {
540   buffer_.clear();
541   constexpr uptr max_length = 1024;
542   bool ret = true;
543   do {
544     uptr just_read = 0;
545     uptr size_before = buffer_.size();
546     buffer_.resize(size_before + max_length);
547     buffer_.resize(buffer_.capacity());
548     bool ret = ReadFromFile(input_fd_, &buffer_[size_before],
549                             buffer_.size() - size_before, &just_read);
550 
551     if (!ret)
552       just_read = 0;
553 
554     buffer_.resize(size_before + just_read);
555 
556     // We can't read 0 bytes, as we don't expect external symbolizer to close
557     // its stdout.
558     if (just_read == 0) {
559       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
560       ret = false;
561       break;
562     }
563   } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size()));
564   buffer_.push_back('\0');
565   return ret;
566 }
567 
WriteToSymbolizer(const char * buffer,uptr length)568 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
569   if (length == 0)
570     return true;
571   uptr write_len = 0;
572   bool success = WriteToFile(output_fd_, buffer, length, &write_len);
573   if (!success || write_len != length) {
574     Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
575     return false;
576   }
577   return true;
578 }
579 
580 #endif  // !SANITIZER_SYMBOLIZER_MARKUP
581 
582 }  // namespace __sanitizer
583