168d75effSDimitry Andric //===-- sanitizer_symbolizer_libcdep.cpp ----------------------------------===// 268d75effSDimitry Andric // 368d75effSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 468d75effSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 568d75effSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 668d75effSDimitry Andric // 768d75effSDimitry Andric //===----------------------------------------------------------------------===// 868d75effSDimitry Andric // 968d75effSDimitry Andric // This file is shared between AddressSanitizer and ThreadSanitizer 1068d75effSDimitry Andric // run-time libraries. 1168d75effSDimitry Andric //===----------------------------------------------------------------------===// 1268d75effSDimitry Andric 1368d75effSDimitry Andric #include "sanitizer_allocator_internal.h" 1468d75effSDimitry Andric #include "sanitizer_internal_defs.h" 15e8d8bef9SDimitry Andric #include "sanitizer_platform.h" 1668d75effSDimitry Andric #include "sanitizer_symbolizer_internal.h" 1768d75effSDimitry Andric 1868d75effSDimitry Andric namespace __sanitizer { 1968d75effSDimitry Andric 2068d75effSDimitry Andric Symbolizer *Symbolizer::GetOrInit() { 2168d75effSDimitry Andric SpinMutexLock l(&init_mu_); 2268d75effSDimitry Andric if (symbolizer_) 2368d75effSDimitry Andric return symbolizer_; 2468d75effSDimitry Andric symbolizer_ = PlatformInit(); 2568d75effSDimitry Andric CHECK(symbolizer_); 2668d75effSDimitry Andric return symbolizer_; 2768d75effSDimitry Andric } 2868d75effSDimitry Andric 2968d75effSDimitry Andric // See sanitizer_symbolizer_markup.cpp. 3068d75effSDimitry Andric #if !SANITIZER_SYMBOLIZER_MARKUP 3168d75effSDimitry Andric 3268d75effSDimitry Andric const char *ExtractToken(const char *str, const char *delims, char **result) { 3368d75effSDimitry Andric uptr prefix_len = internal_strcspn(str, delims); 3468d75effSDimitry Andric *result = (char*)InternalAlloc(prefix_len + 1); 3568d75effSDimitry Andric internal_memcpy(*result, str, prefix_len); 3668d75effSDimitry Andric (*result)[prefix_len] = '\0'; 3768d75effSDimitry Andric const char *prefix_end = str + prefix_len; 3868d75effSDimitry Andric if (*prefix_end != '\0') prefix_end++; 3968d75effSDimitry Andric return prefix_end; 4068d75effSDimitry Andric } 4168d75effSDimitry Andric 4268d75effSDimitry Andric const char *ExtractInt(const char *str, const char *delims, int *result) { 435ffd83dbSDimitry Andric char *buff = nullptr; 4468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff); 455ffd83dbSDimitry Andric if (buff) { 4668d75effSDimitry Andric *result = (int)internal_atoll(buff); 4768d75effSDimitry Andric } 4868d75effSDimitry Andric InternalFree(buff); 4968d75effSDimitry Andric return ret; 5068d75effSDimitry Andric } 5168d75effSDimitry Andric 5268d75effSDimitry Andric const char *ExtractUptr(const char *str, const char *delims, uptr *result) { 535ffd83dbSDimitry Andric char *buff = nullptr; 5468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff); 555ffd83dbSDimitry Andric if (buff) { 5668d75effSDimitry Andric *result = (uptr)internal_atoll(buff); 5768d75effSDimitry Andric } 5868d75effSDimitry Andric InternalFree(buff); 5968d75effSDimitry Andric return ret; 6068d75effSDimitry Andric } 6168d75effSDimitry Andric 6268d75effSDimitry Andric const char *ExtractSptr(const char *str, const char *delims, sptr *result) { 635ffd83dbSDimitry Andric char *buff = nullptr; 6468d75effSDimitry Andric const char *ret = ExtractToken(str, delims, &buff); 655ffd83dbSDimitry Andric if (buff) { 6668d75effSDimitry Andric *result = (sptr)internal_atoll(buff); 6768d75effSDimitry Andric } 6868d75effSDimitry Andric InternalFree(buff); 6968d75effSDimitry Andric return ret; 7068d75effSDimitry Andric } 7168d75effSDimitry Andric 7268d75effSDimitry Andric const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter, 7368d75effSDimitry Andric char **result) { 7468d75effSDimitry Andric const char *found_delimiter = internal_strstr(str, delimiter); 7568d75effSDimitry Andric uptr prefix_len = 7668d75effSDimitry Andric found_delimiter ? found_delimiter - str : internal_strlen(str); 7768d75effSDimitry Andric *result = (char *)InternalAlloc(prefix_len + 1); 7868d75effSDimitry Andric internal_memcpy(*result, str, prefix_len); 7968d75effSDimitry Andric (*result)[prefix_len] = '\0'; 8068d75effSDimitry Andric const char *prefix_end = str + prefix_len; 8168d75effSDimitry Andric if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter); 8268d75effSDimitry Andric return prefix_end; 8368d75effSDimitry Andric } 8468d75effSDimitry Andric 8568d75effSDimitry Andric SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) { 86349cc55cSDimitry Andric Lock l(&mu_); 8768d75effSDimitry Andric SymbolizedStack *res = SymbolizedStack::New(addr); 880eae32dcSDimitry Andric auto *mod = FindModuleForAddress(addr); 890eae32dcSDimitry Andric if (!mod) 9068d75effSDimitry Andric return res; 9168d75effSDimitry Andric // Always fill data about module name and offset. 920eae32dcSDimitry Andric res->info.FillModuleInfo(*mod); 9368d75effSDimitry Andric for (auto &tool : tools_) { 9468d75effSDimitry Andric SymbolizerScope sym_scope(this); 9568d75effSDimitry Andric if (tool.SymbolizePC(addr, res)) { 9668d75effSDimitry Andric return res; 9768d75effSDimitry Andric } 9868d75effSDimitry Andric } 9968d75effSDimitry Andric return res; 10068d75effSDimitry Andric } 10168d75effSDimitry Andric 10268d75effSDimitry Andric bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) { 103349cc55cSDimitry Andric Lock l(&mu_); 1045ffd83dbSDimitry Andric const char *module_name = nullptr; 10568d75effSDimitry Andric uptr module_offset; 10668d75effSDimitry Andric ModuleArch arch; 10768d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset, 10868d75effSDimitry Andric &arch)) 10968d75effSDimitry Andric return false; 11068d75effSDimitry Andric info->Clear(); 11168d75effSDimitry Andric info->module = internal_strdup(module_name); 11268d75effSDimitry Andric info->module_offset = module_offset; 11368d75effSDimitry Andric info->module_arch = arch; 11468d75effSDimitry Andric for (auto &tool : tools_) { 11568d75effSDimitry Andric SymbolizerScope sym_scope(this); 11668d75effSDimitry Andric if (tool.SymbolizeData(addr, info)) { 11768d75effSDimitry Andric return true; 11868d75effSDimitry Andric } 11968d75effSDimitry Andric } 120*5f757f3fSDimitry Andric return false; 12168d75effSDimitry Andric } 12268d75effSDimitry Andric 12368d75effSDimitry Andric bool Symbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { 124349cc55cSDimitry Andric Lock l(&mu_); 1255ffd83dbSDimitry Andric const char *module_name = nullptr; 12668d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress( 12768d75effSDimitry Andric addr, &module_name, &info->module_offset, &info->module_arch)) 12868d75effSDimitry Andric return false; 12968d75effSDimitry Andric info->module = internal_strdup(module_name); 13068d75effSDimitry Andric for (auto &tool : tools_) { 13168d75effSDimitry Andric SymbolizerScope sym_scope(this); 13268d75effSDimitry Andric if (tool.SymbolizeFrame(addr, info)) { 13368d75effSDimitry Andric return true; 13468d75effSDimitry Andric } 13568d75effSDimitry Andric } 136*5f757f3fSDimitry Andric return false; 13768d75effSDimitry Andric } 13868d75effSDimitry Andric 13968d75effSDimitry Andric bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name, 14068d75effSDimitry Andric uptr *module_address) { 141349cc55cSDimitry Andric Lock l(&mu_); 14268d75effSDimitry Andric const char *internal_module_name = nullptr; 14368d75effSDimitry Andric ModuleArch arch; 14468d75effSDimitry Andric if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name, 14568d75effSDimitry Andric module_address, &arch)) 14668d75effSDimitry Andric return false; 14768d75effSDimitry Andric 14868d75effSDimitry Andric if (module_name) 14968d75effSDimitry Andric *module_name = module_names_.GetOwnedCopy(internal_module_name); 15068d75effSDimitry Andric return true; 15168d75effSDimitry Andric } 15268d75effSDimitry Andric 15368d75effSDimitry Andric void Symbolizer::Flush() { 154349cc55cSDimitry Andric Lock l(&mu_); 15568d75effSDimitry Andric for (auto &tool : tools_) { 15668d75effSDimitry Andric SymbolizerScope sym_scope(this); 15768d75effSDimitry Andric tool.Flush(); 15868d75effSDimitry Andric } 15968d75effSDimitry Andric } 16068d75effSDimitry Andric 16168d75effSDimitry Andric const char *Symbolizer::Demangle(const char *name) { 162*5f757f3fSDimitry Andric CHECK(name); 163349cc55cSDimitry Andric Lock l(&mu_); 16468d75effSDimitry Andric for (auto &tool : tools_) { 16568d75effSDimitry Andric SymbolizerScope sym_scope(this); 16668d75effSDimitry Andric if (const char *demangled = tool.Demangle(name)) 16768d75effSDimitry Andric return demangled; 16868d75effSDimitry Andric } 169*5f757f3fSDimitry Andric if (const char *demangled = PlatformDemangle(name)) 170*5f757f3fSDimitry Andric return demangled; 171*5f757f3fSDimitry Andric return name; 17268d75effSDimitry Andric } 17368d75effSDimitry Andric 17468d75effSDimitry Andric bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address, 17568d75effSDimitry Andric const char **module_name, 17668d75effSDimitry Andric uptr *module_offset, 17768d75effSDimitry Andric ModuleArch *module_arch) { 17868d75effSDimitry Andric const LoadedModule *module = FindModuleForAddress(address); 1795ffd83dbSDimitry Andric if (!module) 18068d75effSDimitry Andric return false; 18168d75effSDimitry Andric *module_name = module->full_name(); 18268d75effSDimitry Andric *module_offset = address - module->base_address(); 18368d75effSDimitry Andric *module_arch = module->arch(); 18468d75effSDimitry Andric return true; 18568d75effSDimitry Andric } 18668d75effSDimitry Andric 18768d75effSDimitry Andric void Symbolizer::RefreshModules() { 18868d75effSDimitry Andric modules_.init(); 18968d75effSDimitry Andric fallback_modules_.fallbackInit(); 19068d75effSDimitry Andric RAW_CHECK(modules_.size() > 0); 19168d75effSDimitry Andric modules_fresh_ = true; 19268d75effSDimitry Andric } 19368d75effSDimitry Andric 194*5f757f3fSDimitry Andric const ListOfModules &Symbolizer::GetRefreshedListOfModules() { 195*5f757f3fSDimitry Andric if (!modules_fresh_) 196*5f757f3fSDimitry Andric RefreshModules(); 197*5f757f3fSDimitry Andric 198*5f757f3fSDimitry Andric return modules_; 199*5f757f3fSDimitry Andric } 200*5f757f3fSDimitry Andric 20168d75effSDimitry Andric static const LoadedModule *SearchForModule(const ListOfModules &modules, 20268d75effSDimitry Andric uptr address) { 20368d75effSDimitry Andric for (uptr i = 0; i < modules.size(); i++) { 20468d75effSDimitry Andric if (modules[i].containsAddress(address)) { 20568d75effSDimitry Andric return &modules[i]; 20668d75effSDimitry Andric } 20768d75effSDimitry Andric } 20868d75effSDimitry Andric return nullptr; 20968d75effSDimitry Andric } 21068d75effSDimitry Andric 21168d75effSDimitry Andric const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) { 21268d75effSDimitry Andric bool modules_were_reloaded = false; 21368d75effSDimitry Andric if (!modules_fresh_) { 21468d75effSDimitry Andric RefreshModules(); 21568d75effSDimitry Andric modules_were_reloaded = true; 21668d75effSDimitry Andric } 21768d75effSDimitry Andric const LoadedModule *module = SearchForModule(modules_, address); 21868d75effSDimitry Andric if (module) return module; 21968d75effSDimitry Andric 22068d75effSDimitry Andric // dlopen/dlclose interceptors invalidate the module list, but when 22168d75effSDimitry Andric // interception is disabled, we need to retry if the lookup fails in 22268d75effSDimitry Andric // case the module list changed. 22368d75effSDimitry Andric #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE 22468d75effSDimitry Andric if (!modules_were_reloaded) { 22568d75effSDimitry Andric RefreshModules(); 22668d75effSDimitry Andric module = SearchForModule(modules_, address); 22768d75effSDimitry Andric if (module) return module; 22868d75effSDimitry Andric } 22968d75effSDimitry Andric #endif 23068d75effSDimitry Andric 23168d75effSDimitry Andric if (fallback_modules_.size()) { 23268d75effSDimitry Andric module = SearchForModule(fallback_modules_, address); 23368d75effSDimitry Andric } 23468d75effSDimitry Andric return module; 23568d75effSDimitry Andric } 23668d75effSDimitry Andric 23768d75effSDimitry Andric // For now we assume the following protocol: 23868d75effSDimitry Andric // For each request of the form 23968d75effSDimitry Andric // <module_name> <module_offset> 24068d75effSDimitry Andric // passed to STDIN, external symbolizer prints to STDOUT response: 24168d75effSDimitry Andric // <function_name> 24268d75effSDimitry Andric // <file_name>:<line_number>:<column_number> 24368d75effSDimitry Andric // <function_name> 24468d75effSDimitry Andric // <file_name>:<line_number>:<column_number> 24568d75effSDimitry Andric // ... 24668d75effSDimitry Andric // <empty line> 247e8d8bef9SDimitry Andric class LLVMSymbolizerProcess final : public SymbolizerProcess { 24868d75effSDimitry Andric public: 24968d75effSDimitry Andric explicit LLVMSymbolizerProcess(const char *path) 25081ad6265SDimitry Andric : SymbolizerProcess(path, /*use_posix_spawn=*/SANITIZER_APPLE) {} 25168d75effSDimitry Andric 25268d75effSDimitry Andric private: 25368d75effSDimitry Andric bool ReachedEndOfOutput(const char *buffer, uptr length) const override { 25468d75effSDimitry Andric // Empty line marks the end of llvm-symbolizer output. 25568d75effSDimitry Andric return length >= 2 && buffer[length - 1] == '\n' && 25668d75effSDimitry Andric buffer[length - 2] == '\n'; 25768d75effSDimitry Andric } 25868d75effSDimitry Andric 25968d75effSDimitry Andric // When adding a new architecture, don't forget to also update 26068d75effSDimitry Andric // script/asan_symbolize.py and sanitizer_common.h. 26168d75effSDimitry Andric void GetArgV(const char *path_to_binary, 26268d75effSDimitry Andric const char *(&argv)[kArgVMax]) const override { 26368d75effSDimitry Andric #if defined(__x86_64h__) 26468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=x86_64h"; 26568d75effSDimitry Andric #elif defined(__x86_64__) 26668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=x86_64"; 26768d75effSDimitry Andric #elif defined(__i386__) 26868d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=i386"; 269bdd1243dSDimitry Andric #elif SANITIZER_LOONGARCH64 270bdd1243dSDimitry Andric const char *const kSymbolizerArch = "--default-arch=loongarch64"; 271e8d8bef9SDimitry Andric #elif SANITIZER_RISCV64 272e8d8bef9SDimitry Andric const char *const kSymbolizerArch = "--default-arch=riscv64"; 27368d75effSDimitry Andric #elif defined(__aarch64__) 27468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=arm64"; 27568d75effSDimitry Andric #elif defined(__arm__) 27668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=arm"; 27768d75effSDimitry Andric #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 27868d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=powerpc64"; 27968d75effSDimitry Andric #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 28068d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=powerpc64le"; 28168d75effSDimitry Andric #elif defined(__s390x__) 28268d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=s390x"; 28368d75effSDimitry Andric #elif defined(__s390__) 28468d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=s390"; 28568d75effSDimitry Andric #else 28668d75effSDimitry Andric const char* const kSymbolizerArch = "--default-arch=unknown"; 28768d75effSDimitry Andric #endif 28868d75effSDimitry Andric 2890eae32dcSDimitry Andric const char *const demangle_flag = 2900eae32dcSDimitry Andric common_flags()->demangle ? "--demangle" : "--no-demangle"; 2910eae32dcSDimitry Andric const char *const inline_flag = 2920eae32dcSDimitry Andric common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines"; 29368d75effSDimitry Andric int i = 0; 29468d75effSDimitry Andric argv[i++] = path_to_binary; 2950eae32dcSDimitry Andric argv[i++] = demangle_flag; 29668d75effSDimitry Andric argv[i++] = inline_flag; 29768d75effSDimitry Andric argv[i++] = kSymbolizerArch; 29868d75effSDimitry Andric argv[i++] = nullptr; 2990eae32dcSDimitry Andric CHECK_LE(i, kArgVMax); 30068d75effSDimitry Andric } 30168d75effSDimitry Andric }; 30268d75effSDimitry Andric 30368d75effSDimitry Andric LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator) 30468d75effSDimitry Andric : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {} 30568d75effSDimitry Andric 30668d75effSDimitry Andric // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on 30768d75effSDimitry Andric // Windows, so extract tokens from the right hand side first. The column info is 30868d75effSDimitry Andric // also optional. 30968d75effSDimitry Andric static const char *ParseFileLineInfo(AddressInfo *info, const char *str) { 3105ffd83dbSDimitry Andric char *file_line_info = nullptr; 31168d75effSDimitry Andric str = ExtractToken(str, "\n", &file_line_info); 31268d75effSDimitry Andric CHECK(file_line_info); 31368d75effSDimitry Andric 31468d75effSDimitry Andric if (uptr size = internal_strlen(file_line_info)) { 31568d75effSDimitry Andric char *back = file_line_info + size - 1; 31668d75effSDimitry Andric for (int i = 0; i < 2; ++i) { 31768d75effSDimitry Andric while (back > file_line_info && IsDigit(*back)) --back; 31868d75effSDimitry Andric if (*back != ':' || !IsDigit(back[1])) break; 31968d75effSDimitry Andric info->column = info->line; 32068d75effSDimitry Andric info->line = internal_atoll(back + 1); 32168d75effSDimitry Andric // Truncate the string at the colon to keep only filename. 32268d75effSDimitry Andric *back = '\0'; 32368d75effSDimitry Andric --back; 32468d75effSDimitry Andric } 32568d75effSDimitry Andric ExtractToken(file_line_info, "", &info->file); 32668d75effSDimitry Andric } 32768d75effSDimitry Andric 32868d75effSDimitry Andric InternalFree(file_line_info); 32968d75effSDimitry Andric return str; 33068d75effSDimitry Andric } 33168d75effSDimitry Andric 33268d75effSDimitry Andric // Parses one or more two-line strings in the following format: 33368d75effSDimitry Andric // <function_name> 33468d75effSDimitry Andric // <file_name>:<line_number>[:<column_number>] 33568d75effSDimitry Andric // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of 33668d75effSDimitry Andric // them use the same output format. 33768d75effSDimitry Andric void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) { 33868d75effSDimitry Andric bool top_frame = true; 33968d75effSDimitry Andric SymbolizedStack *last = res; 34068d75effSDimitry Andric while (true) { 3415ffd83dbSDimitry Andric char *function_name = nullptr; 34268d75effSDimitry Andric str = ExtractToken(str, "\n", &function_name); 34368d75effSDimitry Andric CHECK(function_name); 34468d75effSDimitry Andric if (function_name[0] == '\0') { 34568d75effSDimitry Andric // There are no more frames. 34668d75effSDimitry Andric InternalFree(function_name); 34768d75effSDimitry Andric break; 34868d75effSDimitry Andric } 34968d75effSDimitry Andric SymbolizedStack *cur; 35068d75effSDimitry Andric if (top_frame) { 35168d75effSDimitry Andric cur = res; 35268d75effSDimitry Andric top_frame = false; 35368d75effSDimitry Andric } else { 35468d75effSDimitry Andric cur = SymbolizedStack::New(res->info.address); 35568d75effSDimitry Andric cur->info.FillModuleInfo(res->info.module, res->info.module_offset, 35668d75effSDimitry Andric res->info.module_arch); 35768d75effSDimitry Andric last->next = cur; 35868d75effSDimitry Andric last = cur; 35968d75effSDimitry Andric } 36068d75effSDimitry Andric 36168d75effSDimitry Andric AddressInfo *info = &cur->info; 36268d75effSDimitry Andric info->function = function_name; 36368d75effSDimitry Andric str = ParseFileLineInfo(info, str); 36468d75effSDimitry Andric 36568d75effSDimitry Andric // Functions and filenames can be "??", in which case we write 0 36668d75effSDimitry Andric // to address info to mark that names are unknown. 36768d75effSDimitry Andric if (0 == internal_strcmp(info->function, "??")) { 36868d75effSDimitry Andric InternalFree(info->function); 36968d75effSDimitry Andric info->function = 0; 37068d75effSDimitry Andric } 371fe6060f1SDimitry Andric if (info->file && 0 == internal_strcmp(info->file, "??")) { 37268d75effSDimitry Andric InternalFree(info->file); 37368d75effSDimitry Andric info->file = 0; 37468d75effSDimitry Andric } 37568d75effSDimitry Andric } 37668d75effSDimitry Andric } 37768d75effSDimitry Andric 37881ad6265SDimitry Andric // Parses a two- or three-line string in the following format: 37968d75effSDimitry Andric // <symbol_name> 38068d75effSDimitry Andric // <start_address> <size> 38181ad6265SDimitry Andric // <filename>:<column> 38281ad6265SDimitry Andric // Used by LLVMSymbolizer and InternalSymbolizer. LLVMSymbolizer added support 38381ad6265SDimitry Andric // for symbolizing the third line in D123538, but we support the older two-line 38481ad6265SDimitry Andric // information as well. 38568d75effSDimitry Andric void ParseSymbolizeDataOutput(const char *str, DataInfo *info) { 38668d75effSDimitry Andric str = ExtractToken(str, "\n", &info->name); 38768d75effSDimitry Andric str = ExtractUptr(str, " ", &info->start); 38868d75effSDimitry Andric str = ExtractUptr(str, "\n", &info->size); 38981ad6265SDimitry Andric // Note: If the third line isn't present, these calls will set info.{file, 39081ad6265SDimitry Andric // line} to empty strings. 39181ad6265SDimitry Andric str = ExtractToken(str, ":", &info->file); 39281ad6265SDimitry Andric str = ExtractUptr(str, "\n", &info->line); 39368d75effSDimitry Andric } 39468d75effSDimitry Andric 395*5f757f3fSDimitry Andric void ParseSymbolizeFrameOutput(const char *str, 39668d75effSDimitry Andric InternalMmapVector<LocalInfo> *locals) { 39768d75effSDimitry Andric if (internal_strncmp(str, "??", 2) == 0) 39868d75effSDimitry Andric return; 39968d75effSDimitry Andric 40068d75effSDimitry Andric while (*str) { 40168d75effSDimitry Andric LocalInfo local; 40268d75effSDimitry Andric str = ExtractToken(str, "\n", &local.function_name); 40368d75effSDimitry Andric str = ExtractToken(str, "\n", &local.name); 40468d75effSDimitry Andric 40568d75effSDimitry Andric AddressInfo addr; 40668d75effSDimitry Andric str = ParseFileLineInfo(&addr, str); 40768d75effSDimitry Andric local.decl_file = addr.file; 40868d75effSDimitry Andric local.decl_line = addr.line; 40968d75effSDimitry Andric 41068d75effSDimitry Andric local.has_frame_offset = internal_strncmp(str, "??", 2) != 0; 41168d75effSDimitry Andric str = ExtractSptr(str, " ", &local.frame_offset); 41268d75effSDimitry Andric 41368d75effSDimitry Andric local.has_size = internal_strncmp(str, "??", 2) != 0; 41468d75effSDimitry Andric str = ExtractUptr(str, " ", &local.size); 41568d75effSDimitry Andric 41668d75effSDimitry Andric local.has_tag_offset = internal_strncmp(str, "??", 2) != 0; 41768d75effSDimitry Andric str = ExtractUptr(str, "\n", &local.tag_offset); 41868d75effSDimitry Andric 41968d75effSDimitry Andric locals->push_back(local); 42068d75effSDimitry Andric } 42168d75effSDimitry Andric } 42268d75effSDimitry Andric 42368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) { 42468d75effSDimitry Andric AddressInfo *info = &stack->info; 42568d75effSDimitry Andric const char *buf = FormatAndSendCommand( 42668d75effSDimitry Andric "CODE", info->module, info->module_offset, info->module_arch); 4275ffd83dbSDimitry Andric if (!buf) 4285ffd83dbSDimitry Andric return false; 42968d75effSDimitry Andric ParseSymbolizePCOutput(buf, stack); 43068d75effSDimitry Andric return true; 43168d75effSDimitry Andric } 43268d75effSDimitry Andric 43368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) { 43468d75effSDimitry Andric const char *buf = FormatAndSendCommand( 43568d75effSDimitry Andric "DATA", info->module, info->module_offset, info->module_arch); 4365ffd83dbSDimitry Andric if (!buf) 4375ffd83dbSDimitry Andric return false; 43868d75effSDimitry Andric ParseSymbolizeDataOutput(buf, info); 43968d75effSDimitry Andric info->start += (addr - info->module_offset); // Add the base address. 44068d75effSDimitry Andric return true; 44168d75effSDimitry Andric } 44268d75effSDimitry Andric 44368d75effSDimitry Andric bool LLVMSymbolizer::SymbolizeFrame(uptr addr, FrameInfo *info) { 44468d75effSDimitry Andric const char *buf = FormatAndSendCommand( 44568d75effSDimitry Andric "FRAME", info->module, info->module_offset, info->module_arch); 4465ffd83dbSDimitry Andric if (!buf) 4475ffd83dbSDimitry Andric return false; 44868d75effSDimitry Andric ParseSymbolizeFrameOutput(buf, &info->locals); 44968d75effSDimitry Andric return true; 45068d75effSDimitry Andric } 45168d75effSDimitry Andric 45268d75effSDimitry Andric const char *LLVMSymbolizer::FormatAndSendCommand(const char *command_prefix, 45368d75effSDimitry Andric const char *module_name, 45468d75effSDimitry Andric uptr module_offset, 45568d75effSDimitry Andric ModuleArch arch) { 45668d75effSDimitry Andric CHECK(module_name); 4575ffd83dbSDimitry Andric int size_needed = 0; 4585ffd83dbSDimitry Andric if (arch == kModuleArchUnknown) 4595ffd83dbSDimitry Andric size_needed = internal_snprintf(buffer_, kBufferSize, "%s \"%s\" 0x%zx\n", 4605ffd83dbSDimitry Andric command_prefix, module_name, module_offset); 4615ffd83dbSDimitry Andric else 4625ffd83dbSDimitry Andric size_needed = internal_snprintf(buffer_, kBufferSize, 4635ffd83dbSDimitry Andric "%s \"%s:%s\" 0x%zx\n", command_prefix, 4645ffd83dbSDimitry Andric module_name, ModuleArchToString(arch), 4655ffd83dbSDimitry Andric module_offset); 4665ffd83dbSDimitry Andric 4675ffd83dbSDimitry Andric if (size_needed >= static_cast<int>(kBufferSize)) { 46868d75effSDimitry Andric Report("WARNING: Command buffer too small"); 46968d75effSDimitry Andric return nullptr; 47068d75effSDimitry Andric } 4715ffd83dbSDimitry Andric 47268d75effSDimitry Andric return symbolizer_process_->SendCommand(buffer_); 47368d75effSDimitry Andric } 47468d75effSDimitry Andric 47568d75effSDimitry Andric SymbolizerProcess::SymbolizerProcess(const char *path, bool use_posix_spawn) 47668d75effSDimitry Andric : path_(path), 47768d75effSDimitry Andric input_fd_(kInvalidFd), 47868d75effSDimitry Andric output_fd_(kInvalidFd), 47968d75effSDimitry Andric times_restarted_(0), 48068d75effSDimitry Andric failed_to_start_(false), 48168d75effSDimitry Andric reported_invalid_path_(false), 48268d75effSDimitry Andric use_posix_spawn_(use_posix_spawn) { 48368d75effSDimitry Andric CHECK(path_); 48468d75effSDimitry Andric CHECK_NE(path_[0], '\0'); 48568d75effSDimitry Andric } 48668d75effSDimitry Andric 48768d75effSDimitry Andric static bool IsSameModule(const char* path) { 48868d75effSDimitry Andric if (const char* ProcessName = GetProcessName()) { 48968d75effSDimitry Andric if (const char* SymbolizerName = StripModuleName(path)) { 49068d75effSDimitry Andric return !internal_strcmp(ProcessName, SymbolizerName); 49168d75effSDimitry Andric } 49268d75effSDimitry Andric } 49368d75effSDimitry Andric return false; 49468d75effSDimitry Andric } 49568d75effSDimitry Andric 49668d75effSDimitry Andric const char *SymbolizerProcess::SendCommand(const char *command) { 49768d75effSDimitry Andric if (failed_to_start_) 49868d75effSDimitry Andric return nullptr; 49968d75effSDimitry Andric if (IsSameModule(path_)) { 50068d75effSDimitry Andric Report("WARNING: Symbolizer was blocked from starting itself!\n"); 50168d75effSDimitry Andric failed_to_start_ = true; 50268d75effSDimitry Andric return nullptr; 50368d75effSDimitry Andric } 50468d75effSDimitry Andric for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) { 50568d75effSDimitry Andric // Start or restart symbolizer if we failed to send command to it. 50668d75effSDimitry Andric if (const char *res = SendCommandImpl(command)) 50768d75effSDimitry Andric return res; 50868d75effSDimitry Andric Restart(); 50968d75effSDimitry Andric } 51068d75effSDimitry Andric if (!failed_to_start_) { 51168d75effSDimitry Andric Report("WARNING: Failed to use and restart external symbolizer!\n"); 51268d75effSDimitry Andric failed_to_start_ = true; 51368d75effSDimitry Andric } 5145ffd83dbSDimitry Andric return nullptr; 51568d75effSDimitry Andric } 51668d75effSDimitry Andric 51768d75effSDimitry Andric const char *SymbolizerProcess::SendCommandImpl(const char *command) { 51868d75effSDimitry Andric if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd) 5195ffd83dbSDimitry Andric return nullptr; 52068d75effSDimitry Andric if (!WriteToSymbolizer(command, internal_strlen(command))) 5215ffd83dbSDimitry Andric return nullptr; 52281ad6265SDimitry Andric if (!ReadFromSymbolizer()) 5235ffd83dbSDimitry Andric return nullptr; 52481ad6265SDimitry Andric return buffer_.data(); 52568d75effSDimitry Andric } 52668d75effSDimitry Andric 52768d75effSDimitry Andric bool SymbolizerProcess::Restart() { 52868d75effSDimitry Andric if (input_fd_ != kInvalidFd) 52968d75effSDimitry Andric CloseFile(input_fd_); 53068d75effSDimitry Andric if (output_fd_ != kInvalidFd) 53168d75effSDimitry Andric CloseFile(output_fd_); 53268d75effSDimitry Andric return StartSymbolizerSubprocess(); 53368d75effSDimitry Andric } 53468d75effSDimitry Andric 53581ad6265SDimitry Andric bool SymbolizerProcess::ReadFromSymbolizer() { 53681ad6265SDimitry Andric buffer_.clear(); 53781ad6265SDimitry Andric constexpr uptr max_length = 1024; 53881ad6265SDimitry Andric bool ret = true; 53981ad6265SDimitry Andric do { 54068d75effSDimitry Andric uptr just_read = 0; 54181ad6265SDimitry Andric uptr size_before = buffer_.size(); 54281ad6265SDimitry Andric buffer_.resize(size_before + max_length); 54381ad6265SDimitry Andric buffer_.resize(buffer_.capacity()); 54481ad6265SDimitry Andric bool ret = ReadFromFile(input_fd_, &buffer_[size_before], 54581ad6265SDimitry Andric buffer_.size() - size_before, &just_read); 54681ad6265SDimitry Andric 54781ad6265SDimitry Andric if (!ret) 54881ad6265SDimitry Andric just_read = 0; 54981ad6265SDimitry Andric 55081ad6265SDimitry Andric buffer_.resize(size_before + just_read); 55181ad6265SDimitry Andric 55268d75effSDimitry Andric // We can't read 0 bytes, as we don't expect external symbolizer to close 55368d75effSDimitry Andric // its stdout. 55481ad6265SDimitry Andric if (just_read == 0) { 55568d75effSDimitry Andric Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_); 55681ad6265SDimitry Andric ret = false; 55768d75effSDimitry Andric break; 55868d75effSDimitry Andric } 55981ad6265SDimitry Andric } while (!ReachedEndOfOutput(buffer_.data(), buffer_.size())); 56081ad6265SDimitry Andric buffer_.push_back('\0'); 56181ad6265SDimitry Andric return ret; 56268d75effSDimitry Andric } 56368d75effSDimitry Andric 56468d75effSDimitry Andric bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) { 56568d75effSDimitry Andric if (length == 0) 56668d75effSDimitry Andric return true; 56768d75effSDimitry Andric uptr write_len = 0; 56868d75effSDimitry Andric bool success = WriteToFile(output_fd_, buffer, length, &write_len); 56968d75effSDimitry Andric if (!success || write_len != length) { 57068d75effSDimitry Andric Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_); 57168d75effSDimitry Andric return false; 57268d75effSDimitry Andric } 57368d75effSDimitry Andric return true; 57468d75effSDimitry Andric } 57568d75effSDimitry Andric 57668d75effSDimitry Andric #endif // !SANITIZER_SYMBOLIZER_MARKUP 57768d75effSDimitry Andric 57868d75effSDimitry Andric } // namespace __sanitizer 579