Home | History | Annotate | Line # | Download | only in sanitizer_common
      1 //===-- sanitizer_symbolizer_libcdep.cc -----------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is shared between AddressSanitizer and ThreadSanitizer
     11 // run-time libraries.
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "sanitizer_allocator_internal.h"
     15 #include "sanitizer_internal_defs.h"
     16 #include "sanitizer_symbolizer_internal.h"
     17 
     18 namespace __sanitizer {
     19 
     20 Symbolizer *Symbolizer::GetOrInit() {
     21   SpinMutexLock l(&init_mu_);
     22   if (symbolizer_)
     23     return symbolizer_;
     24   symbolizer_ = PlatformInit();
     25   CHECK(symbolizer_);
     26   return symbolizer_;
     27 }
     28 
     29 // See sanitizer_symbolizer_markup.cc.
     30 #if !SANITIZER_SYMBOLIZER_MARKUP
     31 
     32 const char *ExtractToken(const char *str, const char *delims, char **result) {
     33   uptr prefix_len = internal_strcspn(str, delims);
     34   *result = (char*)InternalAlloc(prefix_len + 1);
     35   internal_memcpy(*result, str, prefix_len);
     36   (*result)[prefix_len] = '\0';
     37   const char *prefix_end = str + prefix_len;
     38   if (*prefix_end != '\0') prefix_end++;
     39   return prefix_end;
     40 }
     41 
     42 const char *ExtractInt(const char *str, const char *delims, int *result) {
     43   char *buff;
     44   const char *ret = ExtractToken(str, delims, &buff);
     45   if (buff != 0) {
     46     *result = (int)internal_atoll(buff);
     47   }
     48   InternalFree(buff);
     49   return ret;
     50 }
     51 
     52 const char *ExtractUptr(const char *str, const char *delims, uptr *result) {
     53   char *buff;
     54   const char *ret = ExtractToken(str, delims, &buff);
     55   if (buff != 0) {
     56     *result = (uptr)internal_atoll(buff);
     57   }
     58   InternalFree(buff);
     59   return ret;
     60 }
     61 
     62 const char *ExtractTokenUpToDelimiter(const char *str, const char *delimiter,
     63                                       char **result) {
     64   const char *found_delimiter = internal_strstr(str, delimiter);
     65   uptr prefix_len =
     66       found_delimiter ? found_delimiter - str : internal_strlen(str);
     67   *result = (char *)InternalAlloc(prefix_len + 1);
     68   internal_memcpy(*result, str, prefix_len);
     69   (*result)[prefix_len] = '\0';
     70   const char *prefix_end = str + prefix_len;
     71   if (*prefix_end != '\0') prefix_end += internal_strlen(delimiter);
     72   return prefix_end;
     73 }
     74 
     75 SymbolizedStack *Symbolizer::SymbolizePC(uptr addr) {
     76   BlockingMutexLock l(&mu_);
     77   const char *module_name;
     78   uptr module_offset;
     79   ModuleArch arch;
     80   SymbolizedStack *res = SymbolizedStack::New(addr);
     81   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
     82                                          &arch))
     83     return res;
     84   // Always fill data about module name and offset.
     85   res->info.FillModuleInfo(module_name, module_offset, arch);
     86   for (auto &tool : tools_) {
     87     SymbolizerScope sym_scope(this);
     88     if (tool.SymbolizePC(addr, res)) {
     89       return res;
     90     }
     91   }
     92   return res;
     93 }
     94 
     95 bool Symbolizer::SymbolizeData(uptr addr, DataInfo *info) {
     96   BlockingMutexLock l(&mu_);
     97   const char *module_name;
     98   uptr module_offset;
     99   ModuleArch arch;
    100   if (!FindModuleNameAndOffsetForAddress(addr, &module_name, &module_offset,
    101                                          &arch))
    102     return false;
    103   info->Clear();
    104   info->module = internal_strdup(module_name);
    105   info->module_offset = module_offset;
    106   info->module_arch = arch;
    107   for (auto &tool : tools_) {
    108     SymbolizerScope sym_scope(this);
    109     if (tool.SymbolizeData(addr, info)) {
    110       return true;
    111     }
    112   }
    113   return true;
    114 }
    115 
    116 bool Symbolizer::GetModuleNameAndOffsetForPC(uptr pc, const char **module_name,
    117                                              uptr *module_address) {
    118   BlockingMutexLock l(&mu_);
    119   const char *internal_module_name = nullptr;
    120   ModuleArch arch;
    121   if (!FindModuleNameAndOffsetForAddress(pc, &internal_module_name,
    122                                          module_address, &arch))
    123     return false;
    124 
    125   if (module_name)
    126     *module_name = module_names_.GetOwnedCopy(internal_module_name);
    127   return true;
    128 }
    129 
    130 void Symbolizer::Flush() {
    131   BlockingMutexLock l(&mu_);
    132   for (auto &tool : tools_) {
    133     SymbolizerScope sym_scope(this);
    134     tool.Flush();
    135   }
    136 }
    137 
    138 const char *Symbolizer::Demangle(const char *name) {
    139   BlockingMutexLock l(&mu_);
    140   for (auto &tool : tools_) {
    141     SymbolizerScope sym_scope(this);
    142     if (const char *demangled = tool.Demangle(name))
    143       return demangled;
    144   }
    145   return PlatformDemangle(name);
    146 }
    147 
    148 bool Symbolizer::FindModuleNameAndOffsetForAddress(uptr address,
    149                                                    const char **module_name,
    150                                                    uptr *module_offset,
    151                                                    ModuleArch *module_arch) {
    152   const LoadedModule *module = FindModuleForAddress(address);
    153   if (module == nullptr)
    154     return false;
    155   *module_name = module->full_name();
    156   *module_offset = address - module->base_address();
    157   *module_arch = module->arch();
    158   return true;
    159 }
    160 
    161 void Symbolizer::RefreshModules() {
    162   modules_.init();
    163   fallback_modules_.fallbackInit();
    164   RAW_CHECK(modules_.size() > 0);
    165   modules_fresh_ = true;
    166 }
    167 
    168 static const LoadedModule *SearchForModule(const ListOfModules &modules,
    169                                            uptr address) {
    170   for (uptr i = 0; i < modules.size(); i++) {
    171     if (modules[i].containsAddress(address)) {
    172       return &modules[i];
    173     }
    174   }
    175   return nullptr;
    176 }
    177 
    178 const LoadedModule *Symbolizer::FindModuleForAddress(uptr address) {
    179   bool modules_were_reloaded = false;
    180   if (!modules_fresh_) {
    181     RefreshModules();
    182     modules_were_reloaded = true;
    183   }
    184   const LoadedModule *module = SearchForModule(modules_, address);
    185   if (module) return module;
    186 
    187   // dlopen/dlclose interceptors invalidate the module list, but when
    188   // interception is disabled, we need to retry if the lookup fails in
    189   // case the module list changed.
    190 #if !SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
    191   if (!modules_were_reloaded) {
    192     RefreshModules();
    193     module = SearchForModule(modules_, address);
    194     if (module) return module;
    195   }
    196 #endif
    197 
    198   if (fallback_modules_.size()) {
    199     module = SearchForModule(fallback_modules_, address);
    200   }
    201   return module;
    202 }
    203 
    204 // For now we assume the following protocol:
    205 // For each request of the form
    206 //   <module_name> <module_offset>
    207 // passed to STDIN, external symbolizer prints to STDOUT response:
    208 //   <function_name>
    209 //   <file_name>:<line_number>:<column_number>
    210 //   <function_name>
    211 //   <file_name>:<line_number>:<column_number>
    212 //   ...
    213 //   <empty line>
    214 class LLVMSymbolizerProcess : public SymbolizerProcess {
    215  public:
    216   explicit LLVMSymbolizerProcess(const char *path) : SymbolizerProcess(path) {}
    217 
    218  private:
    219   bool ReachedEndOfOutput(const char *buffer, uptr length) const override {
    220     // Empty line marks the end of llvm-symbolizer output.
    221     return length >= 2 && buffer[length - 1] == '\n' &&
    222            buffer[length - 2] == '\n';
    223   }
    224 
    225   // When adding a new architecture, don't forget to also update
    226   // script/asan_symbolize.py and sanitizer_common.h.
    227   void GetArgV(const char *path_to_binary,
    228                const char *(&argv)[kArgVMax]) const override {
    229 #if defined(__x86_64h__)
    230     const char* const kSymbolizerArch = "--default-arch=x86_64h";
    231 #elif defined(__x86_64__)
    232     const char* const kSymbolizerArch = "--default-arch=x86_64";
    233 #elif defined(__i386__)
    234     const char* const kSymbolizerArch = "--default-arch=i386";
    235 #elif defined(__aarch64__)
    236     const char* const kSymbolizerArch = "--default-arch=arm64";
    237 #elif defined(__arm__)
    238     const char* const kSymbolizerArch = "--default-arch=arm";
    239 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    240     const char* const kSymbolizerArch = "--default-arch=powerpc64";
    241 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    242     const char* const kSymbolizerArch = "--default-arch=powerpc64le";
    243 #elif defined(__s390x__)
    244     const char* const kSymbolizerArch = "--default-arch=s390x";
    245 #elif defined(__s390__)
    246     const char* const kSymbolizerArch = "--default-arch=s390";
    247 #else
    248     const char* const kSymbolizerArch = "--default-arch=unknown";
    249 #endif
    250 
    251     const char *const inline_flag = common_flags()->symbolize_inline_frames
    252                                         ? "--inlining=true"
    253                                         : "--inlining=false";
    254     int i = 0;
    255     argv[i++] = path_to_binary;
    256     argv[i++] = inline_flag;
    257     argv[i++] = kSymbolizerArch;
    258     argv[i++] = nullptr;
    259   }
    260 };
    261 
// Constructs the tool together with its LLVMSymbolizerProcess for the binary
// at |path|; the process object is created with placement new on |*allocator|.
LLVMSymbolizer::LLVMSymbolizer(const char *path, LowLevelAllocator *allocator)
    : symbolizer_process_(new(*allocator) LLVMSymbolizerProcess(path)) {}
    264 
    265 // Parse a <file>:<line>[:<column>] buffer. The file path may contain colons on
    266 // Windows, so extract tokens from the right hand side first. The column info is
    267 // also optional.
    268 static const char *ParseFileLineInfo(AddressInfo *info, const char *str) {
    269   char *file_line_info = 0;
    270   str = ExtractToken(str, "\n", &file_line_info);
    271   CHECK(file_line_info);
    272 
    273   if (uptr size = internal_strlen(file_line_info)) {
    274     char *back = file_line_info + size - 1;
    275     for (int i = 0; i < 2; ++i) {
    276       while (back > file_line_info && IsDigit(*back)) --back;
    277       if (*back != ':' || !IsDigit(back[1])) break;
    278       info->column = info->line;
    279       info->line = internal_atoll(back + 1);
    280       // Truncate the string at the colon to keep only filename.
    281       *back = '\0';
    282       --back;
    283     }
    284     ExtractToken(file_line_info, "", &info->file);
    285   }
    286 
    287   InternalFree(file_line_info);
    288   return str;
    289 }
    290 
    291 // Parses one or more two-line strings in the following format:
    292 //   <function_name>
    293 //   <file_name>:<line_number>[:<column_number>]
    294 // Used by LLVMSymbolizer, Addr2LinePool and InternalSymbolizer, since all of
    295 // them use the same output format.
    296 void ParseSymbolizePCOutput(const char *str, SymbolizedStack *res) {
    297   bool top_frame = true;
    298   SymbolizedStack *last = res;
    299   while (true) {
    300     char *function_name = 0;
    301     str = ExtractToken(str, "\n", &function_name);
    302     CHECK(function_name);
    303     if (function_name[0] == '\0') {
    304       // There are no more frames.
    305       InternalFree(function_name);
    306       break;
    307     }
    308     SymbolizedStack *cur;
    309     if (top_frame) {
    310       cur = res;
    311       top_frame = false;
    312     } else {
    313       cur = SymbolizedStack::New(res->info.address);
    314       cur->info.FillModuleInfo(res->info.module, res->info.module_offset,
    315                                res->info.module_arch);
    316       last->next = cur;
    317       last = cur;
    318     }
    319 
    320     AddressInfo *info = &cur->info;
    321     info->function = function_name;
    322     str = ParseFileLineInfo(info, str);
    323 
    324     // Functions and filenames can be "??", in which case we write 0
    325     // to address info to mark that names are unknown.
    326     if (0 == internal_strcmp(info->function, "??")) {
    327       InternalFree(info->function);
    328       info->function = 0;
    329     }
    330     if (0 == internal_strcmp(info->file, "??")) {
    331       InternalFree(info->file);
    332       info->file = 0;
    333     }
    334   }
    335 }
    336 
    337 // Parses a two-line string in the following format:
    338 //   <symbol_name>
    339 //   <start_address> <size>
    340 // Used by LLVMSymbolizer and InternalSymbolizer.
    341 void ParseSymbolizeDataOutput(const char *str, DataInfo *info) {
    342   str = ExtractToken(str, "\n", &info->name);
    343   str = ExtractUptr(str, " ", &info->start);
    344   str = ExtractUptr(str, "\n", &info->size);
    345 }
    346 
    347 bool LLVMSymbolizer::SymbolizePC(uptr addr, SymbolizedStack *stack) {
    348   AddressInfo *info = &stack->info;
    349   const char *buf = FormatAndSendCommand(
    350       /*is_data*/ false, info->module, info->module_offset, info->module_arch);
    351   if (buf) {
    352     ParseSymbolizePCOutput(buf, stack);
    353     return true;
    354   }
    355   return false;
    356 }
    357 
    358 bool LLVMSymbolizer::SymbolizeData(uptr addr, DataInfo *info) {
    359   const char *buf = FormatAndSendCommand(
    360       /*is_data*/ true, info->module, info->module_offset, info->module_arch);
    361   if (buf) {
    362     ParseSymbolizeDataOutput(buf, info);
    363     info->start += (addr - info->module_offset); // Add the base address.
    364     return true;
    365   }
    366   return false;
    367 }
    368 
    369 const char *LLVMSymbolizer::FormatAndSendCommand(bool is_data,
    370                                                  const char *module_name,
    371                                                  uptr module_offset,
    372                                                  ModuleArch arch) {
    373   CHECK(module_name);
    374   const char *is_data_str = is_data ? "DATA " : "";
    375   if (arch == kModuleArchUnknown) {
    376     if (internal_snprintf(buffer_, kBufferSize, "%s\"%s\" 0x%zx\n", is_data_str,
    377                           module_name,
    378                           module_offset) >= static_cast<int>(kBufferSize)) {
    379       Report("WARNING: Command buffer too small");
    380       return nullptr;
    381     }
    382   } else {
    383     if (internal_snprintf(buffer_, kBufferSize, "%s\"%s:%s\" 0x%zx\n",
    384                           is_data_str, module_name, ModuleArchToString(arch),
    385                           module_offset) >= static_cast<int>(kBufferSize)) {
    386       Report("WARNING: Command buffer too small");
    387       return nullptr;
    388     }
    389   }
    390   return symbolizer_process_->SendCommand(buffer_);
    391 }
    392 
// Records the symbolizer binary |path| and the |use_forkpty| setting. Both
// descriptors start out as kInvalidFd and the restart counter and status
// flags start out cleared. |path| must be a non-null, non-empty string.
SymbolizerProcess::SymbolizerProcess(const char *path, bool use_forkpty)
    : path_(path),
      input_fd_(kInvalidFd),
      output_fd_(kInvalidFd),
      times_restarted_(0),
      failed_to_start_(false),
      reported_invalid_path_(false),
      use_forkpty_(use_forkpty) {
  // Reject a null or empty path up front.
  CHECK(path_);
  CHECK_NE(path_[0], '\0');
}
    404 
    405 static bool IsSameModule(const char* path) {
    406   if (const char* ProcessName = GetProcessName()) {
    407     if (const char* SymbolizerName = StripModuleName(path)) {
    408       return !internal_strcmp(ProcessName, SymbolizerName);
    409     }
    410   }
    411   return false;
    412 }
    413 
    414 const char *SymbolizerProcess::SendCommand(const char *command) {
    415   if (failed_to_start_)
    416     return nullptr;
    417   if (IsSameModule(path_)) {
    418     Report("WARNING: Symbolizer was blocked from starting itself!\n");
    419     failed_to_start_ = true;
    420     return nullptr;
    421   }
    422   for (; times_restarted_ < kMaxTimesRestarted; times_restarted_++) {
    423     // Start or restart symbolizer if we failed to send command to it.
    424     if (const char *res = SendCommandImpl(command))
    425       return res;
    426     Restart();
    427   }
    428   if (!failed_to_start_) {
    429     Report("WARNING: Failed to use and restart external symbolizer!\n");
    430     failed_to_start_ = true;
    431   }
    432   return 0;
    433 }
    434 
    435 const char *SymbolizerProcess::SendCommandImpl(const char *command) {
    436   if (input_fd_ == kInvalidFd || output_fd_ == kInvalidFd)
    437       return 0;
    438   if (!WriteToSymbolizer(command, internal_strlen(command)))
    439       return 0;
    440   if (!ReadFromSymbolizer(buffer_, kBufferSize))
    441       return 0;
    442   return buffer_;
    443 }
    444 
    445 bool SymbolizerProcess::Restart() {
    446   if (input_fd_ != kInvalidFd)
    447     CloseFile(input_fd_);
    448   if (output_fd_ != kInvalidFd)
    449     CloseFile(output_fd_);
    450   return StartSymbolizerSubprocess();
    451 }
    452 
    453 bool SymbolizerProcess::ReadFromSymbolizer(char *buffer, uptr max_length) {
    454   if (max_length == 0)
    455     return true;
    456   uptr read_len = 0;
    457   while (true) {
    458     uptr just_read = 0;
    459     bool success = ReadFromFile(input_fd_, buffer + read_len,
    460                                 max_length - read_len - 1, &just_read);
    461     // We can't read 0 bytes, as we don't expect external symbolizer to close
    462     // its stdout.
    463     if (!success || just_read == 0) {
    464       Report("WARNING: Can't read from symbolizer at fd %d\n", input_fd_);
    465       return false;
    466     }
    467     read_len += just_read;
    468     if (ReachedEndOfOutput(buffer, read_len))
    469       break;
    470     if (read_len + 1 == max_length) {
    471       Report("WARNING: Symbolizer buffer too small\n");
    472       read_len = 0;
    473       break;
    474     }
    475   }
    476   buffer[read_len] = '\0';
    477   return true;
    478 }
    479 
    480 bool SymbolizerProcess::WriteToSymbolizer(const char *buffer, uptr length) {
    481   if (length == 0)
    482     return true;
    483   uptr write_len = 0;
    484   bool success = WriteToFile(output_fd_, buffer, length, &write_len);
    485   if (!success || write_len != length) {
    486     Report("WARNING: Can't write to symbolizer at fd %d\n", output_fd_);
    487     return false;
    488   }
    489   return true;
    490 }
    491 
    492 #endif  // !SANITIZER_SYMBOLIZER_MARKUP
    493 
    494 }  // namespace __sanitizer
    495