Home | History | Annotate | Line # | Download | only in xray
      1 //===-- xray_basic_logging.cc -----------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of XRay, a dynamic runtime instrumentation system.
     11 //
     12 // Implementation of a simple in-memory log of XRay events. This defines a
     13 // logging function that's compatible with the XRay handler interface, and
     14 // routines for exporting data to files.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 
     18 #include <errno.h>
     19 #include <fcntl.h>
     20 #include <pthread.h>
     21 #include <sys/stat.h>
     22 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
     23 #include <sys/syscall.h>
     24 #endif
     25 #include <sys/types.h>
     26 #include <time.h>
     27 #include <unistd.h>
     28 
     29 #include "sanitizer_common/sanitizer_allocator_internal.h"
     30 #include "sanitizer_common/sanitizer_libc.h"
     31 #include "xray/xray_records.h"
     32 #include "xray_recursion_guard.h"
     33 #include "xray_basic_flags.h"
     34 #include "xray_basic_logging.h"
     35 #include "xray_defs.h"
     36 #include "xray_flags.h"
     37 #include "xray_interface_internal.h"
     38 #include "xray_tsc.h"
     39 #include "xray_utils.h"
     40 
     41 namespace __xray {
     42 
     43 static SpinMutex LogMutex;
     44 
     45 namespace {
     46 // We use elements of this type to record the entry TSC of every function ID we
     47 // see as we're tracing a particular thread's execution.
     48 struct alignas(16) StackEntry {
     49   int32_t FuncId;
     50   uint16_t Type;
     51   uint8_t CPU;
     52   uint8_t Padding;
     53   uint64_t TSC;
     54 };
     55 
     56 static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry");
     57 
     58 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
     59   void *InMemoryBuffer = nullptr;
     60   size_t BufferSize = 0;
     61   size_t BufferOffset = 0;
     62   void *ShadowStack = nullptr;
     63   size_t StackSize = 0;
     64   size_t StackEntries = 0;
     65   __xray::LogWriter *LogWriter = nullptr;
     66 };
     67 
     68 struct BasicLoggingOptions {
     69   int DurationFilterMicros = 0;
     70   size_t MaxStackDepth = 0;
     71   size_t ThreadBufferSize = 0;
     72 };
     73 } // namespace
     74 
     75 static pthread_key_t PThreadKey;
     76 
     77 static atomic_uint8_t BasicInitialized{0};
     78 
     79 struct BasicLoggingOptions GlobalOptions;
     80 
     81 thread_local atomic_uint8_t Guard{0};
     82 
     83 static atomic_uint8_t UseRealTSC{0};
     84 static atomic_uint64_t ThresholdTicks{0};
     85 static atomic_uint64_t TicksPerSec{0};
     86 static atomic_uint64_t CycleFrequency{NanosecondsPerSecond};
     87 
     88 static LogWriter *getLog() XRAY_NEVER_INSTRUMENT {
     89   LogWriter* LW = LogWriter::Open();
     90   if (LW == nullptr)
     91     return LW;
     92 
     93   static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT;
     94   pthread_once(&DetectOnce, +[] {
     95     if (atomic_load(&UseRealTSC, memory_order_acquire))
     96       atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release);
     97   });
     98 
     99   // Since we're here, we get to write the header. We set it up so that the
    100   // header will only be written once, at the start, and let the threads
    101   // logging do writes which just append.
    102   XRayFileHeader Header;
    103   // Version 2 includes tail exit records.
    104   // Version 3 includes pid inside records.
    105   Header.Version = 3;
    106   Header.Type = FileTypes::NAIVE_LOG;
    107   Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire);
    108 
    109   // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
    110   // before setting the values in the header.
    111   Header.ConstantTSC = 1;
    112   Header.NonstopTSC = 1;
    113   LW->WriteAll(reinterpret_cast<char *>(&Header),
    114                reinterpret_cast<char *>(&Header) + sizeof(Header));
    115   return LW;
    116 }
    117 
    118 static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT {
    119   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
    120   static LogWriter *LW = nullptr;
    121   pthread_once(&OnceInit, +[] { LW = getLog(); });
    122   return LW;
    123 }
    124 
    125 static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
    126   thread_local ThreadLocalData TLD;
    127   thread_local bool UNUSED TOnce = [] {
    128     if (GlobalOptions.ThreadBufferSize == 0) {
    129       if (Verbosity())
    130         Report("Not initializing TLD since ThreadBufferSize == 0.\n");
    131       return false;
    132     }
    133     pthread_setspecific(PThreadKey, &TLD);
    134     TLD.LogWriter = getGlobalLog();
    135     TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>(
    136         InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize,
    137                       nullptr, alignof(XRayRecord)));
    138     TLD.BufferSize = GlobalOptions.ThreadBufferSize;
    139     TLD.BufferOffset = 0;
    140     if (GlobalOptions.MaxStackDepth == 0) {
    141       if (Verbosity())
    142         Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n");
    143       TLD.StackSize = 0;
    144       TLD.StackEntries = 0;
    145       TLD.ShadowStack = nullptr;
    146       return false;
    147     }
    148     TLD.ShadowStack = reinterpret_cast<StackEntry *>(
    149         InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr,
    150                       alignof(StackEntry)));
    151     TLD.StackSize = GlobalOptions.MaxStackDepth;
    152     TLD.StackEntries = 0;
    153     return false;
    154   }();
    155   return TLD;
    156 }
    157 
    158 template <class RDTSC>
    159 void InMemoryRawLog(int32_t FuncId, XRayEntryType Type,
    160                     RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
    161   auto &TLD = getThreadLocalData();
    162   LogWriter *LW = getGlobalLog();
    163   if (LW == nullptr)
    164     return;
    165 
    166   // Use a simple recursion guard, to handle cases where we're already logging
    167   // and for one reason or another, this function gets called again in the same
    168   // thread.
    169   RecursionGuard G(Guard);
    170   if (!G)
    171     return;
    172 
    173   uint8_t CPU = 0;
    174   uint64_t TSC = ReadTSC(CPU);
    175 
    176   switch (Type) {
    177   case XRayEntryType::ENTRY:
    178   case XRayEntryType::LOG_ARGS_ENTRY: {
    179     // Short circuit if we've reached the maximum depth of the stack.
    180     if (TLD.StackEntries++ >= TLD.StackSize)
    181       return;
    182 
    183     // When we encounter an entry event, we keep track of the TSC and the CPU,
    184     // and put it in the stack.
    185     StackEntry E;
    186     E.FuncId = FuncId;
    187     E.CPU = CPU;
    188     E.Type = Type;
    189     E.TSC = TSC;
    190     auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
    191                          (sizeof(StackEntry) * (TLD.StackEntries - 1));
    192     internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry));
    193     break;
    194   }
    195   case XRayEntryType::EXIT:
    196   case XRayEntryType::TAIL: {
    197     if (TLD.StackEntries == 0)
    198       break;
    199 
    200     if (--TLD.StackEntries >= TLD.StackSize)
    201       return;
    202 
    203     // When we encounter an exit event, we check whether all the following are
    204     // true:
    205     //
    206     // - The Function ID is the same as the most recent entry in the stack.
    207     // - The CPU is the same as the most recent entry in the stack.
    208     // - The Delta of the TSCs is less than the threshold amount of time we're
    209     //   looking to record.
    210     //
    211     // If all of these conditions are true, we pop the stack and don't write a
    212     // record and move the record offset back.
    213     StackEntry StackTop;
    214     auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) +
    215                          (sizeof(StackEntry) * TLD.StackEntries);
    216     internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry));
    217     if (StackTop.FuncId == FuncId && StackTop.CPU == CPU &&
    218         StackTop.TSC < TSC) {
    219       auto Delta = TSC - StackTop.TSC;
    220       if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) {
    221         DCHECK(TLD.BufferOffset > 0);
    222         TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2;
    223         return;
    224       }
    225     }
    226     break;
    227   }
    228   default:
    229     // Should be unreachable.
    230     DCHECK(false && "Unsupported XRayEntryType encountered.");
    231     break;
    232   }
    233 
    234   // First determine whether the delta between the function's enter record and
    235   // the exit record is higher than the threshold.
    236   XRayRecord R;
    237   R.RecordType = RecordTypes::NORMAL;
    238   R.CPU = CPU;
    239   R.TSC = TSC;
    240   R.TId = GetTid();
    241   R.PId = internal_getpid();
    242   R.Type = Type;
    243   R.FuncId = FuncId;
    244   auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
    245   internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
    246   if (++TLD.BufferOffset == TLD.BufferSize) {
    247     SpinMutexLock Lock(&LogMutex);
    248     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
    249                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    250     TLD.BufferOffset = 0;
    251     TLD.StackEntries = 0;
    252   }
    253 }
    254 
    255 template <class RDTSC>
    256 void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1,
    257                            RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
    258   auto &TLD = getThreadLocalData();
    259   auto FirstEntry =
    260       reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
    261   const auto &BuffLen = TLD.BufferSize;
    262   LogWriter *LW = getGlobalLog();
    263   if (LW == nullptr)
    264     return;
    265 
    266   // First we check whether there's enough space to write the data consecutively
    267   // in the thread-local buffer. If not, we first flush the buffer before
    268   // attempting to write the two records that must be consecutive.
    269   if (TLD.BufferOffset + 2 > BuffLen) {
    270     SpinMutexLock Lock(&LogMutex);
    271     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
    272                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    273     TLD.BufferOffset = 0;
    274     TLD.StackEntries = 0;
    275   }
    276 
    277   // Then we write the "we have an argument" record.
    278   InMemoryRawLog(FuncId, Type, ReadTSC);
    279 
    280   RecursionGuard G(Guard);
    281   if (!G)
    282     return;
    283 
    284   // And, from here on write the arg payload.
    285   XRayArgPayload R;
    286   R.RecordType = RecordTypes::ARG_PAYLOAD;
    287   R.FuncId = FuncId;
    288   R.TId = GetTid();
    289   R.PId = internal_getpid();
    290   R.Arg = Arg1;
    291   internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
    292   if (++TLD.BufferOffset == BuffLen) {
    293     SpinMutexLock Lock(&LogMutex);
    294     LW->WriteAll(reinterpret_cast<char *>(FirstEntry),
    295                  reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset));
    296     TLD.BufferOffset = 0;
    297     TLD.StackEntries = 0;
    298   }
    299 }
    300 
    301 void basicLoggingHandleArg0RealTSC(int32_t FuncId,
    302                                    XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
    303   InMemoryRawLog(FuncId, Type, readTSC);
    304 }
    305 
    306 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
    307     XRAY_NEVER_INSTRUMENT {
    308   InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
    309     timespec TS;
    310     int result = clock_gettime(CLOCK_REALTIME, &TS);
    311     if (result != 0) {
    312       Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
    313       TS = {0, 0};
    314     }
    315     CPU = 0;
    316     return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
    317   });
    318 }
    319 
    320 void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
    321                                    uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
    322   InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
    323 }
    324 
    325 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
    326                                       uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
    327   InMemoryRawLogWithArg(
    328       FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
    329         timespec TS;
    330         int result = clock_gettime(CLOCK_REALTIME, &TS);
    331         if (result != 0) {
    332           Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno));
    333           TS = {0, 0};
    334         }
    335         CPU = 0;
    336         return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
    337       });
    338 }
    339 
    340 static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT {
    341   ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P);
    342   auto ExitGuard = at_scope_exit([&TLD] {
    343     // Clean up dynamic resources.
    344     if (TLD.InMemoryBuffer)
    345       InternalFree(TLD.InMemoryBuffer);
    346     if (TLD.ShadowStack)
    347       InternalFree(TLD.ShadowStack);
    348     if (Verbosity())
    349       Report("Cleaned up log for TID: %d\n", GetTid());
    350   });
    351 
    352   if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) {
    353     if (Verbosity())
    354       Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(),
    355              TLD.BufferOffset);
    356     return;
    357   }
    358 
    359   {
    360     SpinMutexLock L(&LogMutex);
    361     TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer),
    362                             reinterpret_cast<char *>(TLD.InMemoryBuffer) +
    363                             (sizeof(XRayRecord) * TLD.BufferOffset));
    364   }
    365 
    366   // Because this thread's exit could be the last one trying to write to
    367   // the file and that we're not able to close out the file properly, we
    368   // sync instead and hope that the pending writes are flushed as the
    369   // thread exits.
    370   TLD.LogWriter->Flush();
    371 }
    372 
    373 XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize,
    374                                    UNUSED size_t BufferMax, void *Options,
    375                                    size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
    376   uint8_t Expected = 0;
    377   if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
    378                                       memory_order_acq_rel)) {
    379     if (Verbosity())
    380       Report("Basic logging already initialized.\n");
    381     return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
    382   }
    383 
    384   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
    385   pthread_once(&OnceInit, +[] {
    386     pthread_key_create(&PThreadKey, TLDDestructor);
    387     atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release);
    388     // Initialize the global TicksPerSec value.
    389     atomic_store(&TicksPerSec,
    390                  probeRequiredCPUFeatures() ? getTSCFrequency()
    391                                             : NanosecondsPerSecond,
    392                  memory_order_release);
    393     if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity())
    394       Report("WARNING: Required CPU features missing for XRay instrumentation, "
    395              "using emulation instead.\n");
    396   });
    397 
    398   FlagParser P;
    399   BasicFlags F;
    400   F.setDefaults();
    401   registerXRayBasicFlags(&P, &F);
    402   P.ParseString(useCompilerDefinedBasicFlags());
    403   auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS");
    404   if (EnvOpts == nullptr)
    405     EnvOpts = "";
    406 
    407   P.ParseString(EnvOpts);
    408 
    409   // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options
    410   // set through XRAY_OPTIONS instead.
    411   if (internal_strlen(EnvOpts) == 0) {
    412     F.func_duration_threshold_us =
    413         flags()->xray_naive_log_func_duration_threshold_us;
    414     F.max_stack_depth = flags()->xray_naive_log_max_stack_depth;
    415     F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size;
    416   }
    417 
    418   P.ParseString(static_cast<const char *>(Options));
    419   GlobalOptions.ThreadBufferSize = F.thread_buffer_size;
    420   GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us;
    421   GlobalOptions.MaxStackDepth = F.max_stack_depth;
    422   *basicFlags() = F;
    423 
    424   atomic_store(&ThresholdTicks,
    425                atomic_load(&TicksPerSec, memory_order_acquire) *
    426                    GlobalOptions.DurationFilterMicros / 1000000,
    427                memory_order_release);
    428   __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire)
    429                               ? basicLoggingHandleArg1RealTSC
    430                               : basicLoggingHandleArg1EmulateTSC);
    431   __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire)
    432                          ? basicLoggingHandleArg0RealTSC
    433                          : basicLoggingHandleArg0EmulateTSC);
    434 
    435   // TODO: Implement custom event and typed event handling support in Basic
    436   // Mode.
    437   __xray_remove_customevent_handler();
    438   __xray_remove_typedevent_handler();
    439 
    440   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
    441 }
    442 
    443 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
    444   uint8_t Expected = 0;
    445   if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
    446                                       memory_order_acq_rel) &&
    447       Verbosity())
    448     Report("Basic logging already finalized.\n");
    449 
    450   // Nothing really to do aside from marking state of the global to be
    451   // uninitialized.
    452 
    453   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
    454 }
    455 
    456 XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT {
    457   // This really does nothing, since flushing the logs happen at the end of a
    458   // thread's lifetime, or when the buffers are full.
    459   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
    460 }
    461 
    462 // This is a handler that, effectively, does nothing.
    463 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT {
    464 }
    465 
    466 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
    467   XRayLogImpl Impl{
    468       basicLoggingInit,
    469       basicLoggingFinalize,
    470       basicLoggingHandleArg0Empty,
    471       basicLoggingFlush,
    472   };
    473   auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl);
    474   if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK &&
    475       Verbosity())
    476     Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n",
    477            RegistrationResult);
    478   if (flags()->xray_naive_log ||
    479       !internal_strcmp(flags()->xray_mode, "xray-basic")) {
    480     auto SelectResult = __xray_log_select_mode("xray-basic");
    481     if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    482       if (Verbosity())
    483         Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult);
    484       return false;
    485     }
    486 
    487     // We initialize the implementation using the data we get from the
    488     // XRAY_BASIC_OPTIONS environment variable, at this point of the
    489     // implementation.
    490     auto *Env = GetEnv("XRAY_BASIC_OPTIONS");
    491     auto InitResult =
    492         __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env);
    493     if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
    494       if (Verbosity())
    495         Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult);
    496       return false;
    497     }
    498 
    499     // At this point we know that we've successfully initialized Basic mode
    500     // tracing, and the only chance we're going to get for the current thread to
    501     // clean-up may be at thread/program exit. To ensure that we're going to get
    502     // the cleanup even without calling the finalization routines, we're
    503     // registering a program exit function that will do the cleanup.
    504     static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT;
    505     pthread_once(&DynamicOnce, +[] {
    506       static void *FakeTLD = nullptr;
    507       FakeTLD = &getThreadLocalData();
    508       Atexit(+[] { TLDDestructor(FakeTLD); });
    509     });
    510   }
    511   return true;
    512 }
    513 
    514 } // namespace __xray
    515 
    516 static auto UNUSED Unused = __xray::basicLogDynamicInitializer();
    517