//===-- xray_profiling.cc ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This is the implementation of a profiling handler.
//
//===----------------------------------------------------------------------===//
#include <memory>
#include <pthread.h>
#include <time.h>

#include "sanitizer_common/sanitizer_atomic.h"
#include "sanitizer_common/sanitizer_flags.h"
#include "xray/xray_interface.h"
#include "xray/xray_log_interface.h"
#include "xray_buffer_queue.h"
#include "xray_flags.h"
#include "xray_profile_collector.h"
#include "xray_profiling_flags.h"
#include "xray_recursion_guard.h"
#include "xray_tsc.h"
#include "xray_utils.h"
namespace __xray {

namespace {

static atomic_sint32_t ProfilerLogFlushStatus = {
    XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};

static atomic_sint32_t ProfilerLogStatus = {
    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};

static SpinMutex ProfilerOptionsMutex;

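// Per-thread profiling state, published to the pthread key below. Each field
// follows a 0 -> 1 -> pointer protocol: 0 means "not initialized", 1 is a
// sentinel claimed while the owning thread is constructing the object, and
// any other value is the address of the constructed object in the
// thread_local storage declared further down.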
struct ProfilingData {
  atomic_uintptr_t Allocators;
  atomic_uintptr_t FCT;
};

static pthread_key_t ProfilingKey;

// We use a global buffer queue, which gets initialized once at startup and
// reset when profiling is finalized ("done").
static std::aligned_storage<sizeof(BufferQueue), alignof(BufferQueue)>::type
    BufferQueueStorage;
static BufferQueue *BQ = nullptr;

thread_local FunctionCallTrie::Allocators::Buffers ThreadBuffers;
thread_local std::aligned_storage<sizeof(FunctionCallTrie::Allocators),
                                  alignof(FunctionCallTrie::Allocators)>::type
    AllocatorsStorage;
thread_local std::aligned_storage<sizeof(FunctionCallTrie),
                                  alignof(FunctionCallTrie)>::type
    FunctionCallTrieStorage;
thread_local ProfilingData TLD{{0}, {0}};
thread_local atomic_uint8_t ReentranceGuard{0};

// A separate guard ensures that if this thread is already cleaning up, any
// signal handlers that fire do not attempt to clean up or initialize again.
thread_local atomic_uint8_t TLDInitGuard{0};

// We also use a separate latch to signal that the thread is exiting, so that
// non-essential work (such as recording events) can be skipped.
thread_local atomic_uint8_t ThreadExitingLatch{0};

static ProfilingData *getThreadLocalData() XRAY_NEVER_INSTRUMENT {
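  // Once per thread, publish the address of TLD through the pthread key so
  // that the key's destructor (registered in profilingLoggingInit) can find
  // and post this thread's data when the thread exits.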
  thread_local auto ThreadOnce = []() XRAY_NEVER_INSTRUMENT {
    pthread_setspecific(ProfilingKey, &TLD);
    return false;
  }();
  (void)ThreadOnce;

  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return nullptr;

  if (atomic_load_relaxed(&ThreadExitingLatch))
    return nullptr;

  uptr Allocators = 0;
  if (atomic_compare_exchange_strong(&TLD.Allocators, &Allocators, 1,
                                     memory_order_acq_rel)) {
    bool Success = false;
    auto AllocatorsUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        atomic_store(&TLD.Allocators, 0, memory_order_release);
    });

    // Acquire a set of buffers for this thread.
    if (BQ == nullptr)
      return nullptr;

    if (BQ->getBuffer(ThreadBuffers.NodeBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto NodeBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.NodeBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.RootsBuffer) != BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto RootsBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.RootsBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.ShadowStackBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;
    auto ShadowStackBufferUndo = at_scope_exit([&]() XRAY_NEVER_INSTRUMENT {
      if (!Success)
        BQ->releaseBuffer(ThreadBuffers.ShadowStackBuffer);
    });

    if (BQ->getBuffer(ThreadBuffers.NodeIdPairBuffer) !=
        BufferQueue::ErrorCode::Ok)
      return nullptr;

    Success = true;
    new (&AllocatorsStorage) FunctionCallTrie::Allocators(
        FunctionCallTrie::InitAllocatorsFromBuffers(ThreadBuffers));
    Allocators = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage));
    atomic_store(&TLD.Allocators, Allocators, memory_order_release);
  }

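  // The sentinel is still in place: we re-entered (e.g. from a signal
  // handler) while this thread's allocators were mid-construction. Bail out.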
  if (Allocators == 1)
    return nullptr;

  uptr FCT = 0;
  if (atomic_compare_exchange_strong(&TLD.FCT, &FCT, 1, memory_order_acq_rel)) {
    new (&FunctionCallTrieStorage)
        FunctionCallTrie(*reinterpret_cast<FunctionCallTrie::Allocators *>(
            atomic_load_relaxed(&TLD.Allocators)));
    FCT = reinterpret_cast<uptr>(
        reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage));
    atomic_store(&TLD.FCT, FCT, memory_order_release);
  }

  if (FCT == 1)
    return nullptr;

  return &TLD;
}

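// Tears down this thread's FunctionCallTrie and Allocators, but only if they
// were fully constructed, i.e. the atomics hold the storage addresses rather
// than 0 or the sentinel value 1.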
static void cleanupTLD() XRAY_NEVER_INSTRUMENT {
  auto FCT = atomic_exchange(&TLD.FCT, 0, memory_order_acq_rel);
  if (FCT == reinterpret_cast<uptr>(reinterpret_cast<FunctionCallTrie *>(
                 &FunctionCallTrieStorage)))
    reinterpret_cast<FunctionCallTrie *>(FCT)->~FunctionCallTrie();

  auto Allocators = atomic_exchange(&TLD.Allocators, 0, memory_order_acq_rel);
  if (Allocators ==
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    reinterpret_cast<FunctionCallTrie::Allocators *>(Allocators)->~Allocators();
}

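// Atomically detaches this thread's trie and allocators from TLD and hands
// them, together with the thread's buffers, to the profile collector keyed by
// thread id. Subsequent calls observe the zeroed atomics and return early.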
static void postCurrentThreadFCT(ProfilingData &T) XRAY_NEVER_INSTRUMENT {
  RecursionGuard TLDInit(TLDInitGuard);
  if (!TLDInit)
    return;

  uptr P = atomic_exchange(&T.FCT, 0, memory_order_acq_rel);
  if (P != reinterpret_cast<uptr>(
               reinterpret_cast<FunctionCallTrie *>(&FunctionCallTrieStorage)))
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(P);
  DCHECK_NE(FCT, nullptr);

  uptr A = atomic_exchange(&T.Allocators, 0, memory_order_acq_rel);
  if (A !=
      reinterpret_cast<uptr>(
          reinterpret_cast<FunctionCallTrie::Allocators *>(&AllocatorsStorage)))
    return;

  auto Allocators = reinterpret_cast<FunctionCallTrie::Allocators *>(A);
  DCHECK_NE(Allocators, nullptr);

  // Always move the data into the profile collector.
  profileCollectorService::post(BQ, std::move(*FCT), std::move(*Allocators),
                                std::move(ThreadBuffers), GetTid());

  // Re-initialize the ThreadBuffers object to a known "default" state.
  ThreadBuffers = FunctionCallTrie::Allocators::Buffers{};
}

} // namespace

const char *profilingCompilerDefinedFlags() XRAY_NEVER_INSTRUMENT {
#ifdef XRAY_PROFILER_DEFAULT_OPTIONS
  return SANITIZER_STRINGIFY(XRAY_PROFILER_DEFAULT_OPTIONS);
#else
  return "";
#endif
}

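// Flushes serialized profiles to a file through LogWriter, walking the
// collector's buffers with the XRayBuffer iterator protocol: seed with
// {nullptr, 0}, feed each returned buffer back in to get the next one, and
// stop once Data comes back as nullptr.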
XRayLogFlushStatus profilingFlush() XRAY_NEVER_INSTRUMENT {
  if (atomic_load(&ProfilerLogStatus, memory_order_acquire) !=
      XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    if (Verbosity())
      Report("Not flushing profiles, profiling has not been finalized.\n");
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  RecursionGuard SignalGuard(ReentranceGuard);
  if (!SignalGuard) {
    if (Verbosity())
      Report("Cannot finalize properly inside a signal handler!\n");
    atomic_store(&ProfilerLogFlushStatus,
                 XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING,
                 memory_order_release);
    return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
  }

  s32 Previous = atomic_exchange(&ProfilerLogFlushStatus,
                                 XRayLogFlushStatus::XRAY_LOG_FLUSHING,
                                 memory_order_acq_rel);
  if (Previous == XRayLogFlushStatus::XRAY_LOG_FLUSHING) {
    if (Verbosity())
      Report("Not flushing profiles, implementation is still flushing.\n");
    return XRayLogFlushStatus::XRAY_LOG_FLUSHING;
  }

  // At this point, we'll create the file that will contain the profile, but
  // only if the options say so.
  if (!profilingFlags()->no_flush) {
    // First check whether we have data in the profile collector service
    // before we try and write anything down.
    XRayBuffer B = profileCollectorService::nextBuffer({nullptr, 0});
    if (B.Data == nullptr) {
      if (Verbosity())
        Report("profiling: No data to flush.\n");
    } else {
      LogWriter *LW = LogWriter::Open();
      if (LW == nullptr) {
        if (Verbosity())
          Report("profiling: Failed to flush to file, dropping data.\n");
      } else {
        // Now for each of the buffers, write out the profile data as we would
        // see it in memory, verbatim.
        while (B.Data != nullptr && B.Size != 0) {
          LW->WriteAll(reinterpret_cast<const char *>(B.Data),
                       reinterpret_cast<const char *>(B.Data) + B.Size);
          B = profileCollectorService::nextBuffer(B);
        }
        // Only close the writer if we actually opened one.
        LogWriter::Close(LW);
      }
    }
  }

  profileCollectorService::reset();

  atomic_store(&ProfilerLogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
               memory_order_release);
  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
               memory_order_release);

  return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
}

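// The entry/exit handler: records a function event into this thread's trie.
// The TSC is read before anything else so that the timestamp is taken as
// close to the instrumented function boundary as possible.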
void profilingHandleArg0(int32_t FuncId,
                         XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
  unsigned char CPU;
  auto TSC = readTSC(CPU);
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return;

  auto Status = atomic_load(&ProfilerLogStatus, memory_order_acquire);
  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_UNINITIALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_INITIALIZING))
    return;

  if (UNLIKELY(Status == XRayLogInitStatus::XRAY_LOG_FINALIZED ||
               Status == XRayLogInitStatus::XRAY_LOG_FINALIZING)) {
    postCurrentThreadFCT(TLD);
    return;
  }

  auto T = getThreadLocalData();
  if (T == nullptr)
    return;

  auto FCT = reinterpret_cast<FunctionCallTrie *>(atomic_load_relaxed(&T->FCT));
  switch (Entry) {
  case XRayEntryType::ENTRY:
  case XRayEntryType::LOG_ARGS_ENTRY:
    FCT->enterFunction(FuncId, TSC, CPU);
    break;
  case XRayEntryType::EXIT:
  case XRayEntryType::TAIL:
    FCT->exitFunction(FuncId, TSC, CPU);
    break;
  default:
    // FIXME: Handle bugs.
    break;
  }
}

void profilingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                         uint64_t) XRAY_NEVER_INSTRUMENT {
  return profilingHandleArg0(FuncId, Entry);
}

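// Moves the profiler from INITIALIZED to FINALIZING, stops the buffer queue
// from handing out fresh buffers, waits out a configurable grace period so
// in-flight handlers can observe the state change, then posts this thread's
// data and marks the log FINALIZED.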
XRayLogInitStatus profilingFinalize() XRAY_NEVER_INSTRUMENT {
  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_FINALIZING,
                                      memory_order_release)) {
    if (Verbosity())
      Report("Cannot finalize profile, profiling is not initialized.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

  // Finalize the current generation of buffers. Threads currently holding
  // buffers may keep using them, but the last reference released performs the
  // memory cleanup.
  DCHECK_NE(BQ, nullptr);
  BQ->finalize();

  // Wait a grace period to allow threads to see that we're finalizing.
  SleepForMillis(profilingFlags()->grace_period_ms);

  // If for some reason we are entering this function from an instrumented
  // handler, bail out.
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return static_cast<XRayLogInitStatus>(CurrentStatus);

  // Post the current thread's data if we have any.
  postCurrentThreadFCT(TLD);

  // Then force serialization of the log data.
  profileCollectorService::serialize();

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
               memory_order_release);
  return XRayLogInitStatus::XRAY_LOG_FINALIZED;
}

XRayLogInitStatus
profilingLoggingInit(size_t, size_t, void *Options,
                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
  RecursionGuard G(ReentranceGuard);
  if (!G)
    return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;

  s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
  if (!atomic_compare_exchange_strong(&ProfilerLogStatus, &CurrentStatus,
                                      XRayLogInitStatus::XRAY_LOG_INITIALIZING,
                                      memory_order_acq_rel)) {
    if (Verbosity())
      Report("Cannot initialize an already-initialized profiling "
             "implementation.\n");
    return static_cast<XRayLogInitStatus>(CurrentStatus);
  }

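  // Resolve the effective flags in increasing order of precedence: static
  // defaults, compiler-provided defaults, the XRAY_PROFILING_OPTIONS
  // environment variable, and finally the string handed in through Options.
  // Later ParseString calls override values set by earlier ones.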
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    FlagParser ConfigParser;
    ProfilerFlags Flags;
    Flags.setDefaults();
    registerProfilerFlags(&ConfigParser, &Flags);
    ConfigParser.ParseString(profilingCompilerDefinedFlags());
    const char *Env = GetEnv("XRAY_PROFILING_OPTIONS");
    if (Env == nullptr)
      Env = "";
    ConfigParser.ParseString(Env);

    // Then parse the configuration string provided.
    ConfigParser.ParseString(static_cast<const char *>(Options));
    if (Verbosity())
      ReportUnrecognizedFlags();
    *profilingFlags() = Flags;
  }

  // We need to reset the profile data collection implementation now.
  profileCollectorService::reset();

  // Then also reset the buffer queue implementation.
  if (BQ == nullptr) {
    bool Success = false;
    new (&BufferQueueStorage)
        BufferQueue(profilingFlags()->per_thread_allocator_max,
                    profilingFlags()->buffers_max, Success);
    if (!Success) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers!\n");
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    // If we've succeeded, set the global pointer to the initialized storage.
    BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
  } else {
    BQ->finalize();
    auto InitStatus = BQ->init(profilingFlags()->per_thread_allocator_max,
                               profilingFlags()->buffers_max);

    if (InitStatus != BufferQueue::ErrorCode::Ok) {
      if (Verbosity())
        Report("Failed to initialize preallocated memory buffers; error: %s\n",
               BufferQueue::getErrorString(InitStatus));
      atomic_store(&ProfilerLogStatus,
                   XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
                   memory_order_release);
      return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    }

    DCHECK(!BQ->finalizing());
  }

  // We need to set up the exit handlers.
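  // Two teardown paths are registered exactly once per process: a pthread key
  // destructor posts a thread's data when that thread exits, and an Atexit
  // handler finalizes, flushes, and tears down thread-local state at process
  // exit. ThreadExitingLatch ensures only one of the two runs per thread.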
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(
      &Once, +[] {
        pthread_key_create(
            &ProfilingKey, +[](void *P) XRAY_NEVER_INSTRUMENT {
              if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
                return;

              if (P == nullptr)
                return;

              auto T = reinterpret_cast<ProfilingData *>(P);
              if (atomic_load_relaxed(&T->Allocators) == 0)
                return;

              {
                // If we're somehow executing this while inside a
                // non-reentrant-friendly context, we skip attempting to post
                // the current thread's data.
                RecursionGuard G(ReentranceGuard);
                if (!G)
                  return;

                postCurrentThreadFCT(*T);
              }
            });

        // We also need to set up an exit handler, so that we can get the
        // profile information at exit time. We use the C API to do this, to
        // avoid relying on C++ ABI functions for registering exit handlers.
        Atexit(+[]() XRAY_NEVER_INSTRUMENT {
          if (atomic_exchange(&ThreadExitingLatch, 1, memory_order_acq_rel))
            return;

          auto Cleanup =
              at_scope_exit([]() XRAY_NEVER_INSTRUMENT { cleanupTLD(); });

          // Finalize and flush.
          if (profilingFinalize() != XRAY_LOG_FINALIZED ||
              profilingFlush() != XRAY_LOG_FLUSHED)
            return;

          if (Verbosity())
            Report("XRay Profile flushed at exit.\n");
        });
      });

  __xray_log_set_buffer_iterator(profileCollectorService::nextBuffer);
  __xray_set_handler(profilingHandleArg0);
  __xray_set_handler_arg1(profilingHandleArg1);

  atomic_store(&ProfilerLogStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
               memory_order_release);
  if (Verbosity())
    Report("XRay Profiling init successful.\n");

  return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
}

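// Registers the profiling implementation as the "xray-profiling" log mode and
// selects it when the xray_mode flag asks for it. A typical invocation of an
// instrumented binary (a usage sketch; exact flag spellings depend on the
// XRay build and flag defaults) looks like:
//
//   XRAY_OPTIONS="patch_premain=true xray_mode=xray-profiling" \
//   XRAY_PROFILING_OPTIONS="no_flush=false" ./instrumented-binary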
bool profilingDynamicInitializer() XRAY_NEVER_INSTRUMENT {
  // Set up the flag defaults from the static defaults and the
  // compiler-provided defaults.
  {
    SpinMutexLock Lock(&ProfilerOptionsMutex);
    auto *F = profilingFlags();
    F->setDefaults();
    FlagParser ProfilingParser;
    registerProfilerFlags(&ProfilingParser, F);
    ProfilingParser.ParseString(profilingCompilerDefinedFlags());
  }

  XRayLogImpl Impl{
      profilingLoggingInit,
      profilingFinalize,
      profilingHandleArg0,
      profilingFlush,
  };
  auto RegistrationResult = __xray_log_register_mode("xray-profiling", Impl);
  if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
    if (Verbosity())
      Report("Cannot register XRay Profiling mode to 'xray-profiling'; error = "
             "%d\n",
             RegistrationResult);
    return false;
  }

  // The mode name must match the name used at registration above.
  if (!internal_strcmp(flags()->xray_mode, "xray-profiling"))
    __xray_log_select_mode("xray-profiling");
  return true;
}

} // namespace __xray

static auto UNUSED Unused = __xray::profilingDynamicInitializer();