      1 //===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file is a part of XRay, a dynamic runtime instrumentation system.
     11 //
     12 // Here we implement the Flight Data Recorder mode for XRay, where we use
     13 // compact structures to store records in memory as well as when writing out the
     14 // data to files.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 #include "xray_fdr_logging.h"
     18 #include <cassert>
     19 #include <errno.h>
     20 #include <limits>
     21 #include <memory>
     22 #include <pthread.h>
     23 #include <sys/time.h>
     24 #include <time.h>
     25 #include <unistd.h>
     26 
     27 #include "sanitizer_common/sanitizer_allocator_internal.h"
     28 #include "sanitizer_common/sanitizer_atomic.h"
     29 #include "sanitizer_common/sanitizer_common.h"
     30 #include "xray/xray_interface.h"
     31 #include "xray/xray_records.h"
     32 #include "xray_allocator.h"
     33 #include "xray_buffer_queue.h"
     34 #include "xray_defs.h"
     35 #include "xray_fdr_controller.h"
     36 #include "xray_fdr_flags.h"
     37 #include "xray_fdr_log_writer.h"
     38 #include "xray_flags.h"
     39 #include "xray_recursion_guard.h"
     40 #include "xray_tsc.h"
     41 #include "xray_utils.h"
     42 
     43 namespace __xray {
     44 
     45 static atomic_sint32_t LoggingStatus = {
     46     XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
     47 
     48 namespace {
     49 
     50 // Group together thread-local-data in a struct, then hide it behind a function
     51 // call so that it can be initialized on first use instead of as a global. We
      52 // force the alignment to 64 bytes for x86 cache line alignment, as this
      53 // structure is used in the hot path of the implementation.
     54 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData {
     55   BufferQueue::Buffer Buffer{};
     56   BufferQueue *BQ = nullptr;
     57 
     58   using LogWriterStorage =
     59       typename std::aligned_storage<sizeof(FDRLogWriter),
     60                                     alignof(FDRLogWriter)>::type;
     61 
     62   LogWriterStorage LWStorage;
     63   FDRLogWriter *Writer = nullptr;
     64 
     65   using ControllerStorage =
     66       typename std::aligned_storage<sizeof(FDRController<>),
     67                                     alignof(FDRController<>)>::type;
     68   ControllerStorage CStorage;
     69   FDRController<> *Controller = nullptr;
     70 };
     71 
     72 } // namespace
     73 
     74 static_assert(std::is_trivially_destructible<ThreadLocalData>::value,
     75               "ThreadLocalData must be trivially destructible");
     76 
     77 // Use a global pthread key to identify thread-local data for logging.
     78 static pthread_key_t Key;
     79 
     80 // Global BufferQueue.
     81 static std::aligned_storage<sizeof(BufferQueue)>::type BufferQueueStorage;
     82 static BufferQueue *BQ = nullptr;
     83 
     84 // Global thresholds for function durations.
     85 static atomic_uint64_t ThresholdTicks{0};
     86 
     87 // Global for ticks per second.
     88 static atomic_uint64_t TicksPerSec{0};
     89 
     90 static atomic_sint32_t LogFlushStatus = {
     91     XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING};
     92 
     93 // This function will initialize the thread-local data structure used by the FDR
     94 // logging implementation and return a reference to it. The implementation
     95 // details require a bit of care to maintain.
     96 //
     97 // First, some requirements on the implementation in general:
     98 //
     99 //   - XRay handlers should not call any memory allocation routines that may
    100 //     delegate to an instrumented implementation. This means functions like
    101 //     malloc() and free() should not be called while instrumenting.
    102 //
    103 //   - We would like to use some thread-local data initialized on first-use of
    104 //     the XRay instrumentation. These allow us to implement unsynchronized
    105 //     routines that access resources associated with the thread.
    106 //
     107 // The implementation here uses a few mechanisms that allow us to satisfy both
     108 // of the requirements listed above. We do this by:
    109 //
    110 //   1. Using a thread-local aligned storage buffer for representing the
    111 //      ThreadLocalData struct. This data will be uninitialized memory by
    112 //      design.
    113 //
    114 //   2. Not requiring a thread exit handler/implementation, keeping the
    115 //      thread-local as purely a collection of references/data that do not
    116 //      require cleanup.
    117 //
    118 // We're doing this to avoid using a `thread_local` object that has a
    119 // non-trivial destructor, because the C++ runtime might call std::malloc(...)
    120 // to register calls to destructors. Deadlocks may arise when, for example, an
    121 // externally provided malloc implementation is XRay instrumented, and
    122 // initializing the thread-locals involves calling into malloc. A malloc
    123 // implementation that does global synchronization might be holding a lock for a
    124 // critical section, calling a function that might be XRay instrumented (and
    125 // thus in turn calling into malloc by virtue of registration of the
    126 // thread_local's destructor).
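//
// To illustrate the hazard being avoided (a sketch only, not code in this
// file): a declaration along the lines of
//
//   thread_local std::unique_ptr<ThreadLocalData> TLD = ...;
//
// would require the C++ runtime to register the object's destructor (e.g. via
// __cxa_thread_atexit), and that registration may itself allocate. If the
// allocator is XRay instrumented, the registration can re-enter the handler
// while an allocator lock is held, which is how the deadlock described above
// arises.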
    127 #if XRAY_HAS_TLS_ALIGNAS
    128 static_assert(alignof(ThreadLocalData) >= 64,
    129               "ThreadLocalData must be cache line aligned.");
    130 #endif
    131 static ThreadLocalData &getThreadLocalData() {
    132   thread_local typename std::aligned_storage<
    133       sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{};
    134 
    135   if (pthread_getspecific(Key) == NULL) {
    136     new (reinterpret_cast<ThreadLocalData *>(&TLDStorage)) ThreadLocalData{};
    137     pthread_setspecific(Key, &TLDStorage);
    138   }
    139 
    140   return *reinterpret_cast<ThreadLocalData *>(&TLDStorage);
    141 }
    142 
    143 static XRayFileHeader &fdrCommonHeaderInfo() {
    144   static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
    145   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
    146   static bool TSCSupported = true;
    147   static uint64_t CycleFrequency = NanosecondsPerSecond;
    148   pthread_once(
    149       &OnceInit, +[] {
    150         XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
    151         // Version 2 of the log writes the extents of the buffer, instead of
    152         // relying on an end-of-buffer record.
     153         // Version 3 includes a PID metadata record.
    154         // Version 4 includes CPU data in the custom event records.
    155         // Version 5 uses relative deltas for custom and typed event records,
    156         // and removes the CPU data in custom event records (similar to how
    157         // function records use deltas instead of full TSCs and rely on other
    158         // metadata records for TSC wraparound and CPU migration).
    159         H.Version = 5;
    160         H.Type = FileTypes::FDR_LOG;
    161 
    162         // Test for required CPU features and cache the cycle frequency
    163         TSCSupported = probeRequiredCPUFeatures();
    164         if (TSCSupported)
    165           CycleFrequency = getTSCFrequency();
    166         H.CycleFrequency = CycleFrequency;
    167 
    168         // FIXME: Actually check whether we have 'constant_tsc' and
    169         // 'nonstop_tsc' before setting the values in the header.
    170         H.ConstantTSC = 1;
    171         H.NonstopTSC = 1;
    172       });
    173   return reinterpret_cast<XRayFileHeader &>(HStorage);
    174 }
    175 
    176 // This is the iterator implementation, which knows how to handle FDR-mode
    177 // specific buffers. This is used as an implementation of the iterator function
    178 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the
    179 // buffer iteration for the currently installed FDR mode buffers. In particular:
    180 //
    181 //   - If the argument represents the initial state of XRayBuffer ({nullptr, 0})
    182 //     then the iterator returns the header information.
    183 //   - If the argument represents the header information ({address of header
    184 //     info, size of the header info}) then it returns the first FDR buffer's
    185 //     address and extents.
     186 //   - It will keep returning the next buffer and its extents for as long as
     187 //     there are more buffers to process. When the input represents the last
     188 //     buffer, it returns the initial state ({nullptr, 0}) to signal completion.
    189 //
    190 // See xray/xray_log_interface.h for more details on the requirements for the
    191 // implementations of __xray_set_buffer_iterator(...) and
    192 // __xray_log_process_buffers(...).
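//
// For illustration only (not part of this translation unit), a consumer of the
// FDR buffers might drive this iterator indirectly through the log interface,
// roughly as follows:
//
//   __xray_log_finalize();
//   __xray_log_process_buffers(
//       +[](const char *Mode, XRayBuffer B) {
//         // Each invocation sees either the serialized file header or one
//         // FDR buffer; B.Data points at the bytes and B.Size is the length.
//       });
//   __xray_log_flushLog();
//
// The exact contracts for these entry points are documented in
// xray/xray_log_interface.h.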
    193 XRayBuffer fdrIterator(const XRayBuffer B) {
    194   DCHECK(internal_strcmp(__xray_log_get_current_mode(), "xray-fdr") == 0);
     195   DCHECK(BQ != nullptr && BQ->finalizing());
    196 
    197   if (BQ == nullptr || !BQ->finalizing()) {
    198     if (Verbosity())
     199       Report(
     200           "XRay FDR: Global buffer queue is null or not finalizing!\n");
    201     return {nullptr, 0};
    202   }
    203 
     204   // We use a global scratch-pad for the header information, which only gets
     205   // initialized the first time this function is called. We'll update one part
     206   // of this information with some relevant data (in particular the configured
     207   // size of the buffers to expect).
    208   static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
    209   static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
    210   pthread_once(
    211       &HeaderOnce, +[] {
    212         reinterpret_cast<XRayFileHeader &>(HeaderStorage) =
    213             fdrCommonHeaderInfo();
    214       });
    215 
    216   // We use a convenience alias for code referring to Header from here on out.
    217   auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
    218   if (B.Data == nullptr && B.Size == 0) {
    219     Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
    220     return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
    221   }
    222 
    223   static BufferQueue::const_iterator It{};
    224   static BufferQueue::const_iterator End{};
    225   static uint8_t *CurrentBuffer{nullptr};
    226   static size_t SerializedBufferSize = 0;
    227   if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
     228     // From this point on, we provide raw access to the buffers we get from the
     229     // BufferQueue, relying on the iterators of the currently installed buffer
     230     // queue.
    231     It = BQ->cbegin();
    232     End = BQ->cend();
    233   }
    234 
    235   if (CurrentBuffer != nullptr) {
    236     deallocateBuffer(CurrentBuffer, SerializedBufferSize);
    237     CurrentBuffer = nullptr;
    238   }
    239 
    240   if (It == End)
    241     return {nullptr, 0};
    242 
     243   // Set up the current buffer to contain the extents, as we would when writing
     244   // out to disk. The difference is that here we still write "empty" buffers, or
     245   // at least walk the iterators faithfully, so that the handlers see the empty
     246   // buffers in the queue.
    247   //
    248   // We need this atomic fence here to ensure that writes happening to the
    249   // buffer have been committed before we load the extents atomically. Because
     250   // the buffer is not explicitly synchronized across threads, we rely on the
    251   // fence ordering to ensure that writes we expect to have been completed
    252   // before the fence are fully committed before we read the extents.
    253   atomic_thread_fence(memory_order_acquire);
    254   auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
    255   SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
    256   CurrentBuffer = allocateBuffer(SerializedBufferSize);
    257   if (CurrentBuffer == nullptr)
    258     return {nullptr, 0};
    259 
    260   // Write out the extents as a Metadata Record into the CurrentBuffer.
    261   MetadataRecord ExtentsRecord;
    262   ExtentsRecord.Type = uint8_t(RecordType::Metadata);
    263   ExtentsRecord.RecordKind =
    264       uint8_t(MetadataRecord::RecordKinds::BufferExtents);
    265   internal_memcpy(ExtentsRecord.Data, &BufferSize, sizeof(BufferSize));
    266   auto AfterExtents =
    267       static_cast<char *>(internal_memcpy(CurrentBuffer, &ExtentsRecord,
    268                                           sizeof(MetadataRecord))) +
    269       sizeof(MetadataRecord);
    270   internal_memcpy(AfterExtents, It->Data, BufferSize);
    271 
    272   XRayBuffer Result;
    273   Result.Data = CurrentBuffer;
    274   Result.Size = SerializedBufferSize;
    275   ++It;
    276   return Result;
    277 }
    278 
    279 // Must finalize before flushing.
    280 XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT {
    281   if (atomic_load(&LoggingStatus, memory_order_acquire) !=
    282       XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    283     if (Verbosity())
    284       Report("Not flushing log, implementation is not finalized.\n");
    285     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    286   }
    287 
    288   s32 Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    289   if (!atomic_compare_exchange_strong(&LogFlushStatus, &Result,
    290                                       XRayLogFlushStatus::XRAY_LOG_FLUSHING,
    291                                       memory_order_release)) {
    292     if (Verbosity())
    293       Report("Not flushing log, implementation is still finalizing.\n");
    294     return static_cast<XRayLogFlushStatus>(Result);
    295   }
    296 
    297   if (BQ == nullptr) {
    298     if (Verbosity())
    299       Report("Cannot flush when global buffer queue is null.\n");
    300     return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    301   }
    302 
    303   // We wait a number of milliseconds to allow threads to see that we've
     304   // finalized before attempting to flush the log.
    305   SleepForMillis(fdrFlags()->grace_period_ms);
    306 
    307   // At this point, we're going to uninstall the iterator implementation, before
    308   // we decide to do anything further with the global buffer queue.
    309   __xray_log_remove_buffer_iterator();
    310 
    311   // Once flushed, we should set the global status of the logging implementation
    312   // to "uninitialized" to allow for FDR-logging multiple runs.
     313   auto ResetToUninitialized = at_scope_exit([] {
    314     atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
    315                  memory_order_release);
    316   });
    317 
    318   auto CleanupBuffers = at_scope_exit([] {
    319     auto &TLD = getThreadLocalData();
    320     if (TLD.Controller != nullptr)
    321       TLD.Controller->flush();
    322   });
    323 
    324   if (fdrFlags()->no_file_flush) {
    325     if (Verbosity())
    326       Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
    327 
    328     atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
    329                  memory_order_release);
    330     return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
    331   }
    332 
    333   // We write out the file in the following format:
    334   //
     335   //   1) We write down the XRay file header (currently version 5, type FDR_LOG).
    336   //   2) Then we use the 'apply' member of the BufferQueue that's live, to
    337   //      ensure that at this point in time we write down the buffers that have
    338   //      been released (and marked "used") -- we dump the full buffer for now
    339   //      (fixed-sized) and let the tools reading the buffers deal with the data
    340   //      afterwards.
    341   //
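  //
  // As a sketch (not normative; exact sizes depend on the build), the
  // resulting byte stream looks like:
  //
  //   [ XRayFileHeader ]
  //   [ MetadataRecord: BufferExtents = N0 ][ N0 bytes of buffer 0 ]
  //   [ MetadataRecord: BufferExtents = N1 ][ N1 bytes of buffer 1 ]
  //   ...
  //
  // Buffers whose extents are zero are skipped entirely (see the loop below).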
    342   LogWriter *LW = LogWriter::Open();
    343   if (LW == nullptr) {
    344     auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING;
    345     atomic_store(&LogFlushStatus, Result, memory_order_release);
    346     return Result;
    347   }
    348 
    349   XRayFileHeader Header = fdrCommonHeaderInfo();
    350   Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
    351   LW->WriteAll(reinterpret_cast<char *>(&Header),
    352                reinterpret_cast<char *>(&Header) + sizeof(Header));
    353 
     354   // Release the current thread's buffer before we attempt to write out all the
     355   // buffers. This ensures that even if only a single thread was running, we are
     356   // still able to capture its data.
    357   auto &TLD = getThreadLocalData();
    358   if (TLD.Controller != nullptr)
    359     TLD.Controller->flush();
    360 
    361   BQ->apply([&](const BufferQueue::Buffer &B) {
     362     // Starting at version 2 of the FDR logging implementation, we only write
     363     // the records identified by the extents of the buffer. We take the Extents
     364     // from the Buffer and write them out as the first record in the buffer,
     365     // reusing a Metadata record whose payload holds the extents rather than
     366     // ordinary metadata.
    367     MetadataRecord ExtentsRecord;
    368     auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
    369     DCHECK(BufferExtents <= B.Size);
    370     ExtentsRecord.Type = uint8_t(RecordType::Metadata);
    371     ExtentsRecord.RecordKind =
    372         uint8_t(MetadataRecord::RecordKinds::BufferExtents);
    373     internal_memcpy(ExtentsRecord.Data, &BufferExtents, sizeof(BufferExtents));
    374     if (BufferExtents > 0) {
    375       LW->WriteAll(reinterpret_cast<char *>(&ExtentsRecord),
    376                    reinterpret_cast<char *>(&ExtentsRecord) +
    377                        sizeof(MetadataRecord));
    378       LW->WriteAll(reinterpret_cast<char *>(B.Data),
    379                    reinterpret_cast<char *>(B.Data) + BufferExtents);
    380     }
    381   });
    382 
    383   atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
    384                memory_order_release);
    385   return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
    386 }
    387 
    388 XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT {
    389   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED;
    390   if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
    391                                       XRayLogInitStatus::XRAY_LOG_FINALIZING,
    392                                       memory_order_release)) {
    393     if (Verbosity())
    394       Report("Cannot finalize log, implementation not initialized.\n");
    395     return static_cast<XRayLogInitStatus>(CurrentStatus);
    396   }
    397 
     398   // Finalize the buffer queue so the log wraps itself up, and do not allow any
     399   // more operations to be performed until re-initialized.
    400   if (BQ == nullptr) {
    401     if (Verbosity())
    402       Report("Attempting to finalize an uninitialized global buffer!\n");
    403   } else {
    404     BQ->finalize();
    405   }
    406 
    407   atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED,
    408                memory_order_release);
    409   return XRayLogInitStatus::XRAY_LOG_FINALIZED;
    410 }
    411 
    412 struct TSCAndCPU {
    413   uint64_t TSC = 0;
    414   unsigned char CPU = 0;
    415 };
    416 
    417 static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT {
    418   // We want to get the TSC as early as possible, so that we can check whether
    419   // we've seen this CPU before. We also do it before we load anything else,
    420   // to allow for forward progress with the scheduling.
    421   TSCAndCPU Result;
    422 
    423   // Test once for required CPU features
    424   static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT;
    425   static bool TSCSupported = true;
    426   pthread_once(
    427       &OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); });
    428 
    429   if (TSCSupported) {
    430     Result.TSC = __xray::readTSC(Result.CPU);
    431   } else {
    432     // FIXME: This code needs refactoring as it appears in multiple locations
    433     timespec TS;
    434     int result = clock_gettime(CLOCK_REALTIME, &TS);
    435     if (result != 0) {
     436       Report("clock_gettime(2) returned %d, errno=%d\n", result, int(errno));
    437       TS = {0, 0};
    438     }
    439     Result.CPU = 0;
    440     Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
    441   }
    442   return Result;
    443 }
    444 
    445 thread_local atomic_uint8_t Running{0};
    446 
    447 static bool setupTLD(ThreadLocalData &TLD) XRAY_NEVER_INSTRUMENT {
    448   // Check if we're finalizing, before proceeding.
    449   {
    450     auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
    451     if (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING ||
    452         Status == XRayLogInitStatus::XRAY_LOG_FINALIZED) {
    453       if (TLD.Controller != nullptr) {
    454         TLD.Controller->flush();
    455         TLD.Controller = nullptr;
    456       }
    457       return false;
    458     }
    459   }
    460 
    461   if (UNLIKELY(TLD.Controller == nullptr)) {
    462     // Set up the TLD buffer queue.
    463     if (UNLIKELY(BQ == nullptr))
    464       return false;
    465     TLD.BQ = BQ;
    466 
     467     // Release a buffer from a stale generation before acquiring a new one.
    468     if (TLD.Buffer.Generation != BQ->generation() &&
    469         TLD.BQ->releaseBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
    470       return false;
    471 
    472     // Set up a buffer, before setting up the log writer. Bail out on failure.
    473     if (TLD.BQ->getBuffer(TLD.Buffer) != BufferQueue::ErrorCode::Ok)
    474       return false;
    475 
    476     // Set up the Log Writer for this thread.
    477     if (UNLIKELY(TLD.Writer == nullptr)) {
    478       auto *LWStorage = reinterpret_cast<FDRLogWriter *>(&TLD.LWStorage);
    479       new (LWStorage) FDRLogWriter(TLD.Buffer);
    480       TLD.Writer = LWStorage;
    481     } else {
    482       TLD.Writer->resetRecord();
    483     }
    484 
    485     auto *CStorage = reinterpret_cast<FDRController<> *>(&TLD.CStorage);
    486     new (CStorage)
    487         FDRController<>(TLD.BQ, TLD.Buffer, *TLD.Writer, clock_gettime,
    488                         atomic_load_relaxed(&ThresholdTicks));
    489     TLD.Controller = CStorage;
    490   }
    491 
    492   DCHECK_NE(TLD.Controller, nullptr);
    493   return true;
    494 }
    495 
    496 void fdrLoggingHandleArg0(int32_t FuncId,
    497                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
    498   auto TC = getTimestamp();
    499   auto &TSC = TC.TSC;
    500   auto &CPU = TC.CPU;
    501   RecursionGuard Guard{Running};
    502   if (!Guard)
    503     return;
    504 
    505   auto &TLD = getThreadLocalData();
    506   if (!setupTLD(TLD))
    507     return;
    508 
    509   switch (Entry) {
    510   case XRayEntryType::ENTRY:
    511   case XRayEntryType::LOG_ARGS_ENTRY:
    512     TLD.Controller->functionEnter(FuncId, TSC, CPU);
    513     return;
    514   case XRayEntryType::EXIT:
    515     TLD.Controller->functionExit(FuncId, TSC, CPU);
    516     return;
    517   case XRayEntryType::TAIL:
    518     TLD.Controller->functionTailExit(FuncId, TSC, CPU);
    519     return;
    520   case XRayEntryType::CUSTOM_EVENT:
    521   case XRayEntryType::TYPED_EVENT:
    522     break;
    523   }
    524 }
    525 
    526 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
    527                           uint64_t Arg) XRAY_NEVER_INSTRUMENT {
    528   auto TC = getTimestamp();
    529   auto &TSC = TC.TSC;
    530   auto &CPU = TC.CPU;
    531   RecursionGuard Guard{Running};
    532   if (!Guard)
    533     return;
    534 
    535   auto &TLD = getThreadLocalData();
    536   if (!setupTLD(TLD))
    537     return;
    538 
    539   switch (Entry) {
    540   case XRayEntryType::ENTRY:
    541   case XRayEntryType::LOG_ARGS_ENTRY:
    542     TLD.Controller->functionEnterArg(FuncId, TSC, CPU, Arg);
    543     return;
    544   case XRayEntryType::EXIT:
    545     TLD.Controller->functionExit(FuncId, TSC, CPU);
    546     return;
    547   case XRayEntryType::TAIL:
    548     TLD.Controller->functionTailExit(FuncId, TSC, CPU);
    549     return;
    550   case XRayEntryType::CUSTOM_EVENT:
    551   case XRayEntryType::TYPED_EVENT:
    552     break;
    553   }
    554 }
    555 
    556 void fdrLoggingHandleCustomEvent(void *Event,
    557                                  std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
    558   auto TC = getTimestamp();
    559   auto &TSC = TC.TSC;
    560   auto &CPU = TC.CPU;
    561   RecursionGuard Guard{Running};
    562   if (!Guard)
    563     return;
    564 
     565   // Complain (once) if we ever get a custom event that's larger than what we
     566   // can possibly support.
    567   if (EventSize >
    568       static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
    569     static pthread_once_t Once = PTHREAD_ONCE_INIT;
    570     pthread_once(
    571         &Once, +[] {
    572           Report("Custom event size too large; truncating to %d.\n",
    573                  std::numeric_limits<int32_t>::max());
    574         });
    575   }
    576 
    577   auto &TLD = getThreadLocalData();
    578   if (!setupTLD(TLD))
    579     return;
    580 
    581   int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
    582   TLD.Controller->customEvent(TSC, CPU, Event, ReducedEventSize);
    583 }
    584 
    585 void fdrLoggingHandleTypedEvent(
    586     uint16_t EventType, const void *Event,
    587     std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
    588   auto TC = getTimestamp();
    589   auto &TSC = TC.TSC;
    590   auto &CPU = TC.CPU;
    591   RecursionGuard Guard{Running};
    592   if (!Guard)
    593     return;
    594 
     595   // Complain (once) if we ever get a typed event that's larger than what we
     596   // can possibly support.
    597   if (EventSize >
    598       static_cast<std::size_t>(std::numeric_limits<int32_t>::max())) {
    599     static pthread_once_t Once = PTHREAD_ONCE_INIT;
    600     pthread_once(
    601         &Once, +[] {
    602           Report("Typed event size too large; truncating to %d.\n",
    603                  std::numeric_limits<int32_t>::max());
    604         });
    605   }
    606 
    607   auto &TLD = getThreadLocalData();
    608   if (!setupTLD(TLD))
    609     return;
    610 
    611   int32_t ReducedEventSize = static_cast<int32_t>(EventSize);
    612   TLD.Controller->typedEvent(TSC, CPU, EventType, Event, ReducedEventSize);
    613 }
    614 
    615 XRayLogInitStatus fdrLoggingInit(size_t, size_t, void *Options,
    616                                  size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
    617   if (Options == nullptr)
    618     return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    619 
    620   s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    621   if (!atomic_compare_exchange_strong(&LoggingStatus, &CurrentStatus,
    622                                       XRayLogInitStatus::XRAY_LOG_INITIALIZING,
    623                                       memory_order_release)) {
    624     if (Verbosity())
    625       Report("Cannot initialize already initialized implementation.\n");
    626     return static_cast<XRayLogInitStatus>(CurrentStatus);
    627   }
    628 
    629   if (Verbosity())
    630     Report("Initializing FDR mode with options: %s\n",
    631            static_cast<const char *>(Options));
    632 
    633   // TODO: Factor out the flags specific to the FDR mode implementation. For
    634   // now, use the global/single definition of the flags, since the FDR mode
    635   // flags are already defined there.
    636   FlagParser FDRParser;
    637   FDRFlags FDRFlags;
    638   registerXRayFDRFlags(&FDRParser, &FDRFlags);
    639   FDRFlags.setDefaults();
    640 
    641   // Override first from the general XRAY_DEFAULT_OPTIONS compiler-provided
    642   // options until we migrate everyone to use the XRAY_FDR_OPTIONS
    643   // compiler-provided options.
    644   FDRParser.ParseString(useCompilerDefinedFlags());
    645   FDRParser.ParseString(useCompilerDefinedFDRFlags());
    646   auto *EnvOpts = GetEnv("XRAY_FDR_OPTIONS");
    647   if (EnvOpts == nullptr)
    648     EnvOpts = "";
    649   FDRParser.ParseString(EnvOpts);
    650 
    651   // FIXME: Remove this when we fully remove the deprecated flags.
    652   if (internal_strlen(EnvOpts) == 0) {
    653     FDRFlags.func_duration_threshold_us =
    654         flags()->xray_fdr_log_func_duration_threshold_us;
    655     FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms;
    656   }
    657 
    658   // The provided options should always override the compiler-provided and
    659   // environment-variable defined options.
    660   FDRParser.ParseString(static_cast<const char *>(Options));
    661   *fdrFlags() = FDRFlags;
    662   auto BufferSize = FDRFlags.buffer_size;
    663   auto BufferMax = FDRFlags.buffer_max;
    664 
    665   if (BQ == nullptr) {
    666     bool Success = false;
    667     BQ = reinterpret_cast<BufferQueue *>(&BufferQueueStorage);
    668     new (BQ) BufferQueue(BufferSize, BufferMax, Success);
    669     if (!Success) {
    670       Report("BufferQueue init failed.\n");
    671       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    672     }
    673   } else {
    674     if (BQ->init(BufferSize, BufferMax) != BufferQueue::ErrorCode::Ok) {
    675       if (Verbosity())
     676         Report("Failed to re-initialize the global buffer queue.\n");
    677       return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
    678     }
    679   }
    680 
    681   static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
    682   pthread_once(
    683       &OnceInit, +[] {
    684         atomic_store(&TicksPerSec,
    685                      probeRequiredCPUFeatures() ? getTSCFrequency()
    686                                                 : __xray::NanosecondsPerSecond,
    687                      memory_order_release);
    688         pthread_key_create(
    689             &Key, +[](void *TLDPtr) {
    690               if (TLDPtr == nullptr)
    691                 return;
    692               auto &TLD = *reinterpret_cast<ThreadLocalData *>(TLDPtr);
    693               if (TLD.BQ == nullptr)
    694                 return;
    695               if (TLD.Buffer.Data == nullptr)
    696                 return;
    697               auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
    698               if (EC != BufferQueue::ErrorCode::Ok)
    699                 Report("At thread exit, failed to release buffer at %p; "
    700                        "error=%s\n",
    701                        TLD.Buffer.Data, BufferQueue::getErrorString(EC));
    702             });
    703       });
    704 
    705   atomic_store(&ThresholdTicks,
    706                atomic_load_relaxed(&TicksPerSec) *
    707                    fdrFlags()->func_duration_threshold_us / 1000000,
    708                memory_order_release);
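  // For example (illustrative numbers only): with a 2.5 GHz TSC, i.e.
  // TicksPerSec = 2,500,000,000, and func_duration_threshold_us = 5, the
  // threshold becomes 2,500,000,000 * 5 / 1,000,000 = 12,500 ticks; function
  // executions shorter than that may be elided from the log by the controller.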
    709   // Arg1 handler should go in first to avoid concurrent code accidentally
     710   // falling back to arg0 when it should have run arg1.
    711   __xray_set_handler_arg1(fdrLoggingHandleArg1);
     712   // Install the actual handleArg0 handler after initializing the buffers.
    713   __xray_set_handler(fdrLoggingHandleArg0);
    714   __xray_set_customevent_handler(fdrLoggingHandleCustomEvent);
    715   __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent);
    716 
    717   // Install the buffer iterator implementation.
    718   __xray_log_set_buffer_iterator(fdrIterator);
    719 
    720   atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED,
    721                memory_order_release);
    722 
    723   if (Verbosity())
    724     Report("XRay FDR init successful.\n");
    725   return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
    726 }
    727 
    728 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
    729   XRayLogImpl Impl{
    730       fdrLoggingInit,
    731       fdrLoggingFinalize,
    732       fdrLoggingHandleArg0,
    733       fdrLoggingFlush,
    734   };
    735   auto RegistrationResult = __xray_log_register_mode("xray-fdr", Impl);
     736   if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
     737     if (Verbosity())
     738       Report("Cannot register XRay FDR mode as 'xray-fdr'; error = %d\n",
     739              RegistrationResult);
     740     return false;
     741   }
    742 
    743   if (flags()->xray_fdr_log ||
    744       !internal_strcmp(flags()->xray_mode, "xray-fdr")) {
    745     auto SelectResult = __xray_log_select_mode("xray-fdr");
     746     if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
     747       if (Verbosity())
     748         Report("Cannot select XRay FDR mode as 'xray-fdr'; error = %d\n",
     749                SelectResult);
     750       return false;
     751     }
    752   }
    753   return true;
    754 }
    755 
    756 } // namespace __xray
    757 
    758 static auto UNUSED Unused = __xray::fdrLogDynamicInitializer();
    759