1 //===-- xray_basic_logging.cc -----------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file is a part of XRay, a dynamic runtime instrumentation system. 11 // 12 // Implementation of a simple in-memory log of XRay events. This defines a 13 // logging function that's compatible with the XRay handler interface, and 14 // routines for exporting data to files. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include <errno.h> 19 #include <fcntl.h> 20 #include <pthread.h> 21 #include <sys/stat.h> 22 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC 23 #include <sys/syscall.h> 24 #endif 25 #include <sys/types.h> 26 #include <time.h> 27 #include <unistd.h> 28 29 #include "sanitizer_common/sanitizer_allocator_internal.h" 30 #include "sanitizer_common/sanitizer_libc.h" 31 #include "xray/xray_records.h" 32 #include "xray_recursion_guard.h" 33 #include "xray_basic_flags.h" 34 #include "xray_basic_logging.h" 35 #include "xray_defs.h" 36 #include "xray_flags.h" 37 #include "xray_interface_internal.h" 38 #include "xray_tsc.h" 39 #include "xray_utils.h" 40 41 namespace __xray { 42 43 static SpinMutex LogMutex; 44 45 namespace { 46 // We use elements of this type to record the entry TSC of every function ID we 47 // see as we're tracing a particular thread's execution. 48 struct alignas(16) StackEntry { 49 int32_t FuncId; 50 uint16_t Type; 51 uint8_t CPU; 52 uint8_t Padding; 53 uint64_t TSC; 54 }; 55 56 static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); 57 58 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { 59 void *InMemoryBuffer = nullptr; 60 size_t BufferSize = 0; 61 size_t BufferOffset = 0; 62 void *ShadowStack = nullptr; 63 size_t StackSize = 0; 64 size_t StackEntries = 0; 65 __xray::LogWriter *LogWriter = nullptr; 66 }; 67 68 struct BasicLoggingOptions { 69 int DurationFilterMicros = 0; 70 size_t MaxStackDepth = 0; 71 size_t ThreadBufferSize = 0; 72 }; 73 } // namespace 74 75 static pthread_key_t PThreadKey; 76 77 static atomic_uint8_t BasicInitialized{0}; 78 79 struct BasicLoggingOptions GlobalOptions; 80 81 thread_local atomic_uint8_t Guard{0}; 82 83 static atomic_uint8_t UseRealTSC{0}; 84 static atomic_uint64_t ThresholdTicks{0}; 85 static atomic_uint64_t TicksPerSec{0}; 86 static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; 87 88 static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { 89 LogWriter* LW = LogWriter::Open(); 90 if (LW == nullptr) 91 return LW; 92 93 static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; 94 pthread_once(&DetectOnce, +[] { 95 if (atomic_load(&UseRealTSC, memory_order_acquire)) 96 atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); 97 }); 98 99 // Since we're here, we get to write the header. We set it up so that the 100 // header will only be written once, at the start, and let the threads 101 // logging do writes which just append. 102 XRayFileHeader Header; 103 // Version 2 includes tail exit records. 104 // Version 3 includes pid inside records. 105 Header.Version = 3; 106 Header.Type = FileTypes::NAIVE_LOG; 107 Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); 108 109 // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' 110 // before setting the values in the header. 111 Header.ConstantTSC = 1; 112 Header.NonstopTSC = 1; 113 LW->WriteAll(reinterpret_cast<char *>(&Header), 114 reinterpret_cast<char *>(&Header) + sizeof(Header)); 115 return LW; 116 } 117 118 static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { 119 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 120 static LogWriter *LW = nullptr; 121 pthread_once(&OnceInit, +[] { LW = getLog(); }); 122 return LW; 123 } 124 125 static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { 126 thread_local ThreadLocalData TLD; 127 thread_local bool UNUSED TOnce = [] { 128 if (GlobalOptions.ThreadBufferSize == 0) { 129 if (Verbosity()) 130 Report("Not initializing TLD since ThreadBufferSize == 0.\n"); 131 return false; 132 } 133 pthread_setspecific(PThreadKey, &TLD); 134 TLD.LogWriter = getGlobalLog(); 135 TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>( 136 InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, 137 nullptr, alignof(XRayRecord))); 138 TLD.BufferSize = GlobalOptions.ThreadBufferSize; 139 TLD.BufferOffset = 0; 140 if (GlobalOptions.MaxStackDepth == 0) { 141 if (Verbosity()) 142 Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); 143 TLD.StackSize = 0; 144 TLD.StackEntries = 0; 145 TLD.ShadowStack = nullptr; 146 return false; 147 } 148 TLD.ShadowStack = reinterpret_cast<StackEntry *>( 149 InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, 150 alignof(StackEntry))); 151 TLD.StackSize = GlobalOptions.MaxStackDepth; 152 TLD.StackEntries = 0; 153 return false; 154 }(); 155 return TLD; 156 } 157 158 template <class RDTSC> 159 void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, 160 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { 161 auto &TLD = getThreadLocalData(); 162 LogWriter *LW = getGlobalLog(); 163 if (LW == nullptr) 164 return; 165 166 // Use a simple recursion guard, to handle cases where we're already logging 167 // and for one reason or another, this function gets called again in the same 168 // thread. 169 RecursionGuard G(Guard); 170 if (!G) 171 return; 172 173 uint8_t CPU = 0; 174 uint64_t TSC = ReadTSC(CPU); 175 176 switch (Type) { 177 case XRayEntryType::ENTRY: 178 case XRayEntryType::LOG_ARGS_ENTRY: { 179 // Short circuit if we've reached the maximum depth of the stack. 180 if (TLD.StackEntries++ >= TLD.StackSize) 181 return; 182 183 // When we encounter an entry event, we keep track of the TSC and the CPU, 184 // and put it in the stack. 185 StackEntry E; 186 E.FuncId = FuncId; 187 E.CPU = CPU; 188 E.Type = Type; 189 E.TSC = TSC; 190 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + 191 (sizeof(StackEntry) * (TLD.StackEntries - 1)); 192 internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); 193 break; 194 } 195 case XRayEntryType::EXIT: 196 case XRayEntryType::TAIL: { 197 if (TLD.StackEntries == 0) 198 break; 199 200 if (--TLD.StackEntries >= TLD.StackSize) 201 return; 202 203 // When we encounter an exit event, we check whether all the following are 204 // true: 205 // 206 // - The Function ID is the same as the most recent entry in the stack. 207 // - The CPU is the same as the most recent entry in the stack. 208 // - The Delta of the TSCs is less than the threshold amount of time we're 209 // looking to record. 210 // 211 // If all of these conditions are true, we pop the stack and don't write a 212 // record and move the record offset back. 213 StackEntry StackTop; 214 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + 215 (sizeof(StackEntry) * TLD.StackEntries); 216 internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); 217 if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && 218 StackTop.TSC < TSC) { 219 auto Delta = TSC - StackTop.TSC; 220 if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { 221 DCHECK(TLD.BufferOffset > 0); 222 TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; 223 return; 224 } 225 } 226 break; 227 } 228 default: 229 // Should be unreachable. 230 DCHECK(false && "Unsupported XRayEntryType encountered."); 231 break; 232 } 233 234 // First determine whether the delta between the function's enter record and 235 // the exit record is higher than the threshold. 236 XRayRecord R; 237 R.RecordType = RecordTypes::NORMAL; 238 R.CPU = CPU; 239 R.TSC = TSC; 240 R.TId = GetTid(); 241 R.PId = internal_getpid(); 242 R.Type = Type; 243 R.FuncId = FuncId; 244 auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer); 245 internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); 246 if (++TLD.BufferOffset == TLD.BufferSize) { 247 SpinMutexLock Lock(&LogMutex); 248 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 249 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 250 TLD.BufferOffset = 0; 251 TLD.StackEntries = 0; 252 } 253 } 254 255 template <class RDTSC> 256 void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, 257 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { 258 auto &TLD = getThreadLocalData(); 259 auto FirstEntry = 260 reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer); 261 const auto &BuffLen = TLD.BufferSize; 262 LogWriter *LW = getGlobalLog(); 263 if (LW == nullptr) 264 return; 265 266 // First we check whether there's enough space to write the data consecutively 267 // in the thread-local buffer. If not, we first flush the buffer before 268 // attempting to write the two records that must be consecutive. 269 if (TLD.BufferOffset + 2 > BuffLen) { 270 SpinMutexLock Lock(&LogMutex); 271 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 272 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 273 TLD.BufferOffset = 0; 274 TLD.StackEntries = 0; 275 } 276 277 // Then we write the "we have an argument" record. 278 InMemoryRawLog(FuncId, Type, ReadTSC); 279 280 RecursionGuard G(Guard); 281 if (!G) 282 return; 283 284 // And, from here on write the arg payload. 285 XRayArgPayload R; 286 R.RecordType = RecordTypes::ARG_PAYLOAD; 287 R.FuncId = FuncId; 288 R.TId = GetTid(); 289 R.PId = internal_getpid(); 290 R.Arg = Arg1; 291 internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); 292 if (++TLD.BufferOffset == BuffLen) { 293 SpinMutexLock Lock(&LogMutex); 294 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 295 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 296 TLD.BufferOffset = 0; 297 TLD.StackEntries = 0; 298 } 299 } 300 301 void basicLoggingHandleArg0RealTSC(int32_t FuncId, 302 XRayEntryType Type) XRAY_NEVER_INSTRUMENT { 303 InMemoryRawLog(FuncId, Type, readTSC); 304 } 305 306 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) 307 XRAY_NEVER_INSTRUMENT { 308 InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { 309 timespec TS; 310 int result = clock_gettime(CLOCK_REALTIME, &TS); 311 if (result != 0) { 312 Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); 313 TS = {0, 0}; 314 } 315 CPU = 0; 316 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; 317 }); 318 } 319 320 void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, 321 uint64_t Arg1) XRAY_NEVER_INSTRUMENT { 322 InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); 323 } 324 325 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, 326 uint64_t Arg1) XRAY_NEVER_INSTRUMENT { 327 InMemoryRawLogWithArg( 328 FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { 329 timespec TS; 330 int result = clock_gettime(CLOCK_REALTIME, &TS); 331 if (result != 0) { 332 Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); 333 TS = {0, 0}; 334 } 335 CPU = 0; 336 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; 337 }); 338 } 339 340 static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { 341 ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); 342 auto ExitGuard = at_scope_exit([&TLD] { 343 // Clean up dynamic resources. 344 if (TLD.InMemoryBuffer) 345 InternalFree(TLD.InMemoryBuffer); 346 if (TLD.ShadowStack) 347 InternalFree(TLD.ShadowStack); 348 if (Verbosity()) 349 Report("Cleaned up log for TID: %d\n", GetTid()); 350 }); 351 352 if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { 353 if (Verbosity()) 354 Report("Skipping buffer for TID: %d; Offset = %llu\n", GetTid(), 355 TLD.BufferOffset); 356 return; 357 } 358 359 { 360 SpinMutexLock L(&LogMutex); 361 TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer), 362 reinterpret_cast<char *>(TLD.InMemoryBuffer) + 363 (sizeof(XRayRecord) * TLD.BufferOffset)); 364 } 365 366 // Because this thread's exit could be the last one trying to write to 367 // the file and that we're not able to close out the file properly, we 368 // sync instead and hope that the pending writes are flushed as the 369 // thread exits. 370 TLD.LogWriter->Flush(); 371 } 372 373 XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, 374 UNUSED size_t BufferMax, void *Options, 375 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { 376 uint8_t Expected = 0; 377 if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, 378 memory_order_acq_rel)) { 379 if (Verbosity()) 380 Report("Basic logging already initialized.\n"); 381 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 382 } 383 384 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 385 pthread_once(&OnceInit, +[] { 386 pthread_key_create(&PThreadKey, TLDDestructor); 387 atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); 388 // Initialize the global TicksPerSec value. 389 atomic_store(&TicksPerSec, 390 probeRequiredCPUFeatures() ? getTSCFrequency() 391 : NanosecondsPerSecond, 392 memory_order_release); 393 if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) 394 Report("WARNING: Required CPU features missing for XRay instrumentation, " 395 "using emulation instead.\n"); 396 }); 397 398 FlagParser P; 399 BasicFlags F; 400 F.setDefaults(); 401 registerXRayBasicFlags(&P, &F); 402 P.ParseString(useCompilerDefinedBasicFlags()); 403 auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); 404 if (EnvOpts == nullptr) 405 EnvOpts = ""; 406 407 P.ParseString(EnvOpts); 408 409 // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options 410 // set through XRAY_OPTIONS instead. 411 if (internal_strlen(EnvOpts) == 0) { 412 F.func_duration_threshold_us = 413 flags()->xray_naive_log_func_duration_threshold_us; 414 F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; 415 F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; 416 } 417 418 P.ParseString(static_cast<const char *>(Options)); 419 GlobalOptions.ThreadBufferSize = F.thread_buffer_size; 420 GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; 421 GlobalOptions.MaxStackDepth = F.max_stack_depth; 422 *basicFlags() = F; 423 424 atomic_store(&ThresholdTicks, 425 atomic_load(&TicksPerSec, memory_order_acquire) * 426 GlobalOptions.DurationFilterMicros / 1000000, 427 memory_order_release); 428 __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) 429 ? basicLoggingHandleArg1RealTSC 430 : basicLoggingHandleArg1EmulateTSC); 431 __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) 432 ? basicLoggingHandleArg0RealTSC 433 : basicLoggingHandleArg0EmulateTSC); 434 435 // TODO: Implement custom event and typed event handling support in Basic 436 // Mode. 437 __xray_remove_customevent_handler(); 438 __xray_remove_typedevent_handler(); 439 440 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 441 } 442 443 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { 444 uint8_t Expected = 0; 445 if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, 446 memory_order_acq_rel) && 447 Verbosity()) 448 Report("Basic logging already finalized.\n"); 449 450 // Nothing really to do aside from marking state of the global to be 451 // uninitialized. 452 453 return XRayLogInitStatus::XRAY_LOG_FINALIZED; 454 } 455 456 XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { 457 // This really does nothing, since flushing the logs happen at the end of a 458 // thread's lifetime, or when the buffers are full. 459 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 460 } 461 462 // This is a handler that, effectively, does nothing. 463 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { 464 } 465 466 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { 467 XRayLogImpl Impl{ 468 basicLoggingInit, 469 basicLoggingFinalize, 470 basicLoggingHandleArg0Empty, 471 basicLoggingFlush, 472 }; 473 auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); 474 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 475 Verbosity()) 476 Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", 477 RegistrationResult); 478 if (flags()->xray_naive_log || 479 !internal_strcmp(flags()->xray_mode, "xray-basic")) { 480 auto SelectResult = __xray_log_select_mode("xray-basic"); 481 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { 482 if (Verbosity()) 483 Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); 484 return false; 485 } 486 487 // We initialize the implementation using the data we get from the 488 // XRAY_BASIC_OPTIONS environment variable, at this point of the 489 // implementation. 490 auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); 491 auto InitResult = 492 __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env); 493 if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { 494 if (Verbosity()) 495 Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); 496 return false; 497 } 498 499 // At this point we know that we've successfully initialized Basic mode 500 // tracing, and the only chance we're going to get for the current thread to 501 // clean-up may be at thread/program exit. To ensure that we're going to get 502 // the cleanup even without calling the finalization routines, we're 503 // registering a program exit function that will do the cleanup. 504 static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; 505 pthread_once(&DynamicOnce, +[] { 506 static void *FakeTLD = nullptr; 507 FakeTLD = &getThreadLocalData(); 508 Atexit(+[] { TLDDestructor(FakeTLD); }); 509 }); 510 } 511 return true; 512 } 513 514 } // namespace __xray 515 516 static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); 517