1 //===-- xray_basic_logging.cpp ----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file is a part of XRay, a dynamic runtime instrumentation system. 10 // 11 // Implementation of a simple in-memory log of XRay events. This defines a 12 // logging function that's compatible with the XRay handler interface, and 13 // routines for exporting data to files. 14 // 15 //===----------------------------------------------------------------------===// 16 17 #include <errno.h> 18 #include <fcntl.h> 19 #include <pthread.h> 20 #include <sys/stat.h> 21 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE 22 #include <sys/syscall.h> 23 #endif 24 #include <sys/types.h> 25 #include <time.h> 26 #include <unistd.h> 27 28 #include "sanitizer_common/sanitizer_allocator_internal.h" 29 #include "sanitizer_common/sanitizer_libc.h" 30 #include "xray/xray_records.h" 31 #include "xray_recursion_guard.h" 32 #include "xray_basic_flags.h" 33 #include "xray_basic_logging.h" 34 #include "xray_defs.h" 35 #include "xray_flags.h" 36 #include "xray_interface_internal.h" 37 #include "xray_tsc.h" 38 #include "xray_utils.h" 39 40 namespace __xray { 41 42 static SpinMutex LogMutex; 43 44 namespace { 45 // We use elements of this type to record the entry TSC of every function ID we 46 // see as we're tracing a particular thread's execution. 47 struct alignas(16) StackEntry { 48 int32_t FuncId; 49 uint16_t Type; 50 uint8_t CPU; 51 uint8_t Padding; 52 uint64_t TSC; 53 }; 54 55 static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); 56 57 struct XRAY_TLS_ALIGNAS(64) ThreadLocalData { 58 void *InMemoryBuffer = nullptr; 59 size_t BufferSize = 0; 60 size_t BufferOffset = 0; 61 void *ShadowStack = nullptr; 62 size_t StackSize = 0; 63 size_t StackEntries = 0; 64 __xray::LogWriter *LogWriter = nullptr; 65 }; 66 67 struct BasicLoggingOptions { 68 int DurationFilterMicros = 0; 69 size_t MaxStackDepth = 0; 70 size_t ThreadBufferSize = 0; 71 }; 72 } // namespace 73 74 static pthread_key_t PThreadKey; 75 76 static atomic_uint8_t BasicInitialized{0}; 77 78 struct BasicLoggingOptions GlobalOptions; 79 80 thread_local atomic_uint8_t Guard{0}; 81 82 static atomic_uint8_t UseRealTSC{0}; 83 static atomic_uint64_t ThresholdTicks{0}; 84 static atomic_uint64_t TicksPerSec{0}; 85 static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; 86 87 static LogWriter *getLog() XRAY_NEVER_INSTRUMENT { 88 LogWriter* LW = LogWriter::Open(); 89 if (LW == nullptr) 90 return LW; 91 92 static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; 93 pthread_once(&DetectOnce, +[] { 94 if (atomic_load(&UseRealTSC, memory_order_acquire)) 95 atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); 96 }); 97 98 // Since we're here, we get to write the header. We set it up so that the 99 // header will only be written once, at the start, and let the threads 100 // logging do writes which just append. 101 XRayFileHeader Header; 102 // Version 2 includes tail exit records. 103 // Version 3 includes pid inside records. 104 Header.Version = 3; 105 Header.Type = FileTypes::NAIVE_LOG; 106 Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); 107 108 // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' 109 // before setting the values in the header. 110 Header.ConstantTSC = 1; 111 Header.NonstopTSC = 1; 112 LW->WriteAll(reinterpret_cast<char *>(&Header), 113 reinterpret_cast<char *>(&Header) + sizeof(Header)); 114 return LW; 115 } 116 117 static LogWriter *getGlobalLog() XRAY_NEVER_INSTRUMENT { 118 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 119 static LogWriter *LW = nullptr; 120 pthread_once(&OnceInit, +[] { LW = getLog(); }); 121 return LW; 122 } 123 124 static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { 125 thread_local ThreadLocalData TLD; 126 thread_local bool UNUSED TOnce = [] { 127 if (GlobalOptions.ThreadBufferSize == 0) { 128 if (Verbosity()) 129 Report("Not initializing TLD since ThreadBufferSize == 0.\n"); 130 return false; 131 } 132 pthread_setspecific(PThreadKey, &TLD); 133 TLD.LogWriter = getGlobalLog(); 134 TLD.InMemoryBuffer = reinterpret_cast<XRayRecord *>( 135 InternalAlloc(sizeof(XRayRecord) * GlobalOptions.ThreadBufferSize, 136 nullptr, alignof(XRayRecord))); 137 TLD.BufferSize = GlobalOptions.ThreadBufferSize; 138 TLD.BufferOffset = 0; 139 if (GlobalOptions.MaxStackDepth == 0) { 140 if (Verbosity()) 141 Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); 142 TLD.StackSize = 0; 143 TLD.StackEntries = 0; 144 TLD.ShadowStack = nullptr; 145 return false; 146 } 147 TLD.ShadowStack = reinterpret_cast<StackEntry *>( 148 InternalAlloc(sizeof(StackEntry) * GlobalOptions.MaxStackDepth, nullptr, 149 alignof(StackEntry))); 150 TLD.StackSize = GlobalOptions.MaxStackDepth; 151 TLD.StackEntries = 0; 152 return false; 153 }(); 154 return TLD; 155 } 156 157 template <class RDTSC> 158 void InMemoryRawLog(int32_t FuncId, XRayEntryType Type, 159 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { 160 auto &TLD = getThreadLocalData(); 161 LogWriter *LW = getGlobalLog(); 162 if (LW == nullptr) 163 return; 164 165 // Use a simple recursion guard, to handle cases where we're already logging 166 // and for one reason or another, this function gets called again in the same 167 // thread. 168 RecursionGuard G(Guard); 169 if (!G) 170 return; 171 172 uint8_t CPU = 0; 173 uint64_t TSC = ReadTSC(CPU); 174 175 switch (Type) { 176 case XRayEntryType::ENTRY: 177 case XRayEntryType::LOG_ARGS_ENTRY: { 178 // Short circuit if we've reached the maximum depth of the stack. 179 if (TLD.StackEntries++ >= TLD.StackSize) 180 return; 181 182 // When we encounter an entry event, we keep track of the TSC and the CPU, 183 // and put it in the stack. 184 StackEntry E; 185 E.FuncId = FuncId; 186 E.CPU = CPU; 187 E.Type = Type; 188 E.TSC = TSC; 189 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + 190 (sizeof(StackEntry) * (TLD.StackEntries - 1)); 191 internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); 192 break; 193 } 194 case XRayEntryType::EXIT: 195 case XRayEntryType::TAIL: { 196 if (TLD.StackEntries == 0) 197 break; 198 199 if (--TLD.StackEntries >= TLD.StackSize) 200 return; 201 202 // When we encounter an exit event, we check whether all the following are 203 // true: 204 // 205 // - The Function ID is the same as the most recent entry in the stack. 206 // - The CPU is the same as the most recent entry in the stack. 207 // - The Delta of the TSCs is less than the threshold amount of time we're 208 // looking to record. 209 // 210 // If all of these conditions are true, we pop the stack and don't write a 211 // record and move the record offset back. 212 StackEntry StackTop; 213 auto StackEntryPtr = static_cast<char *>(TLD.ShadowStack) + 214 (sizeof(StackEntry) * TLD.StackEntries); 215 internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); 216 if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && 217 StackTop.TSC < TSC) { 218 auto Delta = TSC - StackTop.TSC; 219 if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { 220 DCHECK(TLD.BufferOffset > 0); 221 TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; 222 return; 223 } 224 } 225 break; 226 } 227 default: 228 // Should be unreachable. 229 DCHECK(false && "Unsupported XRayEntryType encountered."); 230 break; 231 } 232 233 // First determine whether the delta between the function's enter record and 234 // the exit record is higher than the threshold. 235 XRayRecord R; 236 R.RecordType = RecordTypes::NORMAL; 237 R.CPU = CPU; 238 R.TSC = TSC; 239 R.TId = GetTid(); 240 R.PId = internal_getpid(); 241 R.Type = Type; 242 R.FuncId = FuncId; 243 auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer); 244 internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); 245 if (++TLD.BufferOffset == TLD.BufferSize) { 246 SpinMutexLock Lock(&LogMutex); 247 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 248 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 249 TLD.BufferOffset = 0; 250 TLD.StackEntries = 0; 251 } 252 } 253 254 template <class RDTSC> 255 void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, 256 RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { 257 auto &TLD = getThreadLocalData(); 258 auto FirstEntry = 259 reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer); 260 const auto &BuffLen = TLD.BufferSize; 261 LogWriter *LW = getGlobalLog(); 262 if (LW == nullptr) 263 return; 264 265 // First we check whether there's enough space to write the data consecutively 266 // in the thread-local buffer. If not, we first flush the buffer before 267 // attempting to write the two records that must be consecutive. 268 if (TLD.BufferOffset + 2 > BuffLen) { 269 SpinMutexLock Lock(&LogMutex); 270 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 271 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 272 TLD.BufferOffset = 0; 273 TLD.StackEntries = 0; 274 } 275 276 // Then we write the "we have an argument" record. 277 InMemoryRawLog(FuncId, Type, ReadTSC); 278 279 RecursionGuard G(Guard); 280 if (!G) 281 return; 282 283 // And, from here on write the arg payload. 284 XRayArgPayload R; 285 R.RecordType = RecordTypes::ARG_PAYLOAD; 286 R.FuncId = FuncId; 287 R.TId = GetTid(); 288 R.PId = internal_getpid(); 289 R.Arg = Arg1; 290 internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); 291 if (++TLD.BufferOffset == BuffLen) { 292 SpinMutexLock Lock(&LogMutex); 293 LW->WriteAll(reinterpret_cast<char *>(FirstEntry), 294 reinterpret_cast<char *>(FirstEntry + TLD.BufferOffset)); 295 TLD.BufferOffset = 0; 296 TLD.StackEntries = 0; 297 } 298 } 299 300 void basicLoggingHandleArg0RealTSC(int32_t FuncId, 301 XRayEntryType Type) XRAY_NEVER_INSTRUMENT { 302 InMemoryRawLog(FuncId, Type, readTSC); 303 } 304 305 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type) 306 XRAY_NEVER_INSTRUMENT { 307 InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { 308 timespec TS; 309 int result = clock_gettime(CLOCK_REALTIME, &TS); 310 if (result != 0) { 311 Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); 312 TS = {0, 0}; 313 } 314 CPU = 0; 315 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; 316 }); 317 } 318 319 void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type, 320 uint64_t Arg1) XRAY_NEVER_INSTRUMENT { 321 InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC); 322 } 323 324 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type, 325 uint64_t Arg1) XRAY_NEVER_INSTRUMENT { 326 InMemoryRawLogWithArg( 327 FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { 328 timespec TS; 329 int result = clock_gettime(CLOCK_REALTIME, &TS); 330 if (result != 0) { 331 Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); 332 TS = {0, 0}; 333 } 334 CPU = 0; 335 return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; 336 }); 337 } 338 339 static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { 340 ThreadLocalData &TLD = *reinterpret_cast<ThreadLocalData *>(P); 341 auto ExitGuard = at_scope_exit([&TLD] { 342 // Clean up dynamic resources. 343 if (TLD.InMemoryBuffer) 344 InternalFree(TLD.InMemoryBuffer); 345 if (TLD.ShadowStack) 346 InternalFree(TLD.ShadowStack); 347 if (Verbosity()) 348 Report("Cleaned up log for TID: %llu\n", GetTid()); 349 }); 350 351 if (TLD.LogWriter == nullptr || TLD.BufferOffset == 0) { 352 if (Verbosity()) 353 Report("Skipping buffer for TID: %llu; Offset = %zu\n", GetTid(), 354 TLD.BufferOffset); 355 return; 356 } 357 358 { 359 SpinMutexLock L(&LogMutex); 360 TLD.LogWriter->WriteAll(reinterpret_cast<char *>(TLD.InMemoryBuffer), 361 reinterpret_cast<char *>(TLD.InMemoryBuffer) + 362 (sizeof(XRayRecord) * TLD.BufferOffset)); 363 } 364 365 // Because this thread's exit could be the last one trying to write to 366 // the file and that we're not able to close out the file properly, we 367 // sync instead and hope that the pending writes are flushed as the 368 // thread exits. 369 TLD.LogWriter->Flush(); 370 } 371 372 XRayLogInitStatus basicLoggingInit(UNUSED size_t BufferSize, 373 UNUSED size_t BufferMax, void *Options, 374 size_t OptionsSize) XRAY_NEVER_INSTRUMENT { 375 uint8_t Expected = 0; 376 if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, 377 memory_order_acq_rel)) { 378 if (Verbosity()) 379 Report("Basic logging already initialized.\n"); 380 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 381 } 382 383 static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; 384 pthread_once(&OnceInit, +[] { 385 pthread_key_create(&PThreadKey, TLDDestructor); 386 atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); 387 // Initialize the global TicksPerSec value. 388 atomic_store(&TicksPerSec, 389 probeRequiredCPUFeatures() ? getTSCFrequency() 390 : NanosecondsPerSecond, 391 memory_order_release); 392 if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) 393 Report("WARNING: Required CPU features missing for XRay instrumentation, " 394 "using emulation instead.\n"); 395 }); 396 397 FlagParser P; 398 BasicFlags F; 399 F.setDefaults(); 400 registerXRayBasicFlags(&P, &F); 401 P.ParseString(useCompilerDefinedBasicFlags()); 402 auto *EnvOpts = GetEnv("XRAY_BASIC_OPTIONS"); 403 if (EnvOpts == nullptr) 404 EnvOpts = ""; 405 406 P.ParseString(EnvOpts); 407 408 // If XRAY_BASIC_OPTIONS was not defined, then we use the deprecated options 409 // set through XRAY_OPTIONS instead. 410 if (internal_strlen(EnvOpts) == 0) { 411 F.func_duration_threshold_us = 412 flags()->xray_naive_log_func_duration_threshold_us; 413 F.max_stack_depth = flags()->xray_naive_log_max_stack_depth; 414 F.thread_buffer_size = flags()->xray_naive_log_thread_buffer_size; 415 } 416 417 P.ParseString(static_cast<const char *>(Options)); 418 GlobalOptions.ThreadBufferSize = F.thread_buffer_size; 419 GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; 420 GlobalOptions.MaxStackDepth = F.max_stack_depth; 421 *basicFlags() = F; 422 423 atomic_store(&ThresholdTicks, 424 atomic_load(&TicksPerSec, memory_order_acquire) * 425 GlobalOptions.DurationFilterMicros / 1000000, 426 memory_order_release); 427 __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) 428 ? basicLoggingHandleArg1RealTSC 429 : basicLoggingHandleArg1EmulateTSC); 430 __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) 431 ? basicLoggingHandleArg0RealTSC 432 : basicLoggingHandleArg0EmulateTSC); 433 434 // TODO: Implement custom event and typed event handling support in Basic 435 // Mode. 436 __xray_remove_customevent_handler(); 437 __xray_remove_typedevent_handler(); 438 439 return XRayLogInitStatus::XRAY_LOG_INITIALIZED; 440 } 441 442 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { 443 uint8_t Expected = 0; 444 if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, 445 memory_order_acq_rel) && 446 Verbosity()) 447 Report("Basic logging already finalized.\n"); 448 449 // Nothing really to do aside from marking state of the global to be 450 // uninitialized. 451 452 return XRayLogInitStatus::XRAY_LOG_FINALIZED; 453 } 454 455 XRayLogFlushStatus basicLoggingFlush() XRAY_NEVER_INSTRUMENT { 456 // This really does nothing, since flushing the logs happen at the end of a 457 // thread's lifetime, or when the buffers are full. 458 return XRayLogFlushStatus::XRAY_LOG_FLUSHED; 459 } 460 461 // This is a handler that, effectively, does nothing. 462 void basicLoggingHandleArg0Empty(int32_t, XRayEntryType) XRAY_NEVER_INSTRUMENT { 463 } 464 465 bool basicLogDynamicInitializer() XRAY_NEVER_INSTRUMENT { 466 XRayLogImpl Impl{ 467 basicLoggingInit, 468 basicLoggingFinalize, 469 basicLoggingHandleArg0Empty, 470 basicLoggingFlush, 471 }; 472 auto RegistrationResult = __xray_log_register_mode("xray-basic", Impl); 473 if (RegistrationResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK && 474 Verbosity()) 475 Report("Cannot register XRay Basic Mode to 'xray-basic'; error = %d\n", 476 RegistrationResult); 477 if (flags()->xray_naive_log || 478 !internal_strcmp(flags()->xray_mode, "xray-basic")) { 479 auto SelectResult = __xray_log_select_mode("xray-basic"); 480 if (SelectResult != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) { 481 if (Verbosity()) 482 Report("Failed selecting XRay Basic Mode; error = %d\n", SelectResult); 483 return false; 484 } 485 486 // We initialize the implementation using the data we get from the 487 // XRAY_BASIC_OPTIONS environment variable, at this point of the 488 // implementation. 489 auto *Env = GetEnv("XRAY_BASIC_OPTIONS"); 490 auto InitResult = 491 __xray_log_init_mode("xray-basic", Env == nullptr ? "" : Env); 492 if (InitResult != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { 493 if (Verbosity()) 494 Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); 495 return false; 496 } 497 498 // At this point we know that we've successfully initialized Basic mode 499 // tracing, and the only chance we're going to get for the current thread to 500 // clean-up may be at thread/program exit. To ensure that we're going to get 501 // the cleanup even without calling the finalization routines, we're 502 // registering a program exit function that will do the cleanup. 503 static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; 504 pthread_once(&DynamicOnce, +[] { 505 static void *FakeTLD = nullptr; 506 FakeTLD = &getThreadLocalData(); 507 Atexit(+[] { TLDDestructor(FakeTLD); }); 508 }); 509 } 510 return true; 511 } 512 513 } // namespace __xray 514 515 static auto UNUSED Unused = __xray::basicLogDynamicInitializer(); 516