1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Register objects for access by profilers via the perf JIT interface. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h" 14 15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h" 16 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/Support/Path.h" 20 #include "llvm/Support/Process.h" 21 #include "llvm/Support/Threading.h" 22 23 #include <mutex> 24 #include <optional> 25 26 #ifdef __linux__ 27 28 #include <sys/mman.h> // mmap() 29 #include <time.h> // clock_gettime(), time(), localtime_r() */ 30 #include <unistd.h> // for read(), close() 31 32 #define DEBUG_TYPE "orc" 33 34 // language identifier (XXX: should we generate something better from debug 35 // info?) 36 #define JIT_LANG "llvm-IR" 37 #define LLVM_PERF_JIT_MAGIC \ 38 ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ 39 (uint32_t)'D') 40 #define LLVM_PERF_JIT_VERSION 1 41 42 using namespace llvm; 43 using namespace llvm::orc; 44 45 struct PerfState { 46 // cache lookups 47 uint32_t Pid; 48 49 // base directory for output data 50 std::string JitPath; 51 52 // output data stream, closed via Dumpstream 53 int DumpFd = -1; 54 55 // output data stream 56 std::unique_ptr<raw_fd_ostream> Dumpstream; 57 58 // perf mmap marker 59 void *MarkerAddr = NULL; 60 }; 61 62 // prevent concurrent dumps from messing up the output file 63 static std::mutex Mutex; 64 static std::optional<PerfState> State; 65 66 struct RecHeader { 67 uint32_t Id; 68 uint32_t TotalSize; 69 uint64_t Timestamp; 70 }; 71 72 struct DIR { 73 RecHeader Prefix; 74 uint64_t CodeAddr; 75 uint64_t NrEntry; 76 }; 77 78 struct DIE { 79 uint64_t CodeAddr; 80 uint32_t Line; 81 uint32_t Discrim; 82 }; 83 84 struct CLR { 85 RecHeader Prefix; 86 uint32_t Pid; 87 uint32_t Tid; 88 uint64_t Vma; 89 uint64_t CodeAddr; 90 uint64_t CodeSize; 91 uint64_t CodeIndex; 92 }; 93 94 struct UWR { 95 RecHeader Prefix; 96 uint64_t UnwindDataSize; 97 uint64_t EhFrameHeaderSize; 98 uint64_t MappedSize; 99 }; 100 101 static inline uint64_t timespec_to_ns(const struct timespec *TS) { 102 const uint64_t NanoSecPerSec = 1000000000; 103 return ((uint64_t)TS->tv_sec * NanoSecPerSec) + TS->tv_nsec; 104 } 105 106 static inline uint64_t perf_get_timestamp() { 107 timespec TS; 108 if (clock_gettime(CLOCK_MONOTONIC, &TS)) 109 return 0; 110 111 return timespec_to_ns(&TS); 112 } 113 114 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) { 115 assert(State && "PerfState not initialized"); 116 LLVM_DEBUG(dbgs() << "Writing debug record with " 117 << DebugRecord.Entries.size() << " entries\n"); 118 [[maybe_unused]] size_t Written = 0; 119 DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id), 120 DebugRecord.Prefix.TotalSize, perf_get_timestamp()}, 121 DebugRecord.CodeAddr, DebugRecord.Entries.size()}; 122 State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir)); 123 Written += sizeof(Dir); 124 for (auto &Die : DebugRecord.Entries) { 125 DIE d{Die.Addr, Die.Lineno, Die.Discrim}; 126 State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d)); 127 State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1); 128 Written += sizeof(d) + Die.Name.size() + 1; 129 } 130 LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n"); 131 } 132 133 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) { 134 assert(State && "PerfState not initialized"); 135 uint32_t Tid = get_threadid(); 136 LLVM_DEBUG(dbgs() << "Writing code record with code size " 137 << CodeRecord.CodeSize << " and code index " 138 << CodeRecord.CodeIndex << "\n"); 139 CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id), 140 CodeRecord.Prefix.TotalSize, perf_get_timestamp()}, 141 State->Pid, 142 Tid, 143 CodeRecord.Vma, 144 CodeRecord.CodeAddr, 145 CodeRecord.CodeSize, 146 CodeRecord.CodeIndex}; 147 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, " 148 << CodeRecord.Name.size() + 1 << " bytes of name, " 149 << CodeRecord.CodeSize << " bytes of code\n"); 150 State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr)); 151 State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1); 152 State->Dumpstream->write((const char *)CodeRecord.CodeAddr, 153 CodeRecord.CodeSize); 154 } 155 156 static void 157 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) { 158 assert(State && "PerfState not initialized"); 159 dbgs() << "Writing unwind record with unwind data size " 160 << UnwindRecord.UnwindDataSize << " and EH frame header size " 161 << UnwindRecord.EHFrameHdrSize << " and mapped size " 162 << UnwindRecord.MappedSize << "\n"; 163 UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id), 164 UnwindRecord.Prefix.TotalSize, perf_get_timestamp()}, 165 UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize, 166 UnwindRecord.MappedSize}; 167 LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, " 168 << UnwindRecord.EHFrameHdrSize 169 << " bytes of EH frame header, " 170 << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize 171 << " bytes of EH frame\n"); 172 State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr)); 173 if (UnwindRecord.EHFrameHdrAddr) 174 State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr, 175 UnwindRecord.EHFrameHdrSize); 176 else 177 State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(), 178 UnwindRecord.EHFrameHdrSize); 179 State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr, 180 UnwindRecord.UnwindDataSize - 181 UnwindRecord.EHFrameHdrSize); 182 } 183 184 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) { 185 if (!State) 186 return make_error<StringError>("PerfState not initialized", 187 inconvertibleErrorCode()); 188 189 // Serialize the batch 190 std::lock_guard<std::mutex> Lock(Mutex); 191 if (Batch.UnwindingRecord.Prefix.TotalSize > 0) 192 writeUnwindRecord(Batch.UnwindingRecord); 193 194 for (const auto &DebugInfo : Batch.DebugInfoRecords) 195 writeDebugRecord(DebugInfo); 196 197 for (const auto &CodeLoad : Batch.CodeLoadRecords) 198 writeCodeRecord(CodeLoad); 199 200 State->Dumpstream->flush(); 201 202 return Error::success(); 203 } 204 205 struct Header { 206 uint32_t Magic; // characters "JiTD" 207 uint32_t Version; // header version 208 uint32_t TotalSize; // total size of header 209 uint32_t ElfMach; // elf mach target 210 uint32_t Pad1; // reserved 211 uint32_t Pid; 212 uint64_t Timestamp; // timestamp 213 uint64_t Flags; // flags 214 }; 215 216 static Error OpenMarker(PerfState &State) { 217 // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap 218 // is captured either live (perf record running when we mmap) or in deferred 219 // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump 220 // file for more meta data info about the jitted code. Perf report/annotate 221 // detect this special filename and process the jitdump file. 222 // 223 // Mapping must be PROT_EXEC to ensure it is captured by perf record 224 // even when not using -d option. 225 State.MarkerAddr = 226 ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC, 227 MAP_PRIVATE, State.DumpFd, 0); 228 229 if (State.MarkerAddr == MAP_FAILED) 230 return make_error<llvm::StringError>("could not mmap JIT marker", 231 inconvertibleErrorCode()); 232 233 return Error::success(); 234 } 235 236 void CloseMarker(PerfState &State) { 237 if (!State.MarkerAddr) 238 return; 239 240 munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate()); 241 State.MarkerAddr = nullptr; 242 } 243 244 static Expected<Header> FillMachine(PerfState &State) { 245 Header Hdr; 246 Hdr.Magic = LLVM_PERF_JIT_MAGIC; 247 Hdr.Version = LLVM_PERF_JIT_VERSION; 248 Hdr.TotalSize = sizeof(Hdr); 249 Hdr.Pid = State.Pid; 250 Hdr.Timestamp = perf_get_timestamp(); 251 252 char Id[16]; 253 struct { 254 uint16_t e_type; 255 uint16_t e_machine; 256 } Info; 257 258 size_t RequiredMemory = sizeof(Id) + sizeof(Info); 259 260 ErrorOr<std::unique_ptr<MemoryBuffer>> MB = 261 MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0); 262 263 // This'll not guarantee that enough data was actually read from the 264 // underlying file. Instead the trailing part of the buffer would be 265 // zeroed. Given the ELF signature check below that seems ok though, 266 // it's unlikely that the file ends just after that, and the 267 // consequence would just be that perf wouldn't recognize the 268 // signature. 269 if (!MB) 270 return make_error<llvm::StringError>("could not open /proc/self/exe", 271 MB.getError()); 272 273 memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id)); 274 memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info)); 275 276 // check ELF signature 277 if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F') 278 return make_error<llvm::StringError>("invalid ELF signature", 279 inconvertibleErrorCode()); 280 281 Hdr.ElfMach = Info.e_machine; 282 283 return Hdr; 284 } 285 286 static Error InitDebuggingDir(PerfState &State) { 287 time_t Time; 288 struct tm LocalTime; 289 char TimeBuffer[sizeof("YYYYMMDD")]; 290 SmallString<64> Path; 291 292 // search for location to dump data to 293 if (const char *BaseDir = getenv("JITDUMPDIR")) 294 Path.append(BaseDir); 295 else if (!sys::path::home_directory(Path)) 296 Path = "."; 297 298 // create debug directory 299 Path += "/.debug/jit/"; 300 if (auto EC = sys::fs::create_directories(Path)) { 301 std::string ErrStr; 302 raw_string_ostream ErrStream(ErrStr); 303 ErrStream << "could not create jit cache directory " << Path << ": " 304 << EC.message() << "\n"; 305 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 306 } 307 308 // create unique directory for dump data related to this process 309 time(&Time); 310 localtime_r(&Time, &LocalTime); 311 strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); 312 Path += JIT_LANG "-jit-"; 313 Path += TimeBuffer; 314 315 SmallString<128> UniqueDebugDir; 316 317 using sys::fs::createUniqueDirectory; 318 if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { 319 std::string ErrStr; 320 raw_string_ostream ErrStream(ErrStr); 321 ErrStream << "could not create unique jit cache directory " 322 << UniqueDebugDir << ": " << EC.message() << "\n"; 323 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 324 } 325 326 State.JitPath = std::string(UniqueDebugDir); 327 328 return Error::success(); 329 } 330 331 static Error registerJITLoaderPerfStartImpl() { 332 PerfState Tentative; 333 Tentative.Pid = sys::Process::getProcessId(); 334 // check if clock-source is supported 335 if (!perf_get_timestamp()) 336 return make_error<StringError>("kernel does not support CLOCK_MONOTONIC", 337 inconvertibleErrorCode()); 338 339 if (auto Err = InitDebuggingDir(Tentative)) 340 return Err; 341 342 std::string Filename; 343 raw_string_ostream FilenameBuf(Filename); 344 FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump"; 345 346 // Need to open ourselves, because we need to hand the FD to OpenMarker() and 347 // raw_fd_ostream doesn't expose the FD. 348 using sys::fs::openFileForWrite; 349 if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd, 350 sys::fs::CD_CreateNew, sys::fs::OF_None)) { 351 std::string ErrStr; 352 raw_string_ostream ErrStream(ErrStr); 353 ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": " 354 << EC.message() << "\n"; 355 return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode()); 356 } 357 358 Tentative.Dumpstream = 359 std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true); 360 361 auto Header = FillMachine(Tentative); 362 if (!Header) 363 return Header.takeError(); 364 365 // signal this process emits JIT information 366 if (auto Err = OpenMarker(Tentative)) 367 return Err; 368 369 Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()), 370 sizeof(*Header)); 371 372 // Everything initialized, can do profiling now. 373 if (Tentative.Dumpstream->has_error()) 374 return make_error<StringError>("could not write JIT dump header", 375 inconvertibleErrorCode()); 376 377 State = std::move(Tentative); 378 return Error::success(); 379 } 380 381 static Error registerJITLoaderPerfEndImpl() { 382 if (!State) 383 return make_error<StringError>("PerfState not initialized", 384 inconvertibleErrorCode()); 385 386 RecHeader Close; 387 Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE); 388 Close.TotalSize = sizeof(Close); 389 Close.Timestamp = perf_get_timestamp(); 390 State->Dumpstream->write(reinterpret_cast<const char *>(&Close), 391 sizeof(Close)); 392 if (State->MarkerAddr) 393 CloseMarker(*State); 394 395 State.reset(); 396 return Error::success(); 397 } 398 399 extern "C" llvm::orc::shared::CWrapperFunctionResult 400 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 401 using namespace orc::shared; 402 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle( 403 Data, Size, registerJITLoaderPerfImpl) 404 .release(); 405 } 406 407 extern "C" llvm::orc::shared::CWrapperFunctionResult 408 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 409 using namespace orc::shared; 410 return WrapperFunction<SPSError()>::handle(Data, Size, 411 registerJITLoaderPerfStartImpl) 412 .release(); 413 } 414 415 extern "C" llvm::orc::shared::CWrapperFunctionResult 416 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 417 using namespace orc::shared; 418 return WrapperFunction<SPSError()>::handle(Data, Size, 419 registerJITLoaderPerfEndImpl) 420 .release(); 421 } 422 423 #else 424 425 using namespace llvm; 426 using namespace llvm::orc; 427 428 static Error badOS() { 429 using namespace llvm; 430 return llvm::make_error<StringError>( 431 "unsupported OS (perf support is only available on linux!)", 432 inconvertibleErrorCode()); 433 } 434 435 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); } 436 437 extern "C" llvm::orc::shared::CWrapperFunctionResult 438 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) { 439 using namespace shared; 440 return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size, 441 badOSBatch) 442 .release(); 443 } 444 445 extern "C" llvm::orc::shared::CWrapperFunctionResult 446 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) { 447 using namespace shared; 448 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 449 } 450 451 extern "C" llvm::orc::shared::CWrapperFunctionResult 452 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) { 453 using namespace shared; 454 return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release(); 455 } 456 457 #endif 458