xref: /freebsd/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.cpp (revision b9128a37faafede823eb456aa65a11ac69997284)
1 //===------- JITLoaderPerf.cpp - Register profiler objects ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Register objects for access by profilers via the perf JIT interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h"
14 
15 #include "llvm/ExecutionEngine/Orc/Shared/PerfSharedStructs.h"
16 
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/Path.h"
20 #include "llvm/Support/Process.h"
21 #include "llvm/Support/Threading.h"
22 
23 #include <mutex>
24 #include <optional>
25 
26 #ifdef __linux__
27 
28 #include <sys/mman.h> // mmap()
#include <time.h>     // clock_gettime(), time(), localtime_r()
30 #include <unistd.h>   // for read(), close()
31 
32 #define DEBUG_TYPE "orc"
33 
34 // language identifier (XXX: should we generate something better from debug
35 // info?)
36 #define JIT_LANG "llvm-IR"
37 #define LLVM_PERF_JIT_MAGIC                                                    \
38   ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 |            \
39    (uint32_t)'D')
40 #define LLVM_PERF_JIT_VERSION 1
41 
42 using namespace llvm;
43 using namespace llvm::orc;
44 
45 struct PerfState {
46   // cache lookups
47   uint32_t Pid;
48 
49   // base directory for output data
50   std::string JitPath;
51 
52   // output data stream, closed via Dumpstream
53   int DumpFd = -1;
54 
55   // output data stream
56   std::unique_ptr<raw_fd_ostream> Dumpstream;
57 
58   // perf mmap marker
59   void *MarkerAddr = NULL;
60 };
61 
// Serializes writers so that records from concurrent dumps cannot interleave
// in the output file.
static std::mutex Mutex;
// Holds the active session; engaged by a successful start request and reset
// again by the end request.
static std::optional<PerfState> State;
65 
// Fixed-size prefix written at the start of every record in the dump file.
struct RecHeader {
  uint32_t Id;        // record type (a PerfJITRecordType value cast to 32 bit)
  uint32_t TotalSize; // size the producer declared for the whole record
  uint64_t Timestamp; // perf_get_timestamp() sampled when the record is built
};
71 
// Head of a debug-info record. writeDebugRecord() emits it first and then
// appends one DIE (plus its name) per entry.
struct DIR {
  RecHeader Prefix;
  uint64_t CodeAddr; // address of the code the entries describe
  uint64_t NrEntry;  // number of DIE entries that follow
};
77 
// One debug-info entry inside a debug-info record. On disk each entry is
// followed by its name, written with one extra terminating byte.
struct DIE {
  uint64_t CodeAddr; // address this entry applies to
  uint32_t Line;     // source line number
  uint32_t Discrim;  // discriminator value copied from the incoming entry
};
83 
// Head of a code-load record. writeCodeRecord() follows it with the symbol
// name and then CodeSize bytes read from CodeAddr.
struct CLR {
  RecHeader Prefix;
  uint32_t Pid;       // process id cached in PerfState
  uint32_t Tid;       // id of the thread writing the record
  uint64_t Vma;       // copied verbatim from the incoming record
  uint64_t CodeAddr;  // address the code bytes are read from
  uint64_t CodeSize;  // number of code bytes appended after the name
  uint64_t CodeIndex; // copied verbatim from the incoming record
};
93 
// Head of an unwinding-info record. writeUnwindRecord() follows it with
// EhFrameHeaderSize bytes of EH frame header and then the remaining
// (UnwindDataSize - EhFrameHeaderSize) bytes of EH frame data.
struct UWR {
  RecHeader Prefix;
  uint64_t UnwindDataSize;    // header plus frame data, in bytes
  uint64_t EhFrameHeaderSize; // size of the EH frame header part, in bytes
  uint64_t MappedSize;        // copied verbatim from the incoming record
};
100 
// Collapse a timespec (seconds + nanoseconds) into one nanosecond count.
static inline uint64_t timespec_to_ns(const struct timespec *TS) {
  const uint64_t WholeSeconds = static_cast<uint64_t>(TS->tv_sec);
  const uint64_t SecondsAsNs = WholeSeconds * 1000000000;
  return SecondsAsNs + TS->tv_nsec;
}
105 
106 static inline uint64_t perf_get_timestamp() {
107   timespec TS;
108   if (clock_gettime(CLOCK_MONOTONIC, &TS))
109     return 0;
110 
111   return timespec_to_ns(&TS);
112 }
113 
114 static void writeDebugRecord(const PerfJITDebugInfoRecord &DebugRecord) {
115   assert(State && "PerfState not initialized");
116   LLVM_DEBUG(dbgs() << "Writing debug record with "
117                     << DebugRecord.Entries.size() << " entries\n");
118   [[maybe_unused]] size_t Written = 0;
119   DIR Dir{RecHeader{static_cast<uint32_t>(DebugRecord.Prefix.Id),
120                     DebugRecord.Prefix.TotalSize, perf_get_timestamp()},
121           DebugRecord.CodeAddr, DebugRecord.Entries.size()};
122   State->Dumpstream->write(reinterpret_cast<const char *>(&Dir), sizeof(Dir));
123   Written += sizeof(Dir);
124   for (auto &Die : DebugRecord.Entries) {
125     DIE d{Die.Addr, Die.Lineno, Die.Discrim};
126     State->Dumpstream->write(reinterpret_cast<const char *>(&d), sizeof(d));
127     State->Dumpstream->write(Die.Name.data(), Die.Name.size() + 1);
128     Written += sizeof(d) + Die.Name.size() + 1;
129   }
130   LLVM_DEBUG(dbgs() << "wrote " << Written << " bytes of debug info\n");
131 }
132 
133 static void writeCodeRecord(const PerfJITCodeLoadRecord &CodeRecord) {
134   assert(State && "PerfState not initialized");
135   uint32_t Tid = get_threadid();
136   LLVM_DEBUG(dbgs() << "Writing code record with code size "
137                     << CodeRecord.CodeSize << " and code index "
138                     << CodeRecord.CodeIndex << "\n");
139   CLR Clr{RecHeader{static_cast<uint32_t>(CodeRecord.Prefix.Id),
140                     CodeRecord.Prefix.TotalSize, perf_get_timestamp()},
141           State->Pid,
142           Tid,
143           CodeRecord.Vma,
144           CodeRecord.CodeAddr,
145           CodeRecord.CodeSize,
146           CodeRecord.CodeIndex};
147   LLVM_DEBUG(dbgs() << "wrote " << sizeof(Clr) << " bytes of CLR, "
148                     << CodeRecord.Name.size() + 1 << " bytes of name, "
149                     << CodeRecord.CodeSize << " bytes of code\n");
150   State->Dumpstream->write(reinterpret_cast<const char *>(&Clr), sizeof(Clr));
151   State->Dumpstream->write(CodeRecord.Name.data(), CodeRecord.Name.size() + 1);
152   State->Dumpstream->write((const char *)CodeRecord.CodeAddr,
153                            CodeRecord.CodeSize);
154 }
155 
156 static void
157 writeUnwindRecord(const PerfJITCodeUnwindingInfoRecord &UnwindRecord) {
158   assert(State && "PerfState not initialized");
159   dbgs() << "Writing unwind record with unwind data size "
160          << UnwindRecord.UnwindDataSize << " and EH frame header size "
161          << UnwindRecord.EHFrameHdrSize << " and mapped size "
162          << UnwindRecord.MappedSize << "\n";
163   UWR Uwr{RecHeader{static_cast<uint32_t>(UnwindRecord.Prefix.Id),
164                     UnwindRecord.Prefix.TotalSize, perf_get_timestamp()},
165           UnwindRecord.UnwindDataSize, UnwindRecord.EHFrameHdrSize,
166           UnwindRecord.MappedSize};
167   LLVM_DEBUG(dbgs() << "wrote " << sizeof(Uwr) << " bytes of UWR, "
168                     << UnwindRecord.EHFrameHdrSize
169                     << " bytes of EH frame header, "
170                     << UnwindRecord.UnwindDataSize - UnwindRecord.EHFrameHdrSize
171                     << " bytes of EH frame\n");
172   State->Dumpstream->write(reinterpret_cast<const char *>(&Uwr), sizeof(Uwr));
173   if (UnwindRecord.EHFrameHdrAddr)
174     State->Dumpstream->write((const char *)UnwindRecord.EHFrameHdrAddr,
175                              UnwindRecord.EHFrameHdrSize);
176   else
177     State->Dumpstream->write(UnwindRecord.EHFrameHdr.data(),
178                              UnwindRecord.EHFrameHdrSize);
179   State->Dumpstream->write((const char *)UnwindRecord.EHFrameAddr,
180                            UnwindRecord.UnwindDataSize -
181                                UnwindRecord.EHFrameHdrSize);
182 }
183 
184 static Error registerJITLoaderPerfImpl(const PerfJITRecordBatch &Batch) {
185   if (!State)
186     return make_error<StringError>("PerfState not initialized",
187                                    inconvertibleErrorCode());
188 
189   // Serialize the batch
190   std::lock_guard<std::mutex> Lock(Mutex);
191   if (Batch.UnwindingRecord.Prefix.TotalSize > 0)
192     writeUnwindRecord(Batch.UnwindingRecord);
193 
194   for (const auto &DebugInfo : Batch.DebugInfoRecords)
195     writeDebugRecord(DebugInfo);
196 
197   for (const auto &CodeLoad : Batch.CodeLoadRecords)
198     writeCodeRecord(CodeLoad);
199 
200   State->Dumpstream->flush();
201 
202   return Error::success();
203 }
204 
// File header written once at the start of the dump file.
//
// Every field is zero by default so that fields nobody assigns (the reserved
// padding and the flags) never reach the file with indeterminate contents.
struct Header {
  uint32_t Magic = 0;     // characters "JiTD"
  uint32_t Version = 0;   // header version
  uint32_t TotalSize = 0; // total size of header
  uint32_t ElfMach = 0;   // elf mach target
  uint32_t Pad1 = 0;      // reserved
  uint32_t Pid = 0;       // process id of the writing process
  uint64_t Timestamp = 0; // timestamp
  uint64_t Flags = 0;     // flags
};
215 
216 static Error OpenMarker(PerfState &State) {
217   // We mmap the jitdump to create an MMAP RECORD in perf.data file.  The mmap
218   // is captured either live (perf record running when we mmap) or in deferred
219   // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump
220   // file for more meta data info about the jitted code. Perf report/annotate
221   // detect this special filename and process the jitdump file.
222   //
223   // Mapping must be PROT_EXEC to ensure it is captured by perf record
224   // even when not using -d option.
225   State.MarkerAddr =
226       ::mmap(NULL, sys::Process::getPageSizeEstimate(), PROT_READ | PROT_EXEC,
227              MAP_PRIVATE, State.DumpFd, 0);
228 
229   if (State.MarkerAddr == MAP_FAILED)
230     return make_error<llvm::StringError>("could not mmap JIT marker",
231                                          inconvertibleErrorCode());
232 
233   return Error::success();
234 }
235 
236 void CloseMarker(PerfState &State) {
237   if (!State.MarkerAddr)
238     return;
239 
240   munmap(State.MarkerAddr, sys::Process::getPageSizeEstimate());
241   State.MarkerAddr = nullptr;
242 }
243 
244 static Expected<Header> FillMachine(PerfState &State) {
245   Header Hdr;
246   Hdr.Magic = LLVM_PERF_JIT_MAGIC;
247   Hdr.Version = LLVM_PERF_JIT_VERSION;
248   Hdr.TotalSize = sizeof(Hdr);
249   Hdr.Pid = State.Pid;
250   Hdr.Timestamp = perf_get_timestamp();
251 
252   char Id[16];
253   struct {
254     uint16_t e_type;
255     uint16_t e_machine;
256   } Info;
257 
258   size_t RequiredMemory = sizeof(Id) + sizeof(Info);
259 
260   ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
261       MemoryBuffer::getFileSlice("/proc/self/exe", RequiredMemory, 0);
262 
263   // This'll not guarantee that enough data was actually read from the
264   // underlying file. Instead the trailing part of the buffer would be
265   // zeroed. Given the ELF signature check below that seems ok though,
266   // it's unlikely that the file ends just after that, and the
267   // consequence would just be that perf wouldn't recognize the
268   // signature.
269   if (!MB)
270     return make_error<llvm::StringError>("could not open /proc/self/exe",
271                                          MB.getError());
272 
273   memcpy(&Id, (*MB)->getBufferStart(), sizeof(Id));
274   memcpy(&Info, (*MB)->getBufferStart() + sizeof(Id), sizeof(Info));
275 
276   // check ELF signature
277   if (Id[0] != 0x7f || Id[1] != 'E' || Id[2] != 'L' || Id[3] != 'F')
278     return make_error<llvm::StringError>("invalid ELF signature",
279                                          inconvertibleErrorCode());
280 
281   Hdr.ElfMach = Info.e_machine;
282 
283   return Hdr;
284 }
285 
286 static Error InitDebuggingDir(PerfState &State) {
287   time_t Time;
288   struct tm LocalTime;
289   char TimeBuffer[sizeof("YYYYMMDD")];
290   SmallString<64> Path;
291 
292   // search for location to dump data to
293   if (const char *BaseDir = getenv("JITDUMPDIR"))
294     Path.append(BaseDir);
295   else if (!sys::path::home_directory(Path))
296     Path = ".";
297 
298   // create debug directory
299   Path += "/.debug/jit/";
300   if (auto EC = sys::fs::create_directories(Path)) {
301     std::string ErrStr;
302     raw_string_ostream ErrStream(ErrStr);
303     ErrStream << "could not create jit cache directory " << Path << ": "
304               << EC.message() << "\n";
305     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
306   }
307 
308   // create unique directory for dump data related to this process
309   time(&Time);
310   localtime_r(&Time, &LocalTime);
311   strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime);
312   Path += JIT_LANG "-jit-";
313   Path += TimeBuffer;
314 
315   SmallString<128> UniqueDebugDir;
316 
317   using sys::fs::createUniqueDirectory;
318   if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) {
319     std::string ErrStr;
320     raw_string_ostream ErrStream(ErrStr);
321     ErrStream << "could not create unique jit cache directory "
322               << UniqueDebugDir << ": " << EC.message() << "\n";
323     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
324   }
325 
326   State.JitPath = std::string(UniqueDebugDir);
327 
328   return Error::success();
329 }
330 
331 static Error registerJITLoaderPerfStartImpl() {
332   PerfState Tentative;
333   Tentative.Pid = sys::Process::getProcessId();
334   // check if clock-source is supported
335   if (!perf_get_timestamp())
336     return make_error<StringError>("kernel does not support CLOCK_MONOTONIC",
337                                    inconvertibleErrorCode());
338 
339   if (auto Err = InitDebuggingDir(Tentative))
340     return Err;
341 
342   std::string Filename;
343   raw_string_ostream FilenameBuf(Filename);
344   FilenameBuf << Tentative.JitPath << "/jit-" << Tentative.Pid << ".dump";
345 
346   // Need to open ourselves, because we need to hand the FD to OpenMarker() and
347   // raw_fd_ostream doesn't expose the FD.
348   using sys::fs::openFileForWrite;
349   if (auto EC = openFileForReadWrite(FilenameBuf.str(), Tentative.DumpFd,
350                                      sys::fs::CD_CreateNew, sys::fs::OF_None)) {
351     std::string ErrStr;
352     raw_string_ostream ErrStream(ErrStr);
353     ErrStream << "could not open JIT dump file " << FilenameBuf.str() << ": "
354               << EC.message() << "\n";
355     return make_error<StringError>(std::move(ErrStr), inconvertibleErrorCode());
356   }
357 
358   Tentative.Dumpstream =
359       std::make_unique<raw_fd_ostream>(Tentative.DumpFd, true);
360 
361   auto Header = FillMachine(Tentative);
362   if (!Header)
363     return Header.takeError();
364 
365   // signal this process emits JIT information
366   if (auto Err = OpenMarker(Tentative))
367     return Err;
368 
369   Tentative.Dumpstream->write(reinterpret_cast<const char *>(&Header.get()),
370                               sizeof(*Header));
371 
372   // Everything initialized, can do profiling now.
373   if (Tentative.Dumpstream->has_error())
374     return make_error<StringError>("could not write JIT dump header",
375                                    inconvertibleErrorCode());
376 
377   State = std::move(Tentative);
378   return Error::success();
379 }
380 
381 static Error registerJITLoaderPerfEndImpl() {
382   if (!State)
383     return make_error<StringError>("PerfState not initialized",
384                                    inconvertibleErrorCode());
385 
386   RecHeader Close;
387   Close.Id = static_cast<uint32_t>(PerfJITRecordType::JIT_CODE_CLOSE);
388   Close.TotalSize = sizeof(Close);
389   Close.Timestamp = perf_get_timestamp();
390   State->Dumpstream->write(reinterpret_cast<const char *>(&Close),
391                            sizeof(Close));
392   if (State->MarkerAddr)
393     CloseMarker(*State);
394 
395   State.reset();
396   return Error::success();
397 }
398 
399 extern "C" llvm::orc::shared::CWrapperFunctionResult
400 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
401   using namespace orc::shared;
402   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(
403              Data, Size, registerJITLoaderPerfImpl)
404       .release();
405 }
406 
407 extern "C" llvm::orc::shared::CWrapperFunctionResult
408 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
409   using namespace orc::shared;
410   return WrapperFunction<SPSError()>::handle(Data, Size,
411                                              registerJITLoaderPerfStartImpl)
412       .release();
413 }
414 
415 extern "C" llvm::orc::shared::CWrapperFunctionResult
416 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
417   using namespace orc::shared;
418   return WrapperFunction<SPSError()>::handle(Data, Size,
419                                              registerJITLoaderPerfEndImpl)
420       .release();
421 }
422 
423 #else
424 
425 using namespace llvm;
426 using namespace llvm::orc;
427 
428 static Error badOS() {
429   using namespace llvm;
430   return llvm::make_error<StringError>(
431       "unsupported OS (perf support is only available on linux!)",
432       inconvertibleErrorCode());
433 }
434 
435 static Error badOSBatch(PerfJITRecordBatch &Batch) { return badOS(); }
436 
437 extern "C" llvm::orc::shared::CWrapperFunctionResult
438 llvm_orc_registerJITLoaderPerfImpl(const char *Data, uint64_t Size) {
439   using namespace shared;
440   return WrapperFunction<SPSError(SPSPerfJITRecordBatch)>::handle(Data, Size,
441                                                                   badOSBatch)
442       .release();
443 }
444 
445 extern "C" llvm::orc::shared::CWrapperFunctionResult
446 llvm_orc_registerJITLoaderPerfStart(const char *Data, uint64_t Size) {
447   using namespace shared;
448   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
449 }
450 
451 extern "C" llvm::orc::shared::CWrapperFunctionResult
452 llvm_orc_registerJITLoaderPerfEnd(const char *Data, uint64_t Size) {
453   using namespace shared;
454   return WrapperFunction<SPSError()>::handle(Data, Size, badOS).release();
455 }
456 
457 #endif
458