xref: /freebsd/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp (revision b5a3a89c50671a1ad29e7c43fe15e7b16feac239)
1 #include "cpuid.h"
2 #include "sanitizer_common/sanitizer_common.h"
3 #if !SANITIZER_FUCHSIA
4 #include "sanitizer_common/sanitizer_posix.h"
5 #endif
6 #include "xray_defs.h"
7 #include "xray_interface_internal.h"
8 
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
10 #include <sys/types.h>
11 #include <sys/sysctl.h>
12 #elif SANITIZER_FUCHSIA
13 #include <zircon/syscalls.h>
14 #endif
15 
16 #include <atomic>
17 #include <cstdint>
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <iterator>
21 #include <limits>
22 #include <tuple>
23 #include <unistd.h>
24 
25 namespace __xray {
26 
27 #if SANITIZER_LINUX
28 static std::pair<ssize_t, bool>
29 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
30   auto BytesToRead = std::distance(Begin, End);
31   ssize_t BytesRead;
32   ssize_t TotalBytesRead = 0;
33   while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
34     if (BytesRead == -1) {
35       if (errno == EINTR)
36         continue;
37       Report("Read error; errno = %d\n", errno);
38       return std::make_pair(TotalBytesRead, false);
39     }
40 
41     TotalBytesRead += BytesRead;
42     BytesToRead -= BytesRead;
43     Begin += BytesRead;
44   }
45   return std::make_pair(TotalBytesRead, true);
46 }
47 
48 static bool readValueFromFile(const char *Filename,
49                               long long *Value) XRAY_NEVER_INSTRUMENT {
50   int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
51   if (Fd == -1)
52     return false;
53   static constexpr size_t BufSize = 256;
54   char Line[BufSize] = {};
55   ssize_t BytesRead;
56   bool Success;
57   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
58   close(Fd);
59   if (!Success)
60     return false;
61   const char *End = nullptr;
62   long long Tmp = internal_simple_strtoll(Line, &End, 10);
63   bool Result = false;
64   if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
65     *Value = Tmp;
66     Result = true;
67   }
68   return Result;
69 }
70 
71 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
72   long long TSCFrequency = -1;
73   if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
74                         &TSCFrequency)) {
75     TSCFrequency *= 1000;
76   } else if (readValueFromFile(
77                  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
78                  &TSCFrequency)) {
79     TSCFrequency *= 1000;
80   } else {
81     Report("Unable to determine CPU frequency for TSC accounting.\n");
82   }
83   return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
84 }
85 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_APPLE
86 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
87     long long TSCFrequency = -1;
88     size_t tscfreqsz = sizeof(TSCFrequency);
89 #if SANITIZER_APPLE
90     if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
91                               &tscfreqsz, NULL, 0) != -1) {
92 
93 #else
94     if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
95                               NULL, 0) != -1) {
96 #endif
97         return static_cast<uint64_t>(TSCFrequency);
98     } else {
99       Report("Unable to determine CPU frequency for TSC accounting.\n");
100     }
101 
102     return 0;
103 }
104 #elif !SANITIZER_FUCHSIA
105 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
106     /* Not supported */
107     return 0;
108 }
109 #endif
110 
// Single-byte x86-64 opcodes written into patched sleds.
static constexpr uint8_t CallOpCode = 0xe8; // call rel32
static constexpr uint8_t JmpOpCode = 0xe9;  // jmp rel32
static constexpr uint8_t RetOpCode = 0xc3;  // ret

// Two-byte sequences stored as a single 16-bit word over the start of a sled.
// x86 is little-endian, so the low byte of each constant is the first byte in
// memory (e.g. 0x09eb is laid out as EB 09).
static constexpr uint16_t MovR10Seq = 0xba41; // 41 BA: mov r10d, imm32
static constexpr uint16_t Jmp9Seq = 0x09eb;   // EB 09: jmp +9
static constexpr uint16_t Jmp15Seq = 0x0feb;  // EB 0F: jmp +15
static constexpr uint16_t Jmp20Seq = 0x14eb;  // EB 14: jmp +20
static constexpr uint16_t NopwSeq = 0x9066;   // 66 90: 2-byte nop

// Inclusive range of displacements that fit in the signed 32-bit operand of a
// rel32 call/jmp.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
122 
123 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
124                         const XRaySledEntry &Sled,
125                         void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
126   // Here we do the dance of replacing the following sled:
127   //
128   // xray_sled_n:
129   //   jmp +9
130   //   <9 byte nop>
131   //
132   // With the following:
133   //
134   //   mov r10d, <function id>
135   //   call <relative 32bit offset to entry trampoline>
136   //
137   // We need to do this in the following order:
138   //
139   // 1. Put the function id first, 2 bytes from the start of the sled (just
140   // after the 2-byte jmp instruction).
141   // 2. Put the call opcode 6 bytes from the start of the sled.
142   // 3. Put the relative offset 7 bytes from the start of the sled.
143   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
144   // opcode and first operand.
145   //
146   // Prerequisite is to compute the relative offset to the trampoline's address.
147   const uint64_t Address = Sled.address();
148   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
149                              (static_cast<int64_t>(Address) + 11);
150   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
151     Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
152            reinterpret_cast<void *>(Trampoline),
153            reinterpret_cast<void *>(Address));
154     return false;
155   }
156   if (Enable) {
157     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
158     *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
159     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
160     std::atomic_store_explicit(
161         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
162         std::memory_order_release);
163   } else {
164     std::atomic_store_explicit(
165         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
166         std::memory_order_release);
167     // FIXME: Write out the nops still?
168   }
169   return true;
170 }
171 
172 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
173                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
174   // Here we do the dance of replacing the following sled:
175   //
176   // xray_sled_n:
177   //   ret
178   //   <10 byte nop>
179   //
180   // With the following:
181   //
182   //   mov r10d, <function id>
183   //   jmp <relative 32bit offset to exit trampoline>
184   //
185   // 1. Put the function id first, 2 bytes from the start of the sled (just
186   // after the 1-byte ret instruction).
187   // 2. Put the jmp opcode 6 bytes from the start of the sled.
188   // 3. Put the relative offset 7 bytes from the start of the sled.
189   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
190   // opcode and first operand.
191   //
192   // Prerequisite is to compute the relative offset fo the
193   // __xray_FunctionExit function's address.
194   const uint64_t Address = Sled.address();
195   int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
196                              (static_cast<int64_t>(Address) + 11);
197   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
198     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
199            reinterpret_cast<void *>(__xray_FunctionExit),
200            reinterpret_cast<void *>(Address));
201     return false;
202   }
203   if (Enable) {
204     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
205     *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode;
206     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
207     std::atomic_store_explicit(
208         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
209         std::memory_order_release);
210   } else {
211     std::atomic_store_explicit(
212         reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode,
213         std::memory_order_release);
214     // FIXME: Write out the nops still?
215   }
216   return true;
217 }
218 
219 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
220                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
221   // Here we do the dance of replacing the tail call sled with a similar
222   // sequence as the entry sled, but calls the tail exit sled instead.
223   const uint64_t Address = Sled.address();
224   int64_t TrampolineOffset =
225       reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
226       (static_cast<int64_t>(Address) + 11);
227   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
228     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
229            reinterpret_cast<void *>(__xray_FunctionTailExit),
230            reinterpret_cast<void *>(Address));
231     return false;
232   }
233   if (Enable) {
234     *reinterpret_cast<uint32_t *>(Address + 2) = FuncId;
235     *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode;
236     *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset;
237     std::atomic_store_explicit(
238         reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq,
239         std::memory_order_release);
240   } else {
241     std::atomic_store_explicit(
242         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq,
243         std::memory_order_release);
244     // FIXME: Write out the nops still?
245   }
246   return true;
247 }
248 
249 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
250                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
251   // Here we do the dance of replacing the following sled:
252   //
253   // In Version 0:
254   //
255   // xray_sled_n:
256   //   jmp +20          // 2 bytes
257   //   ...
258   //
259   // With the following:
260   //
261   //   nopw             // 2 bytes*
262   //   ...
263   //
264   //
265   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
266   //
267   // ---
268   //
269   // In Version 1 or 2:
270   //
271   //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
272   //   to a jmp, use 15 bytes instead.
273   //
274   const uint64_t Address = Sled.address();
275   if (Enable) {
276     std::atomic_store_explicit(
277         reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
278         std::memory_order_release);
279   } else {
280     switch (Sled.Version) {
281     case 1:
282     case 2:
283       std::atomic_store_explicit(
284           reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq,
285           std::memory_order_release);
286       break;
287     case 0:
288     default:
289       std::atomic_store_explicit(
290           reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
291           std::memory_order_release);
292       break;
293     }
294     }
295   return false;
296 }
297 
298 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
299                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
300   // Here we do the dance of replacing the following sled:
301   //
302   // xray_sled_n:
303   //   jmp +20          // 2 byte instruction
304   //   ...
305   //
306   // With the following:
307   //
308   //   nopw             // 2 bytes
309   //   ...
310   //
311   //
312   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
313   // The 20 byte sled stashes three argument registers, calls the trampoline,
314   // unstashes the registers and returns. If the arguments are already in
315   // the correct registers, the stashing and unstashing become equivalently
316   // sized nops.
317   const uint64_t Address = Sled.address();
318   if (Enable) {
319     std::atomic_store_explicit(
320         reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq,
321         std::memory_order_release);
322   } else {
323     std::atomic_store_explicit(
324         reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq,
325         std::memory_order_release);
326   }
327   return false;
328 }
329 
330 #if !SANITIZER_FUCHSIA
331 // We determine whether the CPU we're running on has the correct features we
332 // need. In x86_64 this will be rdtscp support.
333 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
334   unsigned int EAX, EBX, ECX, EDX;
335 
336   // We check whether rdtscp support is enabled. According to the x86_64 manual,
337   // level should be set at 0x80000001, and we should have a look at bit 27 in
338   // EDX. That's 0x8000000 (or 1u << 27).
339   __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
340     : "0"(0x80000001));
341   if (!(EDX & (1u << 27))) {
342     Report("Missing rdtscp support.\n");
343     return false;
344   }
345   // Also check whether we can determine the CPU frequency, since if we cannot,
346   // we should use the emulated TSC instead.
347   if (!getTSCFrequency()) {
348     Report("Unable to determine CPU frequency.\n");
349     return false;
350   }
351   return true;
352 }
353 #endif
354 
355 } // namespace __xray
356