xref: /freebsd/contrib/llvm-project/compiler-rt/lib/xray/xray_x86_64.cpp (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1 #include "cpuid.h"
2 #include "sanitizer_common/sanitizer_common.h"
3 #if !SANITIZER_FUCHSIA
4 #include "sanitizer_common/sanitizer_posix.h"
5 #endif
6 #include "xray_defs.h"
7 #include "xray_interface_internal.h"
8 
9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
10 #include <sys/types.h>
11 #if SANITIZER_OPENBSD
12 #include <sys/time.h>
13 #include <machine/cpu.h>
14 #endif
15 #include <sys/sysctl.h>
16 #elif SANITIZER_FUCHSIA
17 #include <zircon/syscalls.h>
18 #endif
19 
20 #include <atomic>
21 #include <cstdint>
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <iterator>
25 #include <limits>
26 #include <tuple>
27 #include <unistd.h>
28 
29 namespace __xray {
30 
31 #if SANITIZER_LINUX
32 static std::pair<ssize_t, bool>
33 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
34   auto BytesToRead = std::distance(Begin, End);
35   ssize_t BytesRead;
36   ssize_t TotalBytesRead = 0;
37   while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) {
38     if (BytesRead == -1) {
39       if (errno == EINTR)
40         continue;
41       Report("Read error; errno = %d\n", errno);
42       return std::make_pair(TotalBytesRead, false);
43     }
44 
45     TotalBytesRead += BytesRead;
46     BytesToRead -= BytesRead;
47     Begin += BytesRead;
48   }
49   return std::make_pair(TotalBytesRead, true);
50 }
51 
52 static bool readValueFromFile(const char *Filename,
53                               long long *Value) XRAY_NEVER_INSTRUMENT {
54   int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
55   if (Fd == -1)
56     return false;
57   static constexpr size_t BufSize = 256;
58   char Line[BufSize] = {};
59   ssize_t BytesRead;
60   bool Success;
61   std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize);
62   close(Fd);
63   if (!Success)
64     return false;
65   const char *End = nullptr;
66   long long Tmp = internal_simple_strtoll(Line, &End, 10);
67   bool Result = false;
68   if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
69     *Value = Tmp;
70     Result = true;
71   }
72   return Result;
73 }
74 
75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
76   long long TSCFrequency = -1;
77   if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
78                         &TSCFrequency)) {
79     TSCFrequency *= 1000;
80   } else if (readValueFromFile(
81                  "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
82                  &TSCFrequency)) {
83     TSCFrequency *= 1000;
84   } else {
85     Report("Unable to determine CPU frequency for TSC accounting.\n");
86   }
87   return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency);
88 }
89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC
90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
91     long long TSCFrequency = -1;
92     size_t tscfreqsz = sizeof(TSCFrequency);
93 #if SANITIZER_OPENBSD
94     int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ };
95     if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) {
96 #elif SANITIZER_MAC
97     if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency,
98                               &tscfreqsz, NULL, 0) != -1) {
99 
100 #else
101     if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz,
102                               NULL, 0) != -1) {
103 #endif
104         return static_cast<uint64_t>(TSCFrequency);
105     } else {
106       Report("Unable to determine CPU frequency for TSC accounting.\n");
107     }
108 
109     return 0;
110 }
111 #elif !SANITIZER_FUCHSIA
112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
113     /* Not supported */
114     return 0;
115 }
116 #endif
117 
// Instruction encodings used when patching sleds. The 16-bit sequences are
// stored little-endian, so the low byte is the first byte in memory.
static constexpr uint8_t CallOpCode{0xe8};   // call <rel32>
static constexpr uint16_t MovR10Seq{0xba41}; // bytes 41 ba: mov r10d, <imm32>
static constexpr uint16_t Jmp9Seq{0x09eb};   // bytes eb 09: jmp +9
static constexpr uint16_t Jmp20Seq{0x14eb};  // bytes eb 14: jmp +20
static constexpr uint16_t Jmp15Seq{0x0feb};  // bytes eb 0f: jmp +15
static constexpr uint8_t JmpOpCode{0xe9};    // jmp <rel32>
static constexpr uint8_t RetOpCode{0xc3};    // ret
static constexpr uint16_t NopwSeq{0x9066};   // bytes 66 90: 2-byte nop

// Bounds of what fits into the signed 32-bit relative offset of call/jmp.
static constexpr int64_t MinOffset = std::numeric_limits<int32_t>::min();
static constexpr int64_t MaxOffset = std::numeric_limits<int32_t>::max();
129 
130 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
131                         const XRaySledEntry &Sled,
132                         void (*Trampoline)()) XRAY_NEVER_INSTRUMENT {
133   // Here we do the dance of replacing the following sled:
134   //
135   // xray_sled_n:
136   //   jmp +9
137   //   <9 byte nop>
138   //
139   // With the following:
140   //
141   //   mov r10d, <function id>
142   //   call <relative 32bit offset to entry trampoline>
143   //
144   // We need to do this in the following order:
145   //
146   // 1. Put the function id first, 2 bytes from the start of the sled (just
147   // after the 2-byte jmp instruction).
148   // 2. Put the call opcode 6 bytes from the start of the sled.
149   // 3. Put the relative offset 7 bytes from the start of the sled.
150   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
151   // opcode and first operand.
152   //
153   // Prerequisite is to compute the relative offset to the trampoline's address.
154   int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) -
155                              (static_cast<int64_t>(Sled.Address) + 11);
156   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
157     Report("XRay Entry trampoline (%p) too far from sled (%p)\n",
158            Trampoline, reinterpret_cast<void *>(Sled.Address));
159     return false;
160   }
161   if (Enable) {
162     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
163     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
164     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
165     std::atomic_store_explicit(
166         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
167         std::memory_order_release);
168   } else {
169     std::atomic_store_explicit(
170         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
171         std::memory_order_release);
172     // FIXME: Write out the nops still?
173   }
174   return true;
175 }
176 
177 bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
178                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
179   // Here we do the dance of replacing the following sled:
180   //
181   // xray_sled_n:
182   //   ret
183   //   <10 byte nop>
184   //
185   // With the following:
186   //
187   //   mov r10d, <function id>
188   //   jmp <relative 32bit offset to exit trampoline>
189   //
190   // 1. Put the function id first, 2 bytes from the start of the sled (just
191   // after the 1-byte ret instruction).
192   // 2. Put the jmp opcode 6 bytes from the start of the sled.
193   // 3. Put the relative offset 7 bytes from the start of the sled.
194   // 4. Do an atomic write over the jmp instruction for the "mov r10d"
195   // opcode and first operand.
196   //
197   // Prerequisite is to compute the relative offset fo the
198   // __xray_FunctionExit function's address.
199   int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
200                              (static_cast<int64_t>(Sled.Address) + 11);
201   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
202     Report("XRay Exit trampoline (%p) too far from sled (%p)\n",
203            __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address));
204     return false;
205   }
206   if (Enable) {
207     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
208     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
209     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
210     std::atomic_store_explicit(
211         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
212         std::memory_order_release);
213   } else {
214     std::atomic_store_explicit(
215         reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
216         std::memory_order_release);
217     // FIXME: Write out the nops still?
218   }
219   return true;
220 }
221 
222 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
223                            const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
224   // Here we do the dance of replacing the tail call sled with a similar
225   // sequence as the entry sled, but calls the tail exit sled instead.
226   int64_t TrampolineOffset =
227       reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
228       (static_cast<int64_t>(Sled.Address) + 11);
229   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
230     Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n",
231            __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address));
232     return false;
233   }
234   if (Enable) {
235     *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
236     *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
237     *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
238     std::atomic_store_explicit(
239         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
240         std::memory_order_release);
241   } else {
242     std::atomic_store_explicit(
243         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
244         std::memory_order_release);
245     // FIXME: Write out the nops still?
246   }
247   return true;
248 }
249 
250 bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
251                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
252   // Here we do the dance of replacing the following sled:
253   //
254   // In Version 0:
255   //
256   // xray_sled_n:
257   //   jmp +20          // 2 bytes
258   //   ...
259   //
260   // With the following:
261   //
262   //   nopw             // 2 bytes*
263   //   ...
264   //
265   //
266   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
267   //
268   // ---
269   //
270   // In Version 1:
271   //
272   //   The jump offset is now 15 bytes (0x0f), so when restoring the nopw back
273   //   to a jmp, use 15 bytes instead.
274   //
275   if (Enable) {
276     std::atomic_store_explicit(
277         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
278         std::memory_order_release);
279   } else {
280     switch (Sled.Version) {
281     case 1:
282       std::atomic_store_explicit(
283           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq,
284           std::memory_order_release);
285       break;
286     case 0:
287     default:
288       std::atomic_store_explicit(
289           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
290           std::memory_order_release);
291       break;
292     }
293     }
294   return false;
295 }
296 
297 bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
298                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
299   // Here we do the dance of replacing the following sled:
300   //
301   // xray_sled_n:
302   //   jmp +20          // 2 byte instruction
303   //   ...
304   //
305   // With the following:
306   //
307   //   nopw             // 2 bytes
308   //   ...
309   //
310   //
311   // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'.
312   // The 20 byte sled stashes three argument registers, calls the trampoline,
313   // unstashes the registers and returns. If the arguments are already in
314   // the correct registers, the stashing and unstashing become equivalently
315   // sized nops.
316   if (Enable) {
317     std::atomic_store_explicit(
318         reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq,
319         std::memory_order_release);
320   } else {
321       std::atomic_store_explicit(
322           reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq,
323           std::memory_order_release);
324   }
325   return false;
326 }
327 
328 #if !SANITIZER_FUCHSIA
329 // We determine whether the CPU we're running on has the correct features we
330 // need. In x86_64 this will be rdtscp support.
331 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT {
332   unsigned int EAX, EBX, ECX, EDX;
333 
334   // We check whether rdtscp support is enabled. According to the x86_64 manual,
335   // level should be set at 0x80000001, and we should have a look at bit 27 in
336   // EDX. That's 0x8000000 (or 1u << 27).
337   __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX)
338     : "0"(0x80000001));
339   if (!(EDX & (1u << 27))) {
340     Report("Missing rdtscp support.\n");
341     return false;
342   }
343   // Also check whether we can determine the CPU frequency, since if we cannot,
344   // we should use the emulated TSC instead.
345   if (!getTSCFrequency()) {
346     Report("Unable to determine CPU frequency.\n");
347     return false;
348   }
349   return true;
350 }
351 #endif
352 
353 } // namespace __xray
354