1 #include "cpuid.h" 2 #include "sanitizer_common/sanitizer_common.h" 3 #if !SANITIZER_FUCHSIA 4 #include "sanitizer_common/sanitizer_posix.h" 5 #endif 6 #include "xray_defs.h" 7 #include "xray_interface_internal.h" 8 9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC 10 #include <sys/types.h> 11 #if SANITIZER_OPENBSD 12 #include <sys/time.h> 13 #include <machine/cpu.h> 14 #endif 15 #include <sys/sysctl.h> 16 #elif SANITIZER_FUCHSIA 17 #include <zircon/syscalls.h> 18 #endif 19 20 #include <atomic> 21 #include <cstdint> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <iterator> 25 #include <limits> 26 #include <tuple> 27 #include <unistd.h> 28 29 namespace __xray { 30 31 #if SANITIZER_LINUX 32 static std::pair<ssize_t, bool> 33 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { 34 auto BytesToRead = std::distance(Begin, End); 35 ssize_t BytesRead; 36 ssize_t TotalBytesRead = 0; 37 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { 38 if (BytesRead == -1) { 39 if (errno == EINTR) 40 continue; 41 Report("Read error; errno = %d\n", errno); 42 return std::make_pair(TotalBytesRead, false); 43 } 44 45 TotalBytesRead += BytesRead; 46 BytesToRead -= BytesRead; 47 Begin += BytesRead; 48 } 49 return std::make_pair(TotalBytesRead, true); 50 } 51 52 static bool readValueFromFile(const char *Filename, 53 long long *Value) XRAY_NEVER_INSTRUMENT { 54 int Fd = open(Filename, O_RDONLY | O_CLOEXEC); 55 if (Fd == -1) 56 return false; 57 static constexpr size_t BufSize = 256; 58 char Line[BufSize] = {}; 59 ssize_t BytesRead; 60 bool Success; 61 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); 62 close(Fd); 63 if (!Success) 64 return false; 65 const char *End = nullptr; 66 long long Tmp = internal_simple_strtoll(Line, &End, 10); 67 bool Result = false; 68 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { 69 *Value = Tmp; 70 Result = true; 71 } 72 return Result; 73 } 74 75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 76 long long TSCFrequency = -1; 77 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", 78 &TSCFrequency)) { 79 TSCFrequency *= 1000; 80 } else if (readValueFromFile( 81 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 82 &TSCFrequency)) { 83 TSCFrequency *= 1000; 84 } else { 85 Report("Unable to determine CPU frequency for TSC accounting.\n"); 86 } 87 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); 88 } 89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC 90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 91 long long TSCFrequency = -1; 92 size_t tscfreqsz = sizeof(TSCFrequency); 93 #if SANITIZER_OPENBSD 94 int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; 95 if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { 96 #elif SANITIZER_MAC 97 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, 98 &tscfreqsz, NULL, 0) != -1) { 99 100 #else 101 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, 102 NULL, 0) != -1) { 103 #endif 104 return static_cast<uint64_t>(TSCFrequency); 105 } else { 106 Report("Unable to determine CPU frequency for TSC accounting.\n"); 107 } 108 109 return 0; 110 } 111 #elif !SANITIZER_FUCHSIA 112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 113 /* Not supported */ 114 return 0; 115 } 116 #endif 117 118 static constexpr uint8_t CallOpCode = 0xe8; 119 static constexpr uint16_t MovR10Seq = 0xba41; 120 static constexpr uint16_t Jmp9Seq = 0x09eb; 121 static constexpr uint16_t Jmp20Seq = 0x14eb; 122 static constexpr uint16_t Jmp15Seq = 0x0feb; 123 static constexpr uint8_t JmpOpCode = 0xe9; 124 static constexpr uint8_t RetOpCode = 0xc3; 125 static constexpr uint16_t NopwSeq = 0x9066; 126 127 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; 128 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; 129 130 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, 131 const XRaySledEntry &Sled, 132 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { 133 // Here we do the dance of replacing the following sled: 134 // 135 // xray_sled_n: 136 // jmp +9 137 // <9 byte nop> 138 // 139 // With the following: 140 // 141 // mov r10d, <function id> 142 // call <relative 32bit offset to entry trampoline> 143 // 144 // We need to do this in the following order: 145 // 146 // 1. Put the function id first, 2 bytes from the start of the sled (just 147 // after the 2-byte jmp instruction). 148 // 2. Put the call opcode 6 bytes from the start of the sled. 149 // 3. Put the relative offset 7 bytes from the start of the sled. 150 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 151 // opcode and first operand. 152 // 153 // Prerequisite is to compute the relative offset to the trampoline's address. 154 const uint64_t Address = Sled.address(); 155 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - 156 (static_cast<int64_t>(Address) + 11); 157 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 158 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline, 159 reinterpret_cast<void *>(Address)); 160 return false; 161 } 162 if (Enable) { 163 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 164 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 165 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 166 std::atomic_store_explicit( 167 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 168 std::memory_order_release); 169 } else { 170 std::atomic_store_explicit( 171 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 172 std::memory_order_release); 173 // FIXME: Write out the nops still? 174 } 175 return true; 176 } 177 178 bool patchFunctionExit(const bool Enable, const uint32_t FuncId, 179 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 180 // Here we do the dance of replacing the following sled: 181 // 182 // xray_sled_n: 183 // ret 184 // <10 byte nop> 185 // 186 // With the following: 187 // 188 // mov r10d, <function id> 189 // jmp <relative 32bit offset to exit trampoline> 190 // 191 // 1. Put the function id first, 2 bytes from the start of the sled (just 192 // after the 1-byte ret instruction). 193 // 2. Put the jmp opcode 6 bytes from the start of the sled. 194 // 3. Put the relative offset 7 bytes from the start of the sled. 195 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 196 // opcode and first operand. 197 // 198 // Prerequisite is to compute the relative offset fo the 199 // __xray_FunctionExit function's address. 200 const uint64_t Address = Sled.address(); 201 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - 202 (static_cast<int64_t>(Address) + 11); 203 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 204 Report("XRay Exit trampoline (%p) too far from sled (%p)\n", 205 __xray_FunctionExit, reinterpret_cast<void *>(Address)); 206 return false; 207 } 208 if (Enable) { 209 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 210 *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode; 211 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 212 std::atomic_store_explicit( 213 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 214 std::memory_order_release); 215 } else { 216 std::atomic_store_explicit( 217 reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode, 218 std::memory_order_release); 219 // FIXME: Write out the nops still? 220 } 221 return true; 222 } 223 224 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, 225 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 226 // Here we do the dance of replacing the tail call sled with a similar 227 // sequence as the entry sled, but calls the tail exit sled instead. 228 const uint64_t Address = Sled.address(); 229 int64_t TrampolineOffset = 230 reinterpret_cast<int64_t>(__xray_FunctionTailExit) - 231 (static_cast<int64_t>(Address) + 11); 232 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 233 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", 234 __xray_FunctionTailExit, reinterpret_cast<void *>(Address)); 235 return false; 236 } 237 if (Enable) { 238 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 239 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 240 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 241 std::atomic_store_explicit( 242 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 243 std::memory_order_release); 244 } else { 245 std::atomic_store_explicit( 246 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 247 std::memory_order_release); 248 // FIXME: Write out the nops still? 249 } 250 return true; 251 } 252 253 bool patchCustomEvent(const bool Enable, const uint32_t FuncId, 254 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 255 // Here we do the dance of replacing the following sled: 256 // 257 // In Version 0: 258 // 259 // xray_sled_n: 260 // jmp +20 // 2 bytes 261 // ... 262 // 263 // With the following: 264 // 265 // nopw // 2 bytes* 266 // ... 267 // 268 // 269 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 270 // 271 // --- 272 // 273 // In Version 1 or 2: 274 // 275 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back 276 // to a jmp, use 15 bytes instead. 277 // 278 const uint64_t Address = Sled.address(); 279 if (Enable) { 280 std::atomic_store_explicit( 281 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 282 std::memory_order_release); 283 } else { 284 switch (Sled.Version) { 285 case 1: 286 case 2: 287 std::atomic_store_explicit( 288 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq, 289 std::memory_order_release); 290 break; 291 case 0: 292 default: 293 std::atomic_store_explicit( 294 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 295 std::memory_order_release); 296 break; 297 } 298 } 299 return false; 300 } 301 302 bool patchTypedEvent(const bool Enable, const uint32_t FuncId, 303 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 304 // Here we do the dance of replacing the following sled: 305 // 306 // xray_sled_n: 307 // jmp +20 // 2 byte instruction 308 // ... 309 // 310 // With the following: 311 // 312 // nopw // 2 bytes 313 // ... 314 // 315 // 316 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 317 // The 20 byte sled stashes three argument registers, calls the trampoline, 318 // unstashes the registers and returns. If the arguments are already in 319 // the correct registers, the stashing and unstashing become equivalently 320 // sized nops. 321 const uint64_t Address = Sled.address(); 322 if (Enable) { 323 std::atomic_store_explicit( 324 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 325 std::memory_order_release); 326 } else { 327 std::atomic_store_explicit( 328 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 329 std::memory_order_release); 330 } 331 return false; 332 } 333 334 #if !SANITIZER_FUCHSIA 335 // We determine whether the CPU we're running on has the correct features we 336 // need. In x86_64 this will be rdtscp support. 337 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { 338 unsigned int EAX, EBX, ECX, EDX; 339 340 // We check whether rdtscp support is enabled. According to the x86_64 manual, 341 // level should be set at 0x80000001, and we should have a look at bit 27 in 342 // EDX. That's 0x8000000 (or 1u << 27). 343 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) 344 : "0"(0x80000001)); 345 if (!(EDX & (1u << 27))) { 346 Report("Missing rdtscp support.\n"); 347 return false; 348 } 349 // Also check whether we can determine the CPU frequency, since if we cannot, 350 // we should use the emulated TSC instead. 351 if (!getTSCFrequency()) { 352 Report("Unable to determine CPU frequency.\n"); 353 return false; 354 } 355 return true; 356 } 357 #endif 358 359 } // namespace __xray 360