1 #include "cpuid.h" 2 #include "sanitizer_common/sanitizer_common.h" 3 #if !SANITIZER_FUCHSIA 4 #include "sanitizer_common/sanitizer_posix.h" 5 #endif 6 #include "xray_defs.h" 7 #include "xray_interface_internal.h" 8 9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC 10 #include <sys/types.h> 11 #if SANITIZER_OPENBSD 12 #include <sys/time.h> 13 #include <machine/cpu.h> 14 #endif 15 #include <sys/sysctl.h> 16 #elif SANITIZER_FUCHSIA 17 #include <zircon/syscalls.h> 18 #endif 19 20 #include <atomic> 21 #include <cstdint> 22 #include <errno.h> 23 #include <fcntl.h> 24 #include <iterator> 25 #include <limits> 26 #include <tuple> 27 #include <unistd.h> 28 29 namespace __xray { 30 31 #if SANITIZER_LINUX 32 static std::pair<ssize_t, bool> 33 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { 34 auto BytesToRead = std::distance(Begin, End); 35 ssize_t BytesRead; 36 ssize_t TotalBytesRead = 0; 37 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { 38 if (BytesRead == -1) { 39 if (errno == EINTR) 40 continue; 41 Report("Read error; errno = %d\n", errno); 42 return std::make_pair(TotalBytesRead, false); 43 } 44 45 TotalBytesRead += BytesRead; 46 BytesToRead -= BytesRead; 47 Begin += BytesRead; 48 } 49 return std::make_pair(TotalBytesRead, true); 50 } 51 52 static bool readValueFromFile(const char *Filename, 53 long long *Value) XRAY_NEVER_INSTRUMENT { 54 int Fd = open(Filename, O_RDONLY | O_CLOEXEC); 55 if (Fd == -1) 56 return false; 57 static constexpr size_t BufSize = 256; 58 char Line[BufSize] = {}; 59 ssize_t BytesRead; 60 bool Success; 61 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); 62 close(Fd); 63 if (!Success) 64 return false; 65 const char *End = nullptr; 66 long long Tmp = internal_simple_strtoll(Line, &End, 10); 67 bool Result = false; 68 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { 69 *Value = Tmp; 70 Result = true; 71 } 72 return Result; 73 } 74 75 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 76 long long TSCFrequency = -1; 77 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", 78 &TSCFrequency)) { 79 TSCFrequency *= 1000; 80 } else if (readValueFromFile( 81 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 82 &TSCFrequency)) { 83 TSCFrequency *= 1000; 84 } else { 85 Report("Unable to determine CPU frequency for TSC accounting.\n"); 86 } 87 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); 88 } 89 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_OPENBSD || SANITIZER_MAC 90 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 91 long long TSCFrequency = -1; 92 size_t tscfreqsz = sizeof(TSCFrequency); 93 #if SANITIZER_OPENBSD 94 int Mib[2] = { CTL_MACHDEP, CPU_TSCFREQ }; 95 if (internal_sysctl(Mib, 2, &TSCFrequency, &tscfreqsz, NULL, 0) != -1) { 96 #elif SANITIZER_MAC 97 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, 98 &tscfreqsz, NULL, 0) != -1) { 99 100 #else 101 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, 102 NULL, 0) != -1) { 103 #endif 104 return static_cast<uint64_t>(TSCFrequency); 105 } else { 106 Report("Unable to determine CPU frequency for TSC accounting.\n"); 107 } 108 109 return 0; 110 } 111 #elif !SANITIZER_FUCHSIA 112 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 113 /* Not supported */ 114 return 0; 115 } 116 #endif 117 118 static constexpr uint8_t CallOpCode = 0xe8; 119 static constexpr uint16_t MovR10Seq = 0xba41; 120 static constexpr uint16_t Jmp9Seq = 0x09eb; 121 static constexpr uint16_t Jmp20Seq = 0x14eb; 122 static constexpr uint16_t Jmp15Seq = 0x0feb; 123 static constexpr uint8_t JmpOpCode = 0xe9; 124 static constexpr uint8_t RetOpCode = 0xc3; 125 static constexpr uint16_t NopwSeq = 0x9066; 126 127 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; 128 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; 129 130 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, 131 const XRaySledEntry &Sled, 132 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { 133 // Here we do the dance of replacing the following sled: 134 // 135 // xray_sled_n: 136 // jmp +9 137 // <9 byte nop> 138 // 139 // With the following: 140 // 141 // mov r10d, <function id> 142 // call <relative 32bit offset to entry trampoline> 143 // 144 // We need to do this in the following order: 145 // 146 // 1. Put the function id first, 2 bytes from the start of the sled (just 147 // after the 2-byte jmp instruction). 148 // 2. Put the call opcode 6 bytes from the start of the sled. 149 // 3. Put the relative offset 7 bytes from the start of the sled. 150 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 151 // opcode and first operand. 152 // 153 // Prerequisite is to compute the relative offset to the trampoline's address. 154 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - 155 (static_cast<int64_t>(Sled.Address) + 11); 156 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 157 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", 158 Trampoline, reinterpret_cast<void *>(Sled.Address)); 159 return false; 160 } 161 if (Enable) { 162 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; 163 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; 164 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; 165 std::atomic_store_explicit( 166 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, 167 std::memory_order_release); 168 } else { 169 std::atomic_store_explicit( 170 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, 171 std::memory_order_release); 172 // FIXME: Write out the nops still? 173 } 174 return true; 175 } 176 177 bool patchFunctionExit(const bool Enable, const uint32_t FuncId, 178 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 179 // Here we do the dance of replacing the following sled: 180 // 181 // xray_sled_n: 182 // ret 183 // <10 byte nop> 184 // 185 // With the following: 186 // 187 // mov r10d, <function id> 188 // jmp <relative 32bit offset to exit trampoline> 189 // 190 // 1. Put the function id first, 2 bytes from the start of the sled (just 191 // after the 1-byte ret instruction). 192 // 2. Put the jmp opcode 6 bytes from the start of the sled. 193 // 3. Put the relative offset 7 bytes from the start of the sled. 194 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 195 // opcode and first operand. 196 // 197 // Prerequisite is to compute the relative offset fo the 198 // __xray_FunctionExit function's address. 199 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - 200 (static_cast<int64_t>(Sled.Address) + 11); 201 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 202 Report("XRay Exit trampoline (%p) too far from sled (%p)\n", 203 __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address)); 204 return false; 205 } 206 if (Enable) { 207 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; 208 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode; 209 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; 210 std::atomic_store_explicit( 211 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, 212 std::memory_order_release); 213 } else { 214 std::atomic_store_explicit( 215 reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode, 216 std::memory_order_release); 217 // FIXME: Write out the nops still? 218 } 219 return true; 220 } 221 222 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, 223 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 224 // Here we do the dance of replacing the tail call sled with a similar 225 // sequence as the entry sled, but calls the tail exit sled instead. 226 int64_t TrampolineOffset = 227 reinterpret_cast<int64_t>(__xray_FunctionTailExit) - 228 (static_cast<int64_t>(Sled.Address) + 11); 229 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 230 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", 231 __xray_FunctionTailExit, reinterpret_cast<void *>(Sled.Address)); 232 return false; 233 } 234 if (Enable) { 235 *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId; 236 *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode; 237 *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset; 238 std::atomic_store_explicit( 239 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq, 240 std::memory_order_release); 241 } else { 242 std::atomic_store_explicit( 243 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq, 244 std::memory_order_release); 245 // FIXME: Write out the nops still? 246 } 247 return true; 248 } 249 250 bool patchCustomEvent(const bool Enable, const uint32_t FuncId, 251 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 252 // Here we do the dance of replacing the following sled: 253 // 254 // In Version 0: 255 // 256 // xray_sled_n: 257 // jmp +20 // 2 bytes 258 // ... 259 // 260 // With the following: 261 // 262 // nopw // 2 bytes* 263 // ... 264 // 265 // 266 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 267 // 268 // --- 269 // 270 // In Version 1: 271 // 272 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back 273 // to a jmp, use 15 bytes instead. 274 // 275 if (Enable) { 276 std::atomic_store_explicit( 277 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq, 278 std::memory_order_release); 279 } else { 280 switch (Sled.Version) { 281 case 1: 282 std::atomic_store_explicit( 283 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp15Seq, 284 std::memory_order_release); 285 break; 286 case 0: 287 default: 288 std::atomic_store_explicit( 289 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq, 290 std::memory_order_release); 291 break; 292 } 293 } 294 return false; 295 } 296 297 bool patchTypedEvent(const bool Enable, const uint32_t FuncId, 298 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 299 // Here we do the dance of replacing the following sled: 300 // 301 // xray_sled_n: 302 // jmp +20 // 2 byte instruction 303 // ... 304 // 305 // With the following: 306 // 307 // nopw // 2 bytes 308 // ... 309 // 310 // 311 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 312 // The 20 byte sled stashes three argument registers, calls the trampoline, 313 // unstashes the registers and returns. If the arguments are already in 314 // the correct registers, the stashing and unstashing become equivalently 315 // sized nops. 316 if (Enable) { 317 std::atomic_store_explicit( 318 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq, 319 std::memory_order_release); 320 } else { 321 std::atomic_store_explicit( 322 reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp20Seq, 323 std::memory_order_release); 324 } 325 return false; 326 } 327 328 #if !SANITIZER_FUCHSIA 329 // We determine whether the CPU we're running on has the correct features we 330 // need. In x86_64 this will be rdtscp support. 331 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { 332 unsigned int EAX, EBX, ECX, EDX; 333 334 // We check whether rdtscp support is enabled. According to the x86_64 manual, 335 // level should be set at 0x80000001, and we should have a look at bit 27 in 336 // EDX. That's 0x8000000 (or 1u << 27). 337 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) 338 : "0"(0x80000001)); 339 if (!(EDX & (1u << 27))) { 340 Report("Missing rdtscp support.\n"); 341 return false; 342 } 343 // Also check whether we can determine the CPU frequency, since if we cannot, 344 // we should use the emulated TSC instead. 345 if (!getTSCFrequency()) { 346 Report("Unable to determine CPU frequency.\n"); 347 return false; 348 } 349 return true; 350 } 351 #endif 352 353 } // namespace __xray 354