1 #include "cpuid.h" 2 #include "sanitizer_common/sanitizer_common.h" 3 #if !SANITIZER_FUCHSIA 4 #include "sanitizer_common/sanitizer_posix.h" 5 #endif 6 #include "xray_defs.h" 7 #include "xray_interface_internal.h" 8 9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC 10 #include <sys/types.h> 11 #include <sys/sysctl.h> 12 #elif SANITIZER_FUCHSIA 13 #include <zircon/syscalls.h> 14 #endif 15 16 #include <atomic> 17 #include <cstdint> 18 #include <errno.h> 19 #include <fcntl.h> 20 #include <iterator> 21 #include <limits> 22 #include <tuple> 23 #include <unistd.h> 24 25 namespace __xray { 26 27 #if SANITIZER_LINUX 28 static std::pair<ssize_t, bool> 29 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { 30 auto BytesToRead = std::distance(Begin, End); 31 ssize_t BytesRead; 32 ssize_t TotalBytesRead = 0; 33 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { 34 if (BytesRead == -1) { 35 if (errno == EINTR) 36 continue; 37 Report("Read error; errno = %d\n", errno); 38 return std::make_pair(TotalBytesRead, false); 39 } 40 41 TotalBytesRead += BytesRead; 42 BytesToRead -= BytesRead; 43 Begin += BytesRead; 44 } 45 return std::make_pair(TotalBytesRead, true); 46 } 47 48 static bool readValueFromFile(const char *Filename, 49 long long *Value) XRAY_NEVER_INSTRUMENT { 50 int Fd = open(Filename, O_RDONLY | O_CLOEXEC); 51 if (Fd == -1) 52 return false; 53 static constexpr size_t BufSize = 256; 54 char Line[BufSize] = {}; 55 ssize_t BytesRead; 56 bool Success; 57 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); 58 close(Fd); 59 if (!Success) 60 return false; 61 const char *End = nullptr; 62 long long Tmp = internal_simple_strtoll(Line, &End, 10); 63 bool Result = false; 64 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { 65 *Value = Tmp; 66 Result = true; 67 } 68 return Result; 69 } 70 71 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 72 long long TSCFrequency = -1; 73 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", 74 &TSCFrequency)) { 75 TSCFrequency *= 1000; 76 } else if (readValueFromFile( 77 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 78 &TSCFrequency)) { 79 TSCFrequency *= 1000; 80 } else { 81 Report("Unable to determine CPU frequency for TSC accounting.\n"); 82 } 83 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); 84 } 85 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC 86 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 87 long long TSCFrequency = -1; 88 size_t tscfreqsz = sizeof(TSCFrequency); 89 #if SANITIZER_MAC 90 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, 91 &tscfreqsz, NULL, 0) != -1) { 92 93 #else 94 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, 95 NULL, 0) != -1) { 96 #endif 97 return static_cast<uint64_t>(TSCFrequency); 98 } else { 99 Report("Unable to determine CPU frequency for TSC accounting.\n"); 100 } 101 102 return 0; 103 } 104 #elif !SANITIZER_FUCHSIA 105 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 106 /* Not supported */ 107 return 0; 108 } 109 #endif 110 111 static constexpr uint8_t CallOpCode = 0xe8; 112 static constexpr uint16_t MovR10Seq = 0xba41; 113 static constexpr uint16_t Jmp9Seq = 0x09eb; 114 static constexpr uint16_t Jmp20Seq = 0x14eb; 115 static constexpr uint16_t Jmp15Seq = 0x0feb; 116 static constexpr uint8_t JmpOpCode = 0xe9; 117 static constexpr uint8_t RetOpCode = 0xc3; 118 static constexpr uint16_t NopwSeq = 0x9066; 119 120 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; 121 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; 122 123 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, 124 const XRaySledEntry &Sled, 125 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { 126 // Here we do the dance of replacing the following sled: 127 // 128 // xray_sled_n: 129 // jmp +9 130 // <9 byte nop> 131 // 132 // With the following: 133 // 134 // mov r10d, <function id> 135 // call <relative 32bit offset to entry trampoline> 136 // 137 // We need to do this in the following order: 138 // 139 // 1. Put the function id first, 2 bytes from the start of the sled (just 140 // after the 2-byte jmp instruction). 141 // 2. Put the call opcode 6 bytes from the start of the sled. 142 // 3. Put the relative offset 7 bytes from the start of the sled. 143 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 144 // opcode and first operand. 145 // 146 // Prerequisite is to compute the relative offset to the trampoline's address. 147 const uint64_t Address = Sled.address(); 148 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - 149 (static_cast<int64_t>(Address) + 11); 150 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 151 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", 152 reinterpret_cast<void *>(Trampoline), 153 reinterpret_cast<void *>(Address)); 154 return false; 155 } 156 if (Enable) { 157 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 158 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 159 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 160 std::atomic_store_explicit( 161 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 162 std::memory_order_release); 163 } else { 164 std::atomic_store_explicit( 165 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 166 std::memory_order_release); 167 // FIXME: Write out the nops still? 168 } 169 return true; 170 } 171 172 bool patchFunctionExit(const bool Enable, const uint32_t FuncId, 173 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 174 // Here we do the dance of replacing the following sled: 175 // 176 // xray_sled_n: 177 // ret 178 // <10 byte nop> 179 // 180 // With the following: 181 // 182 // mov r10d, <function id> 183 // jmp <relative 32bit offset to exit trampoline> 184 // 185 // 1. Put the function id first, 2 bytes from the start of the sled (just 186 // after the 1-byte ret instruction). 187 // 2. Put the jmp opcode 6 bytes from the start of the sled. 188 // 3. Put the relative offset 7 bytes from the start of the sled. 189 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 190 // opcode and first operand. 191 // 192 // Prerequisite is to compute the relative offset fo the 193 // __xray_FunctionExit function's address. 194 const uint64_t Address = Sled.address(); 195 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - 196 (static_cast<int64_t>(Address) + 11); 197 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 198 Report("XRay Exit trampoline (%p) too far from sled (%p)\n", 199 reinterpret_cast<void *>(__xray_FunctionExit), 200 reinterpret_cast<void *>(Address)); 201 return false; 202 } 203 if (Enable) { 204 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 205 *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode; 206 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 207 std::atomic_store_explicit( 208 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 209 std::memory_order_release); 210 } else { 211 std::atomic_store_explicit( 212 reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode, 213 std::memory_order_release); 214 // FIXME: Write out the nops still? 215 } 216 return true; 217 } 218 219 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, 220 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 221 // Here we do the dance of replacing the tail call sled with a similar 222 // sequence as the entry sled, but calls the tail exit sled instead. 223 const uint64_t Address = Sled.address(); 224 int64_t TrampolineOffset = 225 reinterpret_cast<int64_t>(__xray_FunctionTailExit) - 226 (static_cast<int64_t>(Address) + 11); 227 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 228 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", 229 reinterpret_cast<void *>(__xray_FunctionTailExit), 230 reinterpret_cast<void *>(Address)); 231 return false; 232 } 233 if (Enable) { 234 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 235 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 236 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 237 std::atomic_store_explicit( 238 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 239 std::memory_order_release); 240 } else { 241 std::atomic_store_explicit( 242 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 243 std::memory_order_release); 244 // FIXME: Write out the nops still? 245 } 246 return true; 247 } 248 249 bool patchCustomEvent(const bool Enable, const uint32_t FuncId, 250 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 251 // Here we do the dance of replacing the following sled: 252 // 253 // In Version 0: 254 // 255 // xray_sled_n: 256 // jmp +20 // 2 bytes 257 // ... 258 // 259 // With the following: 260 // 261 // nopw // 2 bytes* 262 // ... 263 // 264 // 265 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 266 // 267 // --- 268 // 269 // In Version 1 or 2: 270 // 271 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back 272 // to a jmp, use 15 bytes instead. 273 // 274 const uint64_t Address = Sled.address(); 275 if (Enable) { 276 std::atomic_store_explicit( 277 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 278 std::memory_order_release); 279 } else { 280 switch (Sled.Version) { 281 case 1: 282 case 2: 283 std::atomic_store_explicit( 284 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq, 285 std::memory_order_release); 286 break; 287 case 0: 288 default: 289 std::atomic_store_explicit( 290 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 291 std::memory_order_release); 292 break; 293 } 294 } 295 return false; 296 } 297 298 bool patchTypedEvent(const bool Enable, const uint32_t FuncId, 299 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 300 // Here we do the dance of replacing the following sled: 301 // 302 // xray_sled_n: 303 // jmp +20 // 2 byte instruction 304 // ... 305 // 306 // With the following: 307 // 308 // nopw // 2 bytes 309 // ... 310 // 311 // 312 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 313 // The 20 byte sled stashes three argument registers, calls the trampoline, 314 // unstashes the registers and returns. If the arguments are already in 315 // the correct registers, the stashing and unstashing become equivalently 316 // sized nops. 317 const uint64_t Address = Sled.address(); 318 if (Enable) { 319 std::atomic_store_explicit( 320 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 321 std::memory_order_release); 322 } else { 323 std::atomic_store_explicit( 324 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 325 std::memory_order_release); 326 } 327 return false; 328 } 329 330 #if !SANITIZER_FUCHSIA 331 // We determine whether the CPU we're running on has the correct features we 332 // need. In x86_64 this will be rdtscp support. 333 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { 334 unsigned int EAX, EBX, ECX, EDX; 335 336 // We check whether rdtscp support is enabled. According to the x86_64 manual, 337 // level should be set at 0x80000001, and we should have a look at bit 27 in 338 // EDX. That's 0x8000000 (or 1u << 27). 339 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) 340 : "0"(0x80000001)); 341 if (!(EDX & (1u << 27))) { 342 Report("Missing rdtscp support.\n"); 343 return false; 344 } 345 // Also check whether we can determine the CPU frequency, since if we cannot, 346 // we should use the emulated TSC instead. 347 if (!getTSCFrequency()) { 348 Report("Unable to determine CPU frequency.\n"); 349 return false; 350 } 351 return true; 352 } 353 #endif 354 355 } // namespace __xray 356