1 #include "cpuid.h" 2 #include "sanitizer_common/sanitizer_common.h" 3 #if !SANITIZER_FUCHSIA 4 #include "sanitizer_common/sanitizer_posix.h" 5 #endif 6 #include "xray_defs.h" 7 #include "xray_interface_internal.h" 8 9 #if SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC 10 #include <sys/types.h> 11 #include <sys/sysctl.h> 12 #elif SANITIZER_FUCHSIA 13 #include <zircon/syscalls.h> 14 #endif 15 16 #include <atomic> 17 #include <cstdint> 18 #include <errno.h> 19 #include <fcntl.h> 20 #include <iterator> 21 #include <limits> 22 #include <tuple> 23 #include <unistd.h> 24 25 namespace __xray { 26 27 #if SANITIZER_LINUX 28 static std::pair<ssize_t, bool> 29 retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { 30 auto BytesToRead = std::distance(Begin, End); 31 ssize_t BytesRead; 32 ssize_t TotalBytesRead = 0; 33 while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { 34 if (BytesRead == -1) { 35 if (errno == EINTR) 36 continue; 37 Report("Read error; errno = %d\n", errno); 38 return std::make_pair(TotalBytesRead, false); 39 } 40 41 TotalBytesRead += BytesRead; 42 BytesToRead -= BytesRead; 43 Begin += BytesRead; 44 } 45 return std::make_pair(TotalBytesRead, true); 46 } 47 48 static bool readValueFromFile(const char *Filename, 49 long long *Value) XRAY_NEVER_INSTRUMENT { 50 int Fd = open(Filename, O_RDONLY | O_CLOEXEC); 51 if (Fd == -1) 52 return false; 53 static constexpr size_t BufSize = 256; 54 char Line[BufSize] = {}; 55 ssize_t BytesRead; 56 bool Success; 57 std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); 58 close(Fd); 59 if (!Success) 60 return false; 61 const char *End = nullptr; 62 long long Tmp = internal_simple_strtoll(Line, &End, 10); 63 bool Result = false; 64 if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { 65 *Value = Tmp; 66 Result = true; 67 } 68 return Result; 69 } 70 71 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 72 long long TSCFrequency = -1; 73 if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", 74 &TSCFrequency)) { 75 TSCFrequency *= 1000; 76 } else if (readValueFromFile( 77 "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 78 &TSCFrequency)) { 79 TSCFrequency *= 1000; 80 } else { 81 Report("Unable to determine CPU frequency for TSC accounting.\n"); 82 } 83 return TSCFrequency == -1 ? 0 : static_cast<uint64_t>(TSCFrequency); 84 } 85 #elif SANITIZER_FREEBSD || SANITIZER_NETBSD || SANITIZER_MAC 86 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 87 long long TSCFrequency = -1; 88 size_t tscfreqsz = sizeof(TSCFrequency); 89 #if SANITIZER_MAC 90 if (internal_sysctlbyname("machdep.tsc.frequency", &TSCFrequency, 91 &tscfreqsz, NULL, 0) != -1) { 92 93 #else 94 if (internal_sysctlbyname("machdep.tsc_freq", &TSCFrequency, &tscfreqsz, 95 NULL, 0) != -1) { 96 #endif 97 return static_cast<uint64_t>(TSCFrequency); 98 } else { 99 Report("Unable to determine CPU frequency for TSC accounting.\n"); 100 } 101 102 return 0; 103 } 104 #elif !SANITIZER_FUCHSIA 105 uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { 106 /* Not supported */ 107 return 0; 108 } 109 #endif 110 111 static constexpr uint8_t CallOpCode = 0xe8; 112 static constexpr uint16_t MovR10Seq = 0xba41; 113 static constexpr uint16_t Jmp9Seq = 0x09eb; 114 static constexpr uint16_t Jmp20Seq = 0x14eb; 115 static constexpr uint16_t Jmp15Seq = 0x0feb; 116 static constexpr uint8_t JmpOpCode = 0xe9; 117 static constexpr uint8_t RetOpCode = 0xc3; 118 static constexpr uint16_t NopwSeq = 0x9066; 119 120 static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()}; 121 static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()}; 122 123 bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, 124 const XRaySledEntry &Sled, 125 void (*Trampoline)()) XRAY_NEVER_INSTRUMENT { 126 // Here we do the dance of replacing the following sled: 127 // 128 // xray_sled_n: 129 // jmp +9 130 // <9 byte nop> 131 // 132 // With the following: 133 // 134 // mov r10d, <function id> 135 // call <relative 32bit offset to entry trampoline> 136 // 137 // We need to do this in the following order: 138 // 139 // 1. Put the function id first, 2 bytes from the start of the sled (just 140 // after the 2-byte jmp instruction). 141 // 2. Put the call opcode 6 bytes from the start of the sled. 142 // 3. Put the relative offset 7 bytes from the start of the sled. 143 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 144 // opcode and first operand. 145 // 146 // Prerequisite is to compute the relative offset to the trampoline's address. 147 const uint64_t Address = Sled.address(); 148 int64_t TrampolineOffset = reinterpret_cast<int64_t>(Trampoline) - 149 (static_cast<int64_t>(Address) + 11); 150 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 151 Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline, 152 reinterpret_cast<void *>(Address)); 153 return false; 154 } 155 if (Enable) { 156 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 157 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 158 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 159 std::atomic_store_explicit( 160 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 161 std::memory_order_release); 162 } else { 163 std::atomic_store_explicit( 164 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 165 std::memory_order_release); 166 // FIXME: Write out the nops still? 167 } 168 return true; 169 } 170 171 bool patchFunctionExit(const bool Enable, const uint32_t FuncId, 172 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 173 // Here we do the dance of replacing the following sled: 174 // 175 // xray_sled_n: 176 // ret 177 // <10 byte nop> 178 // 179 // With the following: 180 // 181 // mov r10d, <function id> 182 // jmp <relative 32bit offset to exit trampoline> 183 // 184 // 1. Put the function id first, 2 bytes from the start of the sled (just 185 // after the 1-byte ret instruction). 186 // 2. Put the jmp opcode 6 bytes from the start of the sled. 187 // 3. Put the relative offset 7 bytes from the start of the sled. 188 // 4. Do an atomic write over the jmp instruction for the "mov r10d" 189 // opcode and first operand. 190 // 191 // Prerequisite is to compute the relative offset fo the 192 // __xray_FunctionExit function's address. 193 const uint64_t Address = Sled.address(); 194 int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - 195 (static_cast<int64_t>(Address) + 11); 196 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 197 Report("XRay Exit trampoline (%p) too far from sled (%p)\n", 198 __xray_FunctionExit, reinterpret_cast<void *>(Address)); 199 return false; 200 } 201 if (Enable) { 202 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 203 *reinterpret_cast<uint8_t *>(Address + 6) = JmpOpCode; 204 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 205 std::atomic_store_explicit( 206 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 207 std::memory_order_release); 208 } else { 209 std::atomic_store_explicit( 210 reinterpret_cast<std::atomic<uint8_t> *>(Address), RetOpCode, 211 std::memory_order_release); 212 // FIXME: Write out the nops still? 213 } 214 return true; 215 } 216 217 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId, 218 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 219 // Here we do the dance of replacing the tail call sled with a similar 220 // sequence as the entry sled, but calls the tail exit sled instead. 221 const uint64_t Address = Sled.address(); 222 int64_t TrampolineOffset = 223 reinterpret_cast<int64_t>(__xray_FunctionTailExit) - 224 (static_cast<int64_t>(Address) + 11); 225 if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { 226 Report("XRay Tail Exit trampoline (%p) too far from sled (%p)\n", 227 __xray_FunctionTailExit, reinterpret_cast<void *>(Address)); 228 return false; 229 } 230 if (Enable) { 231 *reinterpret_cast<uint32_t *>(Address + 2) = FuncId; 232 *reinterpret_cast<uint8_t *>(Address + 6) = CallOpCode; 233 *reinterpret_cast<uint32_t *>(Address + 7) = TrampolineOffset; 234 std::atomic_store_explicit( 235 reinterpret_cast<std::atomic<uint16_t> *>(Address), MovR10Seq, 236 std::memory_order_release); 237 } else { 238 std::atomic_store_explicit( 239 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp9Seq, 240 std::memory_order_release); 241 // FIXME: Write out the nops still? 242 } 243 return true; 244 } 245 246 bool patchCustomEvent(const bool Enable, const uint32_t FuncId, 247 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 248 // Here we do the dance of replacing the following sled: 249 // 250 // In Version 0: 251 // 252 // xray_sled_n: 253 // jmp +20 // 2 bytes 254 // ... 255 // 256 // With the following: 257 // 258 // nopw // 2 bytes* 259 // ... 260 // 261 // 262 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 263 // 264 // --- 265 // 266 // In Version 1 or 2: 267 // 268 // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back 269 // to a jmp, use 15 bytes instead. 270 // 271 const uint64_t Address = Sled.address(); 272 if (Enable) { 273 std::atomic_store_explicit( 274 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 275 std::memory_order_release); 276 } else { 277 switch (Sled.Version) { 278 case 1: 279 case 2: 280 std::atomic_store_explicit( 281 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp15Seq, 282 std::memory_order_release); 283 break; 284 case 0: 285 default: 286 std::atomic_store_explicit( 287 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 288 std::memory_order_release); 289 break; 290 } 291 } 292 return false; 293 } 294 295 bool patchTypedEvent(const bool Enable, const uint32_t FuncId, 296 const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { 297 // Here we do the dance of replacing the following sled: 298 // 299 // xray_sled_n: 300 // jmp +20 // 2 byte instruction 301 // ... 302 // 303 // With the following: 304 // 305 // nopw // 2 bytes 306 // ... 307 // 308 // 309 // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. 310 // The 20 byte sled stashes three argument registers, calls the trampoline, 311 // unstashes the registers and returns. If the arguments are already in 312 // the correct registers, the stashing and unstashing become equivalently 313 // sized nops. 314 const uint64_t Address = Sled.address(); 315 if (Enable) { 316 std::atomic_store_explicit( 317 reinterpret_cast<std::atomic<uint16_t> *>(Address), NopwSeq, 318 std::memory_order_release); 319 } else { 320 std::atomic_store_explicit( 321 reinterpret_cast<std::atomic<uint16_t> *>(Address), Jmp20Seq, 322 std::memory_order_release); 323 } 324 return false; 325 } 326 327 #if !SANITIZER_FUCHSIA 328 // We determine whether the CPU we're running on has the correct features we 329 // need. In x86_64 this will be rdtscp support. 330 bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { 331 unsigned int EAX, EBX, ECX, EDX; 332 333 // We check whether rdtscp support is enabled. According to the x86_64 manual, 334 // level should be set at 0x80000001, and we should have a look at bit 27 in 335 // EDX. That's 0x8000000 (or 1u << 27). 336 __asm__ __volatile__("cpuid" : "=a"(EAX), "=b"(EBX), "=c"(ECX), "=d"(EDX) 337 : "0"(0x80000001)); 338 if (!(EDX & (1u << 27))) { 339 Report("Missing rdtscp support.\n"); 340 return false; 341 } 342 // Also check whether we can determine the CPU frequency, since if we cannot, 343 // we should use the emulated TSC instead. 344 if (!getTSCFrequency()) { 345 Report("Unable to determine CPU frequency.\n"); 346 return false; 347 } 348 return true; 349 } 350 #endif 351 352 } // namespace __xray 353