//===-- tsan_rtl_access.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
// Definitions of memory access and function entry/exit entry points.
//===----------------------------------------------------------------------===//

#include "tsan_rtl.h"

namespace __tsan {

ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
                                             uptr addr, uptr size,
                                             AccessType typ) {
  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
  if (!kCollectHistory)
    return true;
  EventAccess* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
  thr->trace_prev_pc = pc;
  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
    ev->is_access = 1;
    ev->is_read = !!(typ & kAccessRead);
    ev->is_atomic = !!(typ & kAccessAtomic);
    ev->size_log = size_log;
    ev->pc_delta = pc_delta;
    DCHECK_EQ(ev->pc_delta, pc_delta);
    ev->addr = CompressAddr(addr);
    TraceRelease(thr, ev);
    return true;
  }
  auto* evex = reinterpret_cast<EventAccessExt*>(ev);
  evex->is_access = 0;
  evex->is_func = 0;
  evex->type = EventType::kAccessExt;
  evex->is_read = !!(typ & kAccessRead);
  evex->is_atomic = !!(typ & kAccessAtomic);
  evex->size_log = size_log;
  // Note: this is important, see comment in EventAccessExt.
  evex->_ = 0;
  evex->addr = CompressAddr(addr);
  evex->pc = pc;
  TraceRelease(thr, evex);
  return true;
}

ALWAYS_INLINE
bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                               AccessType typ) {
  if (!kCollectHistory)
    return true;
  EventAccessRange* ev;
  if (UNLIKELY(!TraceAcquire(thr, &ev)))
    return false;
  thr->trace_prev_pc = pc;
  ev->is_access = 0;
  ev->is_func = 0;
  ev->type = EventType::kAccessRange;
  ev->is_read = !!(typ & kAccessRead);
  ev->is_free = !!(typ & kAccessFree);
  ev->size_lo = size;
  ev->pc = CompressAddr(pc);
  ev->addr = CompressAddr(addr);
  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
  TraceRelease(thr, ev);
  return true;
}

void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
                            AccessType typ) {
  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
  DCHECK(res);
}

void TraceFunc(ThreadState* thr, uptr pc) {
  if (LIKELY(TryTraceFunc(thr, pc)))
    return;
  TraceSwitchPart(thr);
  UNUSED bool res = TryTraceFunc(thr, pc);
  DCHECK(res);
}

NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}

NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}

void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
                    StackID stk) {
  DCHECK(type == EventType::kLock || type == EventType::kRLock);
  if (!kCollectHistory)
    return;
  EventLock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = type;
  ev.pc = CompressAddr(pc);
  ev.stack_lo = stk;
  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceMutexUnlock(ThreadState* thr, uptr addr) {
  if (!kCollectHistory)
    return;
  EventUnlock ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kUnlock;
  ev._ = 0;
  ev.addr = CompressAddr(addr);
  TraceEvent(thr, ev);
}

void TraceTime(ThreadState* thr) {
  if (!kCollectHistory)
    return;
  FastState fast_state = thr->fast_state;
  EventTime ev;
  ev.is_access = 0;
  ev.is_func = 0;
  ev.type = EventType::kTime;
  ev.sid = static_cast<u64>(fast_state.sid());
  ev.epoch = static_cast<u64>(fast_state.epoch());
  ev._ = 0;
  TraceEvent(thr, ev);
}

ALWAYS_INLINE RawShadow LoadShadow(RawShadow* p) {
  return static_cast<RawShadow>(
      atomic_load((atomic_uint32_t*)p, memory_order_relaxed));
}

ALWAYS_INLINE void StoreShadow(RawShadow* sp, RawShadow s) {
  atomic_store((atomic_uint32_t*)sp, static_cast<u32>(s), memory_order_relaxed);
}

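// Slow path of CheckRaces (reached directly, or via DoReportRaceV in the
// vectorized version) once a conflicting shadow value has been found: the
// shadow cell is overwritten so that this address does not keep hitting the
// report path, and the race is then reported.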
NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                           Shadow old,
                           AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
  // For the free shadow markers the first element (the one that contains
  // kFreeSid) triggers the race, but the second element contains info about
  // the freeing thread, so take that one.
  if (old.sid() == kFreeSid)
    old = Shadow(LoadShadow(&shadow_mem[1]));
  // This prevents trapping on this address in the future.
  for (uptr i = 0; i < kShadowCnt; i++)
    StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
  // See the comment in MemoryRangeFreed as to why the slot is locked
  // for free memory accesses. ReportRace must not be called with
  // the slot locked because of the fork. But MemoryRangeFreed is not
  // called during fork because fork sets ignore_reads_and_writes,
  // so simply unlocking the slot should be fine.
  if (typ & kAccessFree)
    SlotUnlock(thr);
  ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
  if (typ & kAccessFree)
    SlotLock(thr);
}

#if !TSAN_VECTORIZE
ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
                        AccessType typ) {
  for (uptr i = 0; i < kShadowCnt; i++) {
    auto old = LoadShadow(&s[i]);
    if (!(typ & kAccessRead)) {
      if (old == cur.raw())
        return true;
      continue;
    }
    auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
                                         static_cast<u32>(Shadow::kRodata));
    if (masked == cur.raw())
      return true;
    if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
      if (old == Shadow::kRodata)
        return true;
    }
  }
  return false;
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                int unused0, int unused1, AccessType typ) {
  bool stored = false;
  for (uptr idx = 0; idx < kShadowCnt; idx++) {
    RawShadow* sp = &shadow_mem[idx];
    Shadow old(LoadShadow(sp));
    if (LIKELY(old.raw() == Shadow::kEmpty)) {
      if (!(typ & kAccessCheckOnly) && !stored)
        StoreShadow(sp, cur.raw());
      return false;
    }
    if (LIKELY(!(cur.access() & old.access())))
      continue;
    if (LIKELY(cur.sid() == old.sid())) {
      if (!(typ & kAccessCheckOnly) &&
          LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
        StoreShadow(sp, cur.raw());
        stored = true;
      }
      continue;
    }
    if (LIKELY(old.IsBothReadsOrAtomic(typ)))
      continue;
    if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
      continue;
    DoReportRace(thr, shadow_mem, cur, old, typ);
    return true;
  }
  // We did not find any races and have already stored
  // the current access info, so we are done.
  if (LIKELY(stored))
    return false;
  // Choose a random candidate slot and replace it.
  uptr index =
      atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
  StoreShadow(&shadow_mem[index], cur.raw());
  return false;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0

#else /* !TSAN_VECTORIZE */

ALWAYS_INLINE
bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
                        m128 access, AccessType typ) {
  // Note: we could check if there is a larger access of the same type,
  // e.g. we just allocated/memset-ed a block (so it contains 8-byte writes)
  // and now do smaller reads/writes; these could also be considered the
  // "same access". However, it would make the check more expensive, so it's
  // unclear if it's worth it. But it would conserve trace space, so it's
  // useful beyond the potential speedup.
  if (!(typ & kAccessRead)) {
    const m128 same = _mm_cmpeq_epi32(shadow, access);
    return _mm_movemask_epi8(same);
  }
  // For reads we need to reset the read bit in the shadow,
  // because we need to match a read with both reads and writes.
  // Shadow::kRodata has only the read bit set, so it does what we want.
  // We also abuse it for the rodata check to save a few cycles,
  // since we have already loaded Shadow::kRodata into a register.
  // Reads from rodata can't race.
  // Measurements show that they can be 10-20% of all memory accesses.
  // Shadow::kRodata has epoch 0, which cannot appear in the shadow normally
  // (thread epochs start from 1), so the same read bit mask
  // also serves as the rodata indicator.
  const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
  const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
  m128 same = _mm_cmpeq_epi32(masked_shadow, access);
  // Range memory accesses check Shadow::kRodata before calling this,
  // Shadow::kRodata is not possible for free memory accesses,
  // and Go does not use Shadow::kRodata.
  if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
    const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
    same = _mm_or_si128(ro, same);
  }
  return _mm_movemask_epi8(same);
}

NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                            u32 race_mask, m128 shadow, AccessType typ) {
  // race_mask indicates which of the shadow elements raced with the current
  // access. Extract that element.
  CHECK_NE(race_mask, 0);
  u32 old;
  // Note: the _mm_extract_epi32 index must be a constant value.
  switch (__builtin_ffs(race_mask) / 4) {
    case 0:
      old = _mm_extract_epi32(shadow, 0);
      break;
    case 1:
      old = _mm_extract_epi32(shadow, 1);
      break;
    case 2:
      old = _mm_extract_epi32(shadow, 2);
      break;
    case 3:
      old = _mm_extract_epi32(shadow, 3);
      break;
  }
  Shadow prev(static_cast<RawShadow>(old));
  // For the free shadow markers the first element (the one that contains
  // kFreeSid) triggers the race, but the second element contains info about
  // the freeing thread, so take that one.
  if (prev.sid() == kFreeSid)
    prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
  DoReportRace(thr, shadow_mem, cur, prev, typ);
}

ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  // The masks below reflect the 32-bit shadow slot layout: bits 0-7 hold the
  // access mask (which bytes of the cell are touched), bits 8-15 the sid,
  // bits 16-29 the epoch, and bits 30-31 the read/atomic flags.
  const m128 zero = _mm_setzero_si128();
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sids if the access is the same,
  // rw weaker and happens-before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unroll this manually because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
# define LOAD_EPOCH(idx)                                                      \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
# undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}

# define LOAD_CURRENT_SHADOW(cur, shadow_mem)                                  \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw()));         \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
#endif

char* DumpShadow(char* buf, RawShadow raw) {
  if (raw == Shadow::kEmpty) {
    internal_snprintf(buf, 64, "0");
    return buf;
  }
  Shadow s(raw);
  AccessType typ;
  s.GetAccess(nullptr, nullptr, &typ);
  internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
                    static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
                    s.access(), static_cast<u32>(typ));
  return buf;
}

// The TryTrace* and TraceRestart* functions allow turning memory access and
// function entry/exit callbacks into leaf functions with all the associated
// performance benefits. These hottest callbacks make only two slow-path
// calls: race reporting and trace part switching. Race reporting is easy to
// turn into a tail call: we just always return from the runtime after
// reporting a race. Trace part switching is harder because it needs to happen
// in the middle of a callback. To turn it into a tail call we immediately
// return after the TraceRestart* functions, and the TraceRestart* functions
// themselves recurse into the callback after switching the trace part. As a
// result, the hottest callbacks contain only tail calls, which effectively
// makes them leaf functions (they can use all registers, need no frame setup,
// etc).
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}

ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);

NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}

ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}

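// UnalignedMemoryAccess handles accesses of up to 8 bytes that are not
// guaranteed to be aligned to the shadow cell size and thus may span two
// adjacent cells. The access is split at the cell boundary: the first size1
// bytes are checked against the first cell and the remaining size2 bytes
// (if any) against the next one, while the trace records the whole access
// as a single range event.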
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
  DCHECK_LE(p, end);
  DCHECK(IsShadowMem(p));
  DCHECK(IsShadowMem(end));
  UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
  DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
  DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
#if !TSAN_VECTORIZE
  for (; p < end; p += kShadowCnt) {
    p[0] = v;
    for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
  }
#else
  m128 vv = _mm_setr_epi32(
      static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
      static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
  m128* vp = reinterpret_cast<m128*>(p);
  m128* vend = reinterpret_cast<m128*>(end);
  for (; vp < vend; vp++) _mm_store_si128(vp, vv);
#endif
}

static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
  if (size == 0)
    return;
  DCHECK_EQ(addr % kShadowCell, 0);
  DCHECK_EQ(size % kShadowCell, 0);
  // If a user passes some insane arguments (memset(0)),
  // let it just crash as usual.
  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
    return;
  RawShadow* begin = MemToShadow(addr);
  RawShadow* end = begin + size / kShadowCell * kShadowCnt;
  // Don't want to touch lots of shadow memory.
  // If a program maps a 10MB stack, there is no need to reset the whole range.
  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
  if (SANITIZER_WINDOWS ||
      size <= common_flags()->clear_shadow_mmap_threshold) {
    ShadowSet(begin, end, val);
    return;
  }
  // The region is big, reset only the beginning and the end.
  const uptr kPageSize = GetPageSizeCached();
  // Set at least the first kPageSize/2 up to the page boundary.
  RawShadow* mid1 =
      Min(end, reinterpret_cast<RawShadow*>(RoundUp(
                   reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
  ShadowSet(begin, mid1, val);
  // Reset the middle part.
  RawShadow* mid2 = RoundDown(end, kPageSize);
  if (mid2 > mid1) {
    if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
      Die();
  }
  // Set the ending.
  ShadowSet(mid2, end, val);
}

void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  uptr addr1 = RoundDown(addr, kShadowCell);
  uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
  MemoryRangeSet(addr1, size1, Shadow::kEmpty);
}

void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  // Callers must lock the slot to ensure synchronization with the reset.
  // The problem with "freed" memory is that it's not "monotonic"
  // with respect to bug detection: freed memory is bad to access,
  // but then if the heap block is reallocated later, it's good to access.
  // As a result, a garbage "freed" shadow can lead to a false positive
  // if it happens to match a real free in the thread trace,
  // but the heap block was reallocated before the current memory access,
  // so it's still good to access. This is not the case with data races.
  DCHECK(thr->slot_locked);
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  // Processing more than 1k (2k of shadow) is expensive,
  // can cause excessive memory consumption (the user does not necessarily
  // touch the whole range), and is most likely unnecessary.
  size = Min<uptr>(size, 1024);
  const AccessType typ =
      kAccessWrite | kAccessFree | kAccessCheckOnly | kAccessNoRodata;
  TraceMemoryAccessRange(thr, pc, addr, size, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  Shadow cur(thr->fast_state, 0, kShadowCell, typ);
#if TSAN_VECTORIZE
  const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
  const m128 freed = _mm_setr_epi32(
      static_cast<u32>(Shadow::FreedMarker()),
      static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    const m128 shadow = _mm_load_si128((m128*)shadow_mem);
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
    _mm_store_si128((m128*)shadow_mem, freed);
  }
#else
  for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
      return;
    StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
    StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
    StoreShadow(&shadow_mem[2], Shadow::kEmpty);
    StoreShadow(&shadow_mem[3], Shadow::kEmpty);
  }
#endif
}

void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  DCHECK_EQ(addr % kShadowCell, 0);
  size = RoundUp(size, kShadowCell);
  TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
  Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
  MemoryRangeSet(addr, size, cur.raw());
}

void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
                                         uptr size) {
  if (thr->ignore_reads_and_writes == 0)
    MemoryRangeImitateWrite(thr, pc, addr, size);
  else
    MemoryResetRange(thr, pc, addr, size);
}

ALWAYS_INLINE
bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                          AccessType typ) {
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return false;
  return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}

template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}

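// MemoryAccessRangeT checks an arbitrary [addr, addr + size) range. The range
// is recorded in the trace as a single range event, and the shadow is then
// walked cell by cell: an unaligned head (if addr is not cell-aligned), whole
// cells in the middle, and a possibly partial tail.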
template <bool is_read>
void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
  const AccessType typ =
      (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
  RawShadow* shadow_mem = MemToShadow(addr);
  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
           (void*)pc, (void*)addr, (int)size, is_read);

#if SANITIZER_DEBUG
  if (!IsAppMem(addr)) {
    Printf("Access to non app mem %zx\n", addr);
    DCHECK(IsAppMem(addr));
  }
  if (!IsAppMem(addr + size - 1)) {
    Printf("Access to non app mem %zx\n", addr + size - 1);
    DCHECK(IsAppMem(addr + size - 1));
  }
  if (!IsShadowMem(shadow_mem)) {
    Printf("Bad shadow addr %p (%zx)\n", static_cast<void*>(shadow_mem), addr);
    DCHECK(IsShadowMem(shadow_mem));
  }
  if (!IsShadowMem(shadow_mem + size * kShadowCnt - 1)) {
    Printf("Bad shadow addr %p (%zx)\n",
           static_cast<void*>(shadow_mem + size * kShadowCnt - 1),
           addr + size - 1);
    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt - 1));
  }
#endif

  // Access to the .rodata section, no races here.
  // Measurements show that it can be 10-20% of all memory accesses.
  // Check here once instead of checking for every access separately.
  // Note: we could (and should) do this only for the is_read case
  // (writes shouldn't go to .rodata). But it happens in Chromium tests:
  // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
  // Details are unknown since it happens only on CI machines.
  if (*shadow_mem == Shadow::kRodata)
    return;

  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;

  if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);

  if (UNLIKELY(addr % kShadowCell)) {
    // Handle the unaligned beginning, if any.
    uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
    size -= size1;
    Shadow cur(fast_state, addr, size1, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
    shadow_mem += kShadowCnt;
  }
  // Handle the middle part, if any.
  Shadow cur(fast_state, 0, kShadowCell, typ);
  for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
  // Handle the ending, if any.
  if (UNLIKELY(size)) {
    Shadow cur(fast_state, 0, size, typ);
    if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
      return;
  }
}

template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size);
template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
                                        uptr size);

}  // namespace __tsan

#if !SANITIZER_GO
// Must be included in this file to make sure everything is inlined.
#  include "tsan_interface.inc"
#endif