xref: /freebsd/contrib/llvm-project/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp (revision 5036d9652a5701d00e9e40ea942c278e9f77d33d)
1 //===-- tsan_rtl_access.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of ThreadSanitizer (TSan), a race detector.
10 //
11 // Definitions of memory access and function entry/exit entry points.
12 //===----------------------------------------------------------------------===//
13 
14 #include "tsan_rtl.h"
15 
16 namespace __tsan {
17 
18 ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState* thr, uptr pc,
19                                              uptr addr, uptr size,
20                                              AccessType typ) {
21   DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
22   if (!kCollectHistory)
23     return true;
24   EventAccess* ev;
25   if (UNLIKELY(!TraceAcquire(thr, &ev)))
26     return false;
27   u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
28   uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
29   thr->trace_prev_pc = pc;
30   if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
31     ev->is_access = 1;
32     ev->is_read = !!(typ & kAccessRead);
33     ev->is_atomic = !!(typ & kAccessAtomic);
34     ev->size_log = size_log;
35     ev->pc_delta = pc_delta;
36     DCHECK_EQ(ev->pc_delta, pc_delta);
37     ev->addr = CompressAddr(addr);
38     TraceRelease(thr, ev);
39     return true;
40   }
41   auto* evex = reinterpret_cast<EventAccessExt*>(ev);
42   evex->is_access = 0;
43   evex->is_func = 0;
44   evex->type = EventType::kAccessExt;
45   evex->is_read = !!(typ & kAccessRead);
46   evex->is_atomic = !!(typ & kAccessAtomic);
47   evex->size_log = size_log;
48   // Note: this is important, see comment in EventAccessExt.
49   evex->_ = 0;
50   evex->addr = CompressAddr(addr);
51   evex->pc = pc;
52   TraceRelease(thr, evex);
53   return true;
54 }
55 
56 ALWAYS_INLINE
57 bool TryTraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
58                                AccessType typ) {
59   if (!kCollectHistory)
60     return true;
61   EventAccessRange* ev;
62   if (UNLIKELY(!TraceAcquire(thr, &ev)))
63     return false;
64   thr->trace_prev_pc = pc;
65   ev->is_access = 0;
66   ev->is_func = 0;
67   ev->type = EventType::kAccessRange;
68   ev->is_read = !!(typ & kAccessRead);
69   ev->is_free = !!(typ & kAccessFree);
70   ev->size_lo = size;
71   ev->pc = CompressAddr(pc);
72   ev->addr = CompressAddr(addr);
73   ev->size_hi = size >> EventAccessRange::kSizeLoBits;
74   TraceRelease(thr, ev);
75   return true;
76 }
77 
78 void TraceMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr, uptr size,
79                             AccessType typ) {
80   if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
81     return;
82   TraceSwitchPart(thr);
83   UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
84   DCHECK(res);
85 }
86 
87 void TraceFunc(ThreadState* thr, uptr pc) {
88   if (LIKELY(TryTraceFunc(thr, pc)))
89     return;
90   TraceSwitchPart(thr);
91   UNUSED bool res = TryTraceFunc(thr, pc);
92   DCHECK(res);
93 }
94 
// Out-of-line slow path for function-entry tracing: switches to a new trace
// part and then re-runs FuncEntry() for the same pc.
NOINLINE void TraceRestartFuncEntry(ThreadState* thr, uptr pc) {
  TraceSwitchPart(thr);
  FuncEntry(thr, pc);
}
99 
// Out-of-line slow path for function-exit tracing: switches to a new trace
// part and then re-runs FuncExit().
NOINLINE void TraceRestartFuncExit(ThreadState* thr) {
  TraceSwitchPart(thr);
  FuncExit(thr);
}
104 
105 void TraceMutexLock(ThreadState* thr, EventType type, uptr pc, uptr addr,
106                     StackID stk) {
107   DCHECK(type == EventType::kLock || type == EventType::kRLock);
108   if (!kCollectHistory)
109     return;
110   EventLock ev;
111   ev.is_access = 0;
112   ev.is_func = 0;
113   ev.type = type;
114   ev.pc = CompressAddr(pc);
115   ev.stack_lo = stk;
116   ev.stack_hi = stk >> EventLock::kStackIDLoBits;
117   ev._ = 0;
118   ev.addr = CompressAddr(addr);
119   TraceEvent(thr, ev);
120 }
121 
122 void TraceMutexUnlock(ThreadState* thr, uptr addr) {
123   if (!kCollectHistory)
124     return;
125   EventUnlock ev;
126   ev.is_access = 0;
127   ev.is_func = 0;
128   ev.type = EventType::kUnlock;
129   ev._ = 0;
130   ev.addr = CompressAddr(addr);
131   TraceEvent(thr, ev);
132 }
133 
134 void TraceTime(ThreadState* thr) {
135   if (!kCollectHistory)
136     return;
137   FastState fast_state = thr->fast_state;
138   EventTime ev;
139   ev.is_access = 0;
140   ev.is_func = 0;
141   ev.type = EventType::kTime;
142   ev.sid = static_cast<u64>(fast_state.sid());
143   ev.epoch = static_cast<u64>(fast_state.epoch());
144   ev._ = 0;
145   TraceEvent(thr, ev);
146 }
147 
148 NOINLINE void DoReportRace(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
149                            Shadow old,
150                            AccessType typ) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
151   // For the free shadow markers the first element (that contains kFreeSid)
152   // triggers the race, but the second element contains info about the freeing
153   // thread, take it.
154   if (old.sid() == kFreeSid)
155     old = Shadow(LoadShadow(&shadow_mem[1]));
156   // This prevents trapping on this address in future.
157   for (uptr i = 0; i < kShadowCnt; i++)
158     StoreShadow(&shadow_mem[i], i == 0 ? Shadow::kRodata : Shadow::kEmpty);
159   // See the comment in MemoryRangeFreed as to why the slot is locked
160   // for free memory accesses. ReportRace must not be called with
161   // the slot locked because of the fork. But MemoryRangeFreed is not
162   // called during fork because fork sets ignore_reads_and_writes,
163   // so simply unlocking the slot should be fine.
164   if (typ & kAccessSlotLocked)
165     SlotUnlock(thr);
166   ReportRace(thr, shadow_mem, cur, Shadow(old), typ);
167   if (typ & kAccessSlotLocked)
168     SlotLock(thr);
169 }
170 
171 #if !TSAN_VECTORIZE
172 ALWAYS_INLINE
173 bool ContainsSameAccess(RawShadow* s, Shadow cur, int unused0, int unused1,
174                         AccessType typ) {
175   for (uptr i = 0; i < kShadowCnt; i++) {
176     auto old = LoadShadow(&s[i]);
177     if (!(typ & kAccessRead)) {
178       if (old == cur.raw())
179         return true;
180       continue;
181     }
182     auto masked = static_cast<RawShadow>(static_cast<u32>(old) |
183                                          static_cast<u32>(Shadow::kRodata));
184     if (masked == cur.raw())
185       return true;
186     if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
187       if (old == Shadow::kRodata)
188         return true;
189     }
190   }
191   return false;
192 }
193 
194 ALWAYS_INLINE
195 bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
196                 int unused0, int unused1, AccessType typ) {
197   bool stored = false;
198   for (uptr idx = 0; idx < kShadowCnt; idx++) {
199     RawShadow* sp = &shadow_mem[idx];
200     Shadow old(LoadShadow(sp));
201     if (LIKELY(old.raw() == Shadow::kEmpty)) {
202       if (!(typ & kAccessCheckOnly) && !stored)
203         StoreShadow(sp, cur.raw());
204       return false;
205     }
206     if (LIKELY(!(cur.access() & old.access())))
207       continue;
208     if (LIKELY(cur.sid() == old.sid())) {
209       if (!(typ & kAccessCheckOnly) &&
210           LIKELY(cur.access() == old.access() && old.IsRWWeakerOrEqual(typ))) {
211         StoreShadow(sp, cur.raw());
212         stored = true;
213       }
214       continue;
215     }
216     if (LIKELY(old.IsBothReadsOrAtomic(typ)))
217       continue;
218     if (LIKELY(thr->clock.Get(old.sid()) >= old.epoch()))
219       continue;
220     DoReportRace(thr, shadow_mem, cur, old, typ);
221     return true;
222   }
223   // We did not find any races and had already stored
224   // the current access info, so we are done.
225   if (LIKELY(stored))
226     return false;
227   // Choose a random candidate slot and replace it.
228   uptr index =
229       atomic_load_relaxed(&thr->trace_pos) / sizeof(Event) % kShadowCnt;
230   StoreShadow(&shadow_mem[index], cur.raw());
231   return false;
232 }
233 
// Non-vectorized build: the scalar ContainsSameAccess()/CheckRaces() overloads
// read the shadow cell themselves, so this only declares two dummy ints named
// `access` and `shadow` in the caller's scope; call sites pass them as the
// unused arguments. Both macro parameters are ignored.
#  define LOAD_CURRENT_SHADOW(cur, shadow_mem) UNUSED int access = 0, shadow = 0
235 
236 #else /* !TSAN_VECTORIZE */
237 
238 ALWAYS_INLINE
239 bool ContainsSameAccess(RawShadow* unused0, Shadow unused1, m128 shadow,
240                         m128 access, AccessType typ) {
241   // Note: we could check if there is a larger access of the same type,
242   // e.g. we just allocated/memset-ed a block (so it contains 8 byte writes)
243   // and now do smaller reads/writes, these can also be considered as "same
244   // access". However, it will make the check more expensive, so it's unclear
245   // if it's worth it. But this would conserve trace space, so it's useful
246   // besides potential speed up.
247   if (!(typ & kAccessRead)) {
248     const m128 same = _mm_cmpeq_epi32(shadow, access);
249     return _mm_movemask_epi8(same);
250   }
251   // For reads we need to reset read bit in the shadow,
252   // because we need to match read with both reads and writes.
253   // Shadow::kRodata has only read bit set, so it does what we want.
254   // We also abuse it for rodata check to save few cycles
255   // since we already loaded Shadow::kRodata into a register.
256   // Reads from rodata can't race.
257   // Measurements show that they can be 10-20% of all memory accesses.
258   // Shadow::kRodata has epoch 0 which cannot appear in shadow normally
259   // (thread epochs start from 1). So the same read bit mask
260   // serves as rodata indicator.
261   const m128 read_mask = _mm_set1_epi32(static_cast<u32>(Shadow::kRodata));
262   const m128 masked_shadow = _mm_or_si128(shadow, read_mask);
263   m128 same = _mm_cmpeq_epi32(masked_shadow, access);
264   // Range memory accesses check Shadow::kRodata before calling this,
265   // Shadow::kRodatas is not possible for free memory access
266   // and Go does not use Shadow::kRodata.
267   if (!(typ & kAccessNoRodata) && !SANITIZER_GO) {
268     const m128 ro = _mm_cmpeq_epi32(shadow, read_mask);
269     same = _mm_or_si128(ro, same);
270   }
271   return _mm_movemask_epi8(same);
272 }
273 
274 NOINLINE void DoReportRaceV(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
275                             u32 race_mask, m128 shadow, AccessType typ) {
276   // race_mask points which of the shadow elements raced with the current
277   // access. Extract that element.
278   CHECK_NE(race_mask, 0);
279   u32 old;
280   // Note: _mm_extract_epi32 index must be a constant value.
281   switch (__builtin_ffs(race_mask) / 4) {
282     case 0:
283       old = _mm_extract_epi32(shadow, 0);
284       break;
285     case 1:
286       old = _mm_extract_epi32(shadow, 1);
287       break;
288     case 2:
289       old = _mm_extract_epi32(shadow, 2);
290       break;
291     case 3:
292       old = _mm_extract_epi32(shadow, 3);
293       break;
294   }
295   Shadow prev(static_cast<RawShadow>(old));
296   // For the free shadow markers the first element (that contains kFreeSid)
297   // triggers the race, but the second element contains info about the freeing
298   // thread, take it.
299   if (prev.sid() == kFreeSid)
300     prev = Shadow(static_cast<RawShadow>(_mm_extract_epi32(shadow, 1)));
301   DoReportRace(thr, shadow_mem, cur, prev, typ);
302 }
303 
// Vectorized race check for one shadow cell.
//
// `shadow` holds the four 32-bit slots of the cell at `shadow_mem` and
// `access` holds the current access broadcast to all lanes (both are produced
// by LOAD_CURRENT_SHADOW). Returns true if a race was reported through
// DoReportRaceV(). Otherwise returns false after storing `cur` into one slot
// of the cell, unless typ has kAccessCheckOnly, in which case nothing is
// stored.
//
// Control flow: a cheap lane-wise filter first computes race_mask, the lanes
// that could race. If it is empty we fall into STORE. Otherwise SHARED
// compares epochs for the candidate lanes and either jumps back to STORE (no
// lane is concurrent) or reports the race.
ALWAYS_INLINE
bool CheckRaces(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
                m128 shadow, m128 access, AccessType typ) {
  // Note: empty/zero slots don't intersect with any access.
  const m128 zero = _mm_setzero_si128();
  // Per-lane field masks of the raw shadow value, as used below: low byte is
  // the access mask, the next byte is the sid, the top two bits are the
  // read/atomic flags.
  const m128 mask_access = _mm_set1_epi32(0x000000ff);
  const m128 mask_sid = _mm_set1_epi32(0x0000ff00);
  const m128 mask_read_atomic = _mm_set1_epi32(0xc0000000);
  const m128 access_and = _mm_and_si128(access, shadow);
  const m128 access_xor = _mm_xor_si128(access, shadow);
  // Lanes whose access mask has no byte in common with the current access.
  const m128 intersect = _mm_and_si128(access_and, mask_access);
  const m128 not_intersect = _mm_cmpeq_epi32(intersect, zero);
  // Lanes that carry the same sid as the current access.
  const m128 not_same_sid = _mm_and_si128(access_xor, mask_sid);
  const m128 same_sid = _mm_cmpeq_epi32(not_same_sid, zero);
  // Non-zero in lanes where both accesses have the read bit or both have the
  // atomic bit set.
  const m128 both_read_or_atomic = _mm_and_si128(access_and, mask_read_atomic);
  // A lane is a race candidate only if none of the three conditions holds,
  // i.e. its no_race value is all zeros.
  const m128 no_race =
      _mm_or_si128(_mm_or_si128(not_intersect, same_sid), both_read_or_atomic);
  const int race_mask = _mm_movemask_epi8(_mm_cmpeq_epi32(no_race, zero));
  if (UNLIKELY(race_mask))
    goto SHARED;

STORE : {
  if (typ & kAccessCheckOnly)
    return false;
  // We could also replace different sid's if access is the same,
  // rw weaker and happens before. However, just checking access below
  // is not enough because we also need to check that !both_read_or_atomic
  // (reads from different sids can be concurrent).
  // Theoretically we could replace smaller accesses with larger accesses,
  // but it's unclear if it's worth doing.
  const m128 mask_access_sid = _mm_set1_epi32(0x0000ffff);
  // Lanes with the same sid and exactly the same access mask as `cur`.
  const m128 not_same_sid_access = _mm_and_si128(access_xor, mask_access_sid);
  const m128 same_sid_access = _mm_cmpeq_epi32(not_same_sid_access, zero);
  // The read/atomic bits of typ moved into the top two bits of each lane.
  const m128 access_read_atomic =
      _mm_set1_epi32((typ & (kAccessRead | kAccessAtomic)) << 30);
  // Lanes whose value is >= access_read_atomic as an unsigned 32-bit number.
  const m128 rw_weaker =
      _mm_cmpeq_epi32(_mm_max_epu32(shadow, access_read_atomic), shadow);
  const m128 rewrite = _mm_and_si128(same_sid_access, rw_weaker);
  const int rewrite_mask = _mm_movemask_epi8(rewrite);
  // Slot choice, in order of preference: a slot we may rewrite, an empty
  // slot, a pseudo-random slot derived from the trace position.
  // __builtin_ffs is 1-based and the mask has 4 bits per slot, so index / 4
  // below is the slot number in all three cases.
  int index = __builtin_ffs(rewrite_mask);
  if (UNLIKELY(index == 0)) {
    const m128 empty = _mm_cmpeq_epi32(shadow, zero);
    const int empty_mask = _mm_movemask_epi8(empty);
    index = __builtin_ffs(empty_mask);
    if (UNLIKELY(index == 0))
      index = (atomic_load_relaxed(&thr->trace_pos) / 2) % 16;
  }
  StoreShadow(&shadow_mem[index / 4], cur.raw());
  // We could zero other slots determined by rewrite_mask.
  // That would help other threads to evict better slots,
  // but it's unclear if it's worth it.
  return false;
}

SHARED:
  // Lanes that are not race candidates keep this maximal value, so the
  // signed "thread epoch < shadow epoch" comparison below is false for them.
  m128 thread_epochs = _mm_set1_epi32(0x7fffffff);
  // Need to unwind this because _mm_extract_epi8/_mm_insert_epi32
  // indexes must be constants.
  // For each candidate lane: take the sid byte of the slot, look up our clock
  // for that sid and place it at the same bit position as the epoch field
  // selected by mask_epoch.
#  define LOAD_EPOCH(idx)                                                     \
    if (LIKELY(race_mask & (1 << (idx * 4)))) {                               \
      u8 sid = _mm_extract_epi8(shadow, idx * 4 + 1);                         \
      u16 epoch = static_cast<u16>(thr->clock.Get(static_cast<Sid>(sid)));    \
      thread_epochs = _mm_insert_epi32(thread_epochs, u32(epoch) << 16, idx); \
    }
  LOAD_EPOCH(0);
  LOAD_EPOCH(1);
  LOAD_EPOCH(2);
  LOAD_EPOCH(3);
#  undef LOAD_EPOCH
  const m128 mask_epoch = _mm_set1_epi32(0x3fff0000);
  const m128 shadow_epochs = _mm_and_si128(shadow, mask_epoch);
  // A candidate lane is concurrent with us if our clock for its sid is still
  // behind the epoch recorded in the slot.
  const m128 concurrent = _mm_cmplt_epi32(thread_epochs, shadow_epochs);
  const int concurrent_mask = _mm_movemask_epi8(concurrent);
  if (LIKELY(concurrent_mask == 0))
    goto STORE;

  DoReportRaceV(thr, shadow_mem, cur, concurrent_mask, shadow, typ);
  return true;
}
383 
// Vectorized build: declares two locals in the caller's scope. `access` is
// the raw shadow value of `cur` broadcast into all four 32-bit lanes, and
// `shadow` is the cell at `shadow_mem` loaded with a single 128-bit load
// (_mm_load_si128, so shadow_mem must be 16-byte aligned).
#  define LOAD_CURRENT_SHADOW(cur, shadow_mem)                         \
    const m128 access = _mm_set1_epi32(static_cast<u32>((cur).raw())); \
    const m128 shadow = _mm_load_si128(reinterpret_cast<m128*>(shadow_mem))
387 #endif
388 
389 char* DumpShadow(char* buf, RawShadow raw) {
390   if (raw == Shadow::kEmpty) {
391     internal_snprintf(buf, 64, "0");
392     return buf;
393   }
394   Shadow s(raw);
395   AccessType typ;
396   s.GetAccess(nullptr, nullptr, &typ);
397   internal_snprintf(buf, 64, "{tid=%u@%u access=0x%x typ=%x}",
398                     static_cast<u32>(s.sid()), static_cast<u32>(s.epoch()),
399                     s.access(), static_cast<u32>(typ));
400   return buf;
401 }
402 
// TryTrace* and TraceRestart* functions allow turning the memory access and
// func entry/exit callbacks into leaf functions with all associated
// performance benefits. These hottest callbacks do only 2 slow path calls:
// report a race and trace part switching. Race reporting is easy to turn into
// a tail call, we just always return from the runtime after reporting a race.
// But trace part switching is harder because it needs to be in the middle of
// callbacks. To turn it into a tail call we immediately return after
// TraceRestart* functions, but TraceRestart* functions themselves recurse into
// the callback after switching trace part. As a result the hottest callbacks
// contain only tail calls, which effectively makes them leaf functions (can
// use all registers, no frame setup, etc).
//
// This one is the restart path of MemoryAccess(): it switches the trace part
// and then re-runs MemoryAccess() with the same arguments.
NOINLINE void TraceRestartMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess(thr, pc, addr, size, typ);
}
419 
// Handles one memory access of `size` bytes at `addr` made from `pc`.
//
// Steps, in order:
//   1. Return if the shadow cell already contains an equivalent access.
//   2. Return if the thread's ignore bit is set.
//   3. Trace the access; if the trace has no room, hand over to
//      TraceRestartMemoryAccess(), which switches the trace part and calls
//      this function again.
//   4. Check the cell for races (and record the access in it).
ALWAYS_INLINE USED void MemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                     uptr size, AccessType typ) {
  RawShadow* shadow_mem = MemToShadow(addr);
  // Scratch buffers for DumpShadow(); only referenced by the debug print.
  UNUSED char memBuf[4][64];
  DPrintf2("#%d: Access: %d@%d %p/%zd typ=0x%x {%s, %s, %s, %s}\n", thr->tid,
           static_cast<int>(thr->fast_state.sid()),
           static_cast<int>(thr->fast_state.epoch()), (void*)addr, size,
           static_cast<int>(typ), DumpShadow(memBuf[0], shadow_mem[0]),
           DumpShadow(memBuf[1], shadow_mem[1]),
           DumpShadow(memBuf[2], shadow_mem[2]),
           DumpShadow(memBuf[3], shadow_mem[3]));

  FastState fast_state = thr->fast_state;
  Shadow cur(fast_state, addr, size, typ);

  // Declares the locals `shadow` and `access` used by the calls below.
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  if (!TryTraceMemoryAccess(thr, pc, addr, size, typ))
    return TraceRestartMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
444 
445 void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr, AccessType typ);
446 
// Restart path of MemoryAccess16(): switches to a new trace part and then
// re-runs MemoryAccess16() with the same arguments.
NOINLINE
void RestartMemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                           AccessType typ) {
  TraceSwitchPart(thr);
  MemoryAccess16(thr, pc, addr, typ);
}
453 
// Handles a 16-byte memory access at `addr` as two consecutive shadow cells,
// each checked with the same full 8-byte access `cur`.
//
// The access is traced at most once, as a single 16-byte range event, and
// only if at least one of the two cells needs a race check; `traced`
// remembers whether the first cell already emitted it. If tracing fails, the
// whole operation is restarted through RestartMemoryAccess16(). A race
// reported for the first cell ends processing.
ALWAYS_INLINE USED void MemoryAccess16(ThreadState* thr, uptr pc, uptr addr,
                                       AccessType typ) {
  const uptr size = 16;
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  Shadow cur(fast_state, 0, 8, typ);
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  // First cell. The extra scope confines the `shadow`/`access` locals declared
  // by LOAD_CURRENT_SHADOW so that they can be declared again after SECOND.
  {
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartMemoryAccess16(thr, pc, addr, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  // Second cell.
  shadow_mem += kShadowCnt;
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartMemoryAccess16(thr, pc, addr, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
482 
// Restart path of UnalignedMemoryAccess(): switches to a new trace part and
// then re-runs UnalignedMemoryAccess() with the same arguments.
NOINLINE
void RestartUnalignedMemoryAccess(ThreadState* thr, uptr pc, uptr addr,
                                  uptr size, AccessType typ) {
  TraceSwitchPart(thr);
  UnalignedMemoryAccess(thr, pc, addr, size, typ);
}
489 
// Handles an access of at most 8 bytes that may straddle a shadow cell
// boundary.
//
// The access is split into the part that lies in the cell containing `addr`
// (size1 bytes) and the remainder in the following cell (size2 bytes, which
// may be 0). The access is traced at most once, as a range event covering the
// whole access, and only if one of the parts needs a race check; `traced`
// remembers whether the first part already emitted it. If tracing fails, the
// whole operation is restarted through RestartUnalignedMemoryAccess(). A race
// reported for the first part ends processing.
ALWAYS_INLINE USED void UnalignedMemoryAccess(ThreadState* thr, uptr pc,
                                              uptr addr, uptr size,
                                              AccessType typ) {
  DCHECK_LE(size, 8);
  FastState fast_state = thr->fast_state;
  if (UNLIKELY(fast_state.GetIgnoreBit()))
    return;
  RawShadow* shadow_mem = MemToShadow(addr);
  bool traced = false;
  // Bytes from addr up to the end of its shadow cell, capped by size.
  // RoundUp(addr + 1, ...) rather than RoundUp(addr, ...) so that an aligned
  // addr still yields the end of its own cell.
  uptr size1 = Min<uptr>(size, RoundUp(addr + 1, kShadowCell) - addr);
  // First part. The extra scope confines `cur` and the `shadow`/`access`
  // locals declared by LOAD_CURRENT_SHADOW so that they can be declared again
  // after SECOND.
  {
    Shadow cur(fast_state, addr, size1, typ);
    LOAD_CURRENT_SHADOW(cur, shadow_mem);
    if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
      goto SECOND;
    if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
      return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
    traced = true;
    if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
      return;
  }
SECOND:
  // Second part: whatever spilled over into the next cell, starting at that
  // cell's offset 0.
  uptr size2 = size - size1;
  if (LIKELY(size2 == 0))
    return;
  shadow_mem += kShadowCnt;
  Shadow cur(fast_state, 0, size2, typ);
  LOAD_CURRENT_SHADOW(cur, shadow_mem);
  if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
    return;
  if (!traced && !TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
    return RestartUnalignedMemoryAccess(thr, pc, addr, size, typ);
  CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
}
524 
525 void ShadowSet(RawShadow* p, RawShadow* end, RawShadow v) {
526   DCHECK_LE(p, end);
527   DCHECK(IsShadowMem(p));
528   DCHECK(IsShadowMem(end));
529   UNUSED const uptr kAlign = kShadowCnt * kShadowSize;
530   DCHECK_EQ(reinterpret_cast<uptr>(p) % kAlign, 0);
531   DCHECK_EQ(reinterpret_cast<uptr>(end) % kAlign, 0);
532 #if !TSAN_VECTORIZE
533   for (; p < end; p += kShadowCnt) {
534     p[0] = v;
535     for (uptr i = 1; i < kShadowCnt; i++) p[i] = Shadow::kEmpty;
536   }
537 #else
538   m128 vv = _mm_setr_epi32(
539       static_cast<u32>(v), static_cast<u32>(Shadow::kEmpty),
540       static_cast<u32>(Shadow::kEmpty), static_cast<u32>(Shadow::kEmpty));
541   m128* vp = reinterpret_cast<m128*>(p);
542   m128* vend = reinterpret_cast<m128*>(end);
543   for (; vp < vend; vp++) _mm_store_si128(vp, vv);
544 #endif
545 }
546 
547 static void MemoryRangeSet(uptr addr, uptr size, RawShadow val) {
548   if (size == 0)
549     return;
550   DCHECK_EQ(addr % kShadowCell, 0);
551   DCHECK_EQ(size % kShadowCell, 0);
552   // If a user passes some insane arguments (memset(0)),
553   // let it just crash as usual.
554   if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
555     return;
556   RawShadow* begin = MemToShadow(addr);
557   RawShadow* end = begin + size / kShadowCell * kShadowCnt;
558   // Don't want to touch lots of shadow memory.
559   // If a program maps 10MB stack, there is no need reset the whole range.
560   // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
561   if (SANITIZER_WINDOWS ||
562       size <= common_flags()->clear_shadow_mmap_threshold) {
563     ShadowSet(begin, end, val);
564     return;
565   }
566   // The region is big, reset only beginning and end.
567   const uptr kPageSize = GetPageSizeCached();
568   // Set at least first kPageSize/2 to page boundary.
569   RawShadow* mid1 =
570       Min(end, reinterpret_cast<RawShadow*>(RoundUp(
571                    reinterpret_cast<uptr>(begin) + kPageSize / 2, kPageSize)));
572   ShadowSet(begin, mid1, val);
573   // Reset middle part.
574   RawShadow* mid2 = RoundDown(end, kPageSize);
575   if (mid2 > mid1) {
576     if (!MmapFixedSuperNoReserve((uptr)mid1, (uptr)mid2 - (uptr)mid1))
577       Die();
578   }
579   // Set the ending.
580   ShadowSet(mid2, end, val);
581 }
582 
583 void MemoryResetRange(ThreadState* thr, uptr pc, uptr addr, uptr size) {
584   uptr addr1 = RoundDown(addr, kShadowCell);
585   uptr size1 = RoundUp(size + addr - addr1, kShadowCell);
586   MemoryRangeSet(addr1, size1, Shadow::kEmpty);
587 }
588 
589 void MemoryRangeFreed(ThreadState* thr, uptr pc, uptr addr, uptr size) {
590   // Callers must lock the slot to ensure synchronization with the reset.
591   // The problem with "freed" memory is that it's not "monotonic"
592   // with respect to bug detection: freed memory is bad to access,
593   // but then if the heap block is reallocated later, it's good to access.
594   // As the result a garbage "freed" shadow can lead to a false positive
595   // if it happens to match a real free in the thread trace,
596   // but the heap block was reallocated before the current memory access,
597   // so it's still good to access. It's not the case with data races.
598   DCHECK(thr->slot_locked);
599   DCHECK_EQ(addr % kShadowCell, 0);
600   size = RoundUp(size, kShadowCell);
601   // Processing more than 1k (2k of shadow) is expensive,
602   // can cause excessive memory consumption (user does not necessary touch
603   // the whole range) and most likely unnecessary.
604   size = Min<uptr>(size, 1024);
605   const AccessType typ = kAccessWrite | kAccessFree | kAccessSlotLocked |
606                          kAccessCheckOnly | kAccessNoRodata;
607   TraceMemoryAccessRange(thr, pc, addr, size, typ);
608   RawShadow* shadow_mem = MemToShadow(addr);
609   Shadow cur(thr->fast_state, 0, kShadowCell, typ);
610 #if TSAN_VECTORIZE
611   const m128 access = _mm_set1_epi32(static_cast<u32>(cur.raw()));
612   const m128 freed = _mm_setr_epi32(
613       static_cast<u32>(Shadow::FreedMarker()),
614       static_cast<u32>(Shadow::FreedInfo(cur.sid(), cur.epoch())), 0, 0);
615   for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
616     const m128 shadow = _mm_load_si128((m128*)shadow_mem);
617     if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, shadow, access, typ)))
618       return;
619     _mm_store_si128((m128*)shadow_mem, freed);
620   }
621 #else
622   for (; size; size -= kShadowCell, shadow_mem += kShadowCnt) {
623     if (UNLIKELY(CheckRaces(thr, shadow_mem, cur, 0, 0, typ)))
624       return;
625     StoreShadow(&shadow_mem[0], Shadow::FreedMarker());
626     StoreShadow(&shadow_mem[1], Shadow::FreedInfo(cur.sid(), cur.epoch()));
627     StoreShadow(&shadow_mem[2], Shadow::kEmpty);
628     StoreShadow(&shadow_mem[3], Shadow::kEmpty);
629   }
630 #endif
631 }
632 
633 void MemoryRangeImitateWrite(ThreadState* thr, uptr pc, uptr addr, uptr size) {
634   DCHECK_EQ(addr % kShadowCell, 0);
635   size = RoundUp(size, kShadowCell);
636   TraceMemoryAccessRange(thr, pc, addr, size, kAccessWrite);
637   Shadow cur(thr->fast_state, 0, 8, kAccessWrite);
638   MemoryRangeSet(addr, size, cur.raw());
639 }
640 
641 void MemoryRangeImitateWriteOrResetRange(ThreadState* thr, uptr pc, uptr addr,
642                                          uptr size) {
643   if (thr->ignore_reads_and_writes == 0)
644     MemoryRangeImitateWrite(thr, pc, addr, size);
645   else
646     MemoryResetRange(thr, pc, addr, size);
647 }
648 
649 ALWAYS_INLINE
650 bool MemoryAccessRangeOne(ThreadState* thr, RawShadow* shadow_mem, Shadow cur,
651                           AccessType typ) {
652   LOAD_CURRENT_SHADOW(cur, shadow_mem);
653   if (LIKELY(ContainsSameAccess(shadow_mem, cur, shadow, access, typ)))
654     return false;
655   return CheckRaces(thr, shadow_mem, cur, shadow, access, typ);
656 }
657 
// Restart path of MemoryAccessRangeT<is_read>(): switches to a new trace part
// and then re-runs MemoryAccessRangeT<is_read>() with the same arguments.
template <bool is_read>
NOINLINE void RestartMemoryAccessRange(ThreadState* thr, uptr pc, uptr addr,
                                       uptr size) {
  TraceSwitchPart(thr);
  MemoryAccessRangeT<is_read>(thr, pc, addr, size);
}
664 
665 template <bool is_read>
666 void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) {
667   const AccessType typ =
668       (is_read ? kAccessRead : kAccessWrite) | kAccessNoRodata;
669   RawShadow* shadow_mem = MemToShadow(addr);
670   DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_read=%d\n", thr->tid,
671            (void*)pc, (void*)addr, (int)size, is_read);
672 
673 #if SANITIZER_DEBUG
674   if (!IsAppMem(addr)) {
675     Printf("Access to non app mem start: %p\n", (void*)addr);
676     DCHECK(IsAppMem(addr));
677   }
678   if (!IsAppMem(addr + size - 1)) {
679     Printf("Access to non app mem end: %p\n", (void*)(addr + size - 1));
680     DCHECK(IsAppMem(addr + size - 1));
681   }
682   if (!IsShadowMem(shadow_mem)) {
683     Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr);
684     DCHECK(IsShadowMem(shadow_mem));
685   }
686 
687   RawShadow* shadow_mem_end = reinterpret_cast<RawShadow*>(
688       reinterpret_cast<uptr>(shadow_mem) + size * kShadowMultiplier - 1);
689   if (!IsShadowMem(shadow_mem_end)) {
690     Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end,
691            (void*)(addr + size - 1));
692     Printf(
693         "Shadow start addr (ok): %p (%p); size: 0x%zx; kShadowMultiplier: "
694         "%zx\n",
695         shadow_mem, (void*)addr, size, kShadowMultiplier);
696     DCHECK(IsShadowMem(shadow_mem_end));
697   }
698 #endif
699 
700   // Access to .rodata section, no races here.
701   // Measurements show that it can be 10-20% of all memory accesses.
702   // Check here once to not check for every access separately.
703   // Note: we could (and should) do this only for the is_read case
704   // (writes shouldn't go to .rodata). But it happens in Chromium tests:
705   // https://bugs.chromium.org/p/chromium/issues/detail?id=1275581#c19
706   // Details are unknown since it happens only on CI machines.
707   if (*shadow_mem == Shadow::kRodata)
708     return;
709 
710   FastState fast_state = thr->fast_state;
711   if (UNLIKELY(fast_state.GetIgnoreBit()))
712     return;
713 
714   if (!TryTraceMemoryAccessRange(thr, pc, addr, size, typ))
715     return RestartMemoryAccessRange<is_read>(thr, pc, addr, size);
716 
717   if (UNLIKELY(addr % kShadowCell)) {
718     // Handle unaligned beginning, if any.
719     uptr size1 = Min(size, RoundUp(addr, kShadowCell) - addr);
720     size -= size1;
721     Shadow cur(fast_state, addr, size1, typ);
722     if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
723       return;
724     shadow_mem += kShadowCnt;
725   }
726   // Handle middle part, if any.
727   Shadow cur(fast_state, 0, kShadowCell, typ);
728   for (; size >= kShadowCell; size -= kShadowCell, shadow_mem += kShadowCnt) {
729     if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
730       return;
731   }
732   // Handle ending, if any.
733   if (UNLIKELY(size)) {
734     Shadow cur(fast_state, 0, size, typ);
735     if (UNLIKELY(MemoryAccessRangeOne(thr, shadow_mem, cur, typ)))
736       return;
737   }
738 }
739 
740 template void MemoryAccessRangeT<true>(ThreadState* thr, uptr pc, uptr addr,
741                                        uptr size);
742 template void MemoryAccessRangeT<false>(ThreadState* thr, uptr pc, uptr addr,
743                                         uptr size);
744 
745 }  // namespace __tsan
746 
747 #if !SANITIZER_GO
748 // Must be included in this file to make sure everything is inlined.
749 #  include "tsan_interface.inc"
750 #endif
751