//===-- stack_depot.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_STACK_DEPOT_H_
#define SCUDO_STACK_DEPOT_H_

#include "atomic_helpers.h"
#include "common.h"
#include "mutex.h"

namespace scudo {

class MurMur2HashBuilder {
  static const u32 M = 0x5bd1e995;
  static const u32 Seed = 0x9747b28c;
  static const u32 R = 24;
  u32 H;

public:
  explicit MurMur2HashBuilder(u32 Init = 0) { H = Seed ^ Init; }
  void add(u32 K) {
    K *= M;
    K ^= K >> R;
    K *= M;
    H *= M;
    H ^= K;
  }
  u32 get() {
    u32 X = H;
    X ^= X >> 13;
    X *= M;
    X ^= X >> 15;
    return X;
  }
};

class alignas(atomic_u64) StackDepot {
  HybridMutex RingEndMu;
  u32 RingEnd = 0;

  // This data structure stores a stack trace for each allocation and
  // deallocation when stack trace recording is enabled. A trace may be looked
  // up using a hash of the stack trace. The lower bits of the hash are an
  // index into the Tab array, which stores an index into the Ring array where
  // the stack traces are stored. As the name implies, Ring is a ring buffer,
  // so a stack trace may wrap around to the start of the array.
  //
  // Each stack trace in Ring is prefixed by a stack trace marker consisting of
  // a fixed 1 bit in bit 0 (this allows disambiguation between stack frames
  // and stack trace markers in the case where instruction pointers are 4-byte
  // aligned, as they are on arm64), the stack trace hash in bits 1-32, and the
  // size of the stack trace in bits 33-63.
  //
  // The insert() function is potentially racy in its accesses to the Tab and
  // Ring arrays, but find() is resilient to races in the sense that, barring
  // hash collisions, it will either return the correct stack trace or no stack
  // trace at all, even if two instances of insert() raced with one another.
  // This is achieved by re-checking the hash of the stack trace before
  // returning the trace.

  u32 RingSize = 0;
  u32 RingMask = 0;
  u32 TabMask = 0;
  // This is immediately followed by RingSize atomic_u64 and
  // (TabMask + 1) atomic_u32.

  atomic_u64 *getRing() {
    return reinterpret_cast<atomic_u64 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot));
  }

  atomic_u32 *getTab() {
    return reinterpret_cast<atomic_u32 *>(reinterpret_cast<char *>(this) +
                                          sizeof(StackDepot) +
                                          sizeof(atomic_u64) * RingSize);
  }

  const atomic_u64 *getRing() const {
    return reinterpret_cast<const atomic_u64 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot));
  }

  const atomic_u32 *getTab() const {
    return reinterpret_cast<const atomic_u32 *>(
        reinterpret_cast<const char *>(this) + sizeof(StackDepot) +
        sizeof(atomic_u64) * RingSize);
  }

public:
  void init(u32 RingSz, u32 TabSz) {
    DCHECK(isPowerOfTwo(RingSz));
    DCHECK(isPowerOfTwo(TabSz));
    RingSize = RingSz;
    RingMask = RingSz - 1;
    TabMask = TabSz - 1;
  }

  // Ensure that RingSize, RingMask and TabMask are set up in a way that
  // all accesses are within range of BufSize.
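  // For example (the sizes here are purely illustrative, not defaults taken
  // from this file): with RingSize == 1024 and TabMask + 1 == 256, the only
  // accepted BufSize is
  //   sizeof(StackDepot) + 1024 * sizeof(atomic_u64) + 256 * sizeof(atomic_u32)
  // i.e. the header immediately followed by the Ring and then the Tab array.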
  bool isValid(uptr BufSize) const {
    if (!isPowerOfTwo(RingSize))
      return false;
    uptr RingBytes = sizeof(atomic_u64) * RingSize;
    if (RingMask + 1 != RingSize)
      return false;

    if (TabMask == 0)
      return false;
    uptr TabSize = TabMask + 1;
    if (!isPowerOfTwo(TabSize))
      return false;
    uptr TabBytes = sizeof(atomic_u32) * TabSize;

    // Subtract and detect underflow.
    if (BufSize < sizeof(StackDepot))
      return false;
    BufSize -= sizeof(StackDepot);
    if (BufSize < TabBytes)
      return false;
    BufSize -= TabBytes;
    if (BufSize < RingBytes)
      return false;
    return BufSize == RingBytes;
  }

  // Insert hash of the stack trace [Begin, End) into the stack depot, and
  // return the hash.
  u32 insert(uptr *Begin, uptr *End) {
    auto *Tab = getTab();
    auto *Ring = getRing();

    MurMur2HashBuilder B;
    for (uptr *I = Begin; I != End; ++I)
      B.add(u32(*I) >> 2);
    u32 Hash = B.get();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 Id = (u64(End - Begin) << 33) | (u64(Hash) << 1) | 1;
    if (Entry == Id)
      return Hash;

    ScopedLock Lock(RingEndMu);
    RingPos = RingEnd;
    atomic_store_relaxed(&Tab[Pos], RingPos);
    atomic_store_relaxed(&Ring[RingPos], Id);
    for (uptr *I = Begin; I != End; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      atomic_store_relaxed(&Ring[RingPos], *I);
    }
    RingEnd = (RingPos + 1) & RingMask;
    return Hash;
  }

  // Look up a stack trace by hash. Returns true if successful. The trace may
  // be accessed by passing indexes between *RingPosPtr and
  // *RingPosPtr + *SizePtr to at().
  bool find(u32 Hash, uptr *RingPosPtr, uptr *SizePtr) const {
    auto *Tab = getTab();
    auto *Ring = getRing();

    u32 Pos = Hash & TabMask;
    u32 RingPos = atomic_load_relaxed(&Tab[Pos]);
    if (RingPos >= RingSize)
      return false;
    u64 Entry = atomic_load_relaxed(&Ring[RingPos]);
    u64 HashWithTagBit = (u64(Hash) << 1) | 1;
    if ((Entry & 0x1ffffffff) != HashWithTagBit)
      return false;
    u32 Size = u32(Entry >> 33);
    if (Size >= RingSize)
      return false;
    *RingPosPtr = (RingPos + 1) & RingMask;
    *SizePtr = Size;
    MurMur2HashBuilder B;
    for (uptr I = 0; I != Size; ++I) {
      RingPos = (RingPos + 1) & RingMask;
      B.add(u32(atomic_load_relaxed(&Ring[RingPos])) >> 2);
    }
    return B.get() == Hash;
  }

  u64 at(uptr RingPos) const {
    auto *Ring = getRing();
    return atomic_load_relaxed(&Ring[RingPos & RingMask]);
  }

  // This is done for the purpose of fork safety in multithreaded programs and
  // does not fully disable StackDepot. In particular, find() still works and
  // only insert() is blocked.
  void disable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.lock(); }

  void enable() NO_THREAD_SAFETY_ANALYSIS { RingEndMu.unlock(); }
};

// We need StackDepot to be aligned to 8 bytes so the ring we store after it
// is correctly aligned.
static_assert(sizeof(StackDepot) % alignof(atomic_u64) == 0);

} // namespace scudo

#endif // SCUDO_STACK_DEPOT_H_