1 /* 2 * Copyright 2010-2012 PathScale, Inc. All rights reserved. 3 * Copyright 2021 David Chisnall. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS 16 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /** 29 * guard.cc: Functions for thread-safe static initialisation. 30 * 31 * Static values in C++ can be initialised lazily their first use. This file 32 * contains functions that are used to ensure that two threads attempting to 33 * initialize the same static do not call the constructor twice. This is 34 * important because constructors can have side effects, so calling the 35 * constructor twice may be very bad. 36 * 37 * Statics that require initialisation are protected by a 64-bit value. Any 38 * platform that can do 32-bit atomic test and set operations can use this 39 * value as a low-overhead lock. Because statics (in most sane code) are 40 * accessed far more times than they are initialised, this lock implementation 41 * is heavily optimised towards the case where the static has already been 42 * initialised. 43 */ 44 #include "atomic.h" 45 #include <assert.h> 46 #include <pthread.h> 47 #include <stdint.h> 48 #include <stdlib.h> 49 50 // Older GCC doesn't define __LITTLE_ENDIAN__ 51 #ifndef __LITTLE_ENDIAN__ 52 // If __BYTE_ORDER__ is defined, use that instead 53 # ifdef __BYTE_ORDER__ 54 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 55 # define __LITTLE_ENDIAN__ 56 # endif 57 // x86 and ARM are the most common little-endian CPUs, so let's have a 58 // special case for them (ARM is already special cased). Assume everything 59 // else is big endian. 60 # elif defined(__x86_64) || defined(__i386) 61 # define __LITTLE_ENDIAN__ 62 # endif 63 #endif 64 65 /* 66 * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32) 67 * values with one bit defined to indicate that the guarded variable is and 68 * another bit to indicate that it's currently locked (initialisation in 69 * progress). The bit to use depends on the byte order of the target. 70 * 71 * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we 72 * treat the two halves of the 64-bit word as independent values and 73 */ 74 namespace 75 { 76 /** 77 * The state of the guard variable when an attempt is made to lock it. 78 */ 79 enum class GuardState 80 { 81 /** 82 * The lock is not held but is not needed because initialisation is 83 * one. 84 */ 85 InitDone, 86 87 /** 88 * Initialisation is not done but the lock is held by the caller. 89 */ 90 InitLockSucceeded, 91 92 /** 93 * Attempting to acquire the lock failed. 94 */ 95 InitLockFailed 96 }; 97 98 /** 99 * Class encapsulating a single atomic word being used to represent the 100 * guard. The word size is defined by the type of `GuardWord`. The bit 101 * used to indicate the locked state is `1<<LockedBit`, the bit used to 102 * indicate the initialised state is `1<<InitBit`. 103 */ 104 template<typename GuardWord, int LockedBit, int InitBit> 105 struct SingleWordGuard 106 { 107 /** 108 * The value indicating that the lock bit is set (and no other bits). 109 */ 110 static constexpr GuardWord locked = static_cast<GuardWord>(1) 111 << LockedBit; 112 113 /** 114 * The value indicating that the initialised bit is set (and all other 115 * bits are zero). 116 */ 117 static constexpr GuardWord initialised = static_cast<GuardWord>(1) 118 << InitBit; 119 120 /** 121 * The guard variable. 122 */ 123 atomic<GuardWord> val; 124 125 public: 126 /** 127 * Release the lock and set the initialised state. In the single-word 128 * implementation here, these are both done by a single store. 129 */ 130 void unlock(bool isInitialised) 131 { 132 val.store(isInitialised ? initialised : 0, memory_order::release); 133 #ifndef NDEBUG 134 GuardWord init_state = initialised; 135 assert(*reinterpret_cast<uint8_t*>(&init_state) != 0); 136 #endif 137 } 138 139 /** 140 * Try to acquire the lock. This has a tri-state return, indicating 141 * either that the lock was acquired, it wasn't acquired because it was 142 * contended, or it wasn't acquired because the guarded variable is 143 * already initialised. 144 */ 145 GuardState try_lock() 146 { 147 GuardWord old = 0; 148 // Try to acquire the lock, assuming that we are in the state where 149 // the lock is not held and the variable is not initialised (so the 150 // expected value is 0). 151 if (val.compare_exchange(old, locked)) 152 { 153 return GuardState::InitLockSucceeded; 154 } 155 // If the CAS failed and the old value indicates that this is 156 // initialised, return that initialisation is done and skip further 157 // retries. 158 if (old == initialised) 159 { 160 return GuardState::InitDone; 161 } 162 // Otherwise, report failure. 163 return GuardState::InitLockFailed; 164 } 165 166 /** 167 * Check whether the guard indicates that the variable is initialised. 168 */ 169 bool is_initialised() 170 { 171 return (val.load(memory_order::acquire) & initialised) == 172 initialised; 173 } 174 }; 175 176 /** 177 * Class encapsulating using two 32-bit atomic values to represent a 64-bit 178 * guard variable. 179 */ 180 template<int LockedBit, int InitBit> 181 class DoubleWordGuard 182 { 183 /** 184 * The value of `lock_word` when the lock is held. 185 */ 186 static constexpr uint32_t locked = static_cast<uint32_t>(1) 187 << LockedBit; 188 189 /** 190 * The value of `init_word` when the guarded variable is initialised. 191 */ 192 static constexpr uint32_t initialised = static_cast<uint32_t>(1) 193 << InitBit; 194 195 /** 196 * The word used for the initialised flag. This is always the first 197 * word irrespective of endian because the generated code compares the 198 * first byte in memory against 0. 199 */ 200 atomic<uint32_t> init_word; 201 202 /** 203 * The word used for the lock. 204 */ 205 atomic<uint32_t> lock_word; 206 207 public: 208 /** 209 * Try to acquire the lock. This has a tri-state return, indicating 210 * either that the lock was acquired, it wasn't acquired because it was 211 * contended, or it wasn't acquired because the guarded variable is 212 * already initialised. 213 */ 214 GuardState try_lock() 215 { 216 uint32_t old = 0; 217 // Try to acquire the lock 218 if (lock_word.compare_exchange(old, locked)) 219 { 220 // If we succeeded, check if initialisation has happened. In 221 // this version, we don't have atomic manipulation of both the 222 // lock and initialised bits together. Instead, we have an 223 // ordering rule that the initialised bit is only ever updated 224 // with the lock held. 225 if (is_initialised()) 226 { 227 // If another thread did manage to initialise this, release 228 // the lock and notify the caller that initialisation is 229 // done. 230 lock_word.store(initialised, memory_order::release); 231 return GuardState::InitDone; 232 } 233 return GuardState::InitLockSucceeded; 234 } 235 return GuardState::InitLockFailed; 236 } 237 238 /** 239 * Set the initialised state and release the lock. In this 240 * implementation, this is ordered, not atomic: the initialise bit is 241 * set while the lock is held. 242 */ 243 void unlock(bool isInitialised) 244 { 245 init_word.store(isInitialised ? initialised : 0, 246 memory_order::release); 247 lock_word.store(0, memory_order::release); 248 assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised); 249 } 250 251 /** 252 * Return whether the guarded variable is initialised. 253 */ 254 bool is_initialised() 255 { 256 return (init_word.load(memory_order::acquire) & initialised) == 257 initialised; 258 } 259 }; 260 261 // Check that the two implementations are the correct size. 262 static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t), 263 "Single-word 32-bit guard must be 32 bits"); 264 static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t), 265 "Single-word 64-bit guard must be 64 bits"); 266 static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t), 267 "Double-word guard must be 64 bits"); 268 269 #ifdef __arm__ 270 /** 271 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words. 272 */ 273 using Guard = SingleWordGuard<uint32_t, 31, 0>; 274 #elif defined(_LP64) 275 # if defined(__LITTLE_ENDIAN__) 276 /** 277 * On little-endian 64-bit platforms the guard word is a single 64-bit 278 * atomic with the lock in the high bit and the initialised flag in the low 279 * bit. 280 */ 281 using Guard = SingleWordGuard<uint64_t, 63, 0>; 282 # else 283 /** 284 * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic 285 * with the lock in the low bit and the initialised bit in the highest 286 * byte. 287 */ 288 using Guard = SingleWordGuard<uint64_t, 0, 56>; 289 # endif 290 #else 291 # if defined(__LITTLE_ENDIAN__) 292 /** 293 * 32-bit platforms use the same layout as 64-bit. 294 */ 295 using Guard = DoubleWordGuard<31, 0>; 296 # else 297 /** 298 * 32-bit platforms use the same layout as 64-bit. 299 */ 300 using Guard = DoubleWordGuard<0, 24>; 301 # endif 302 #endif 303 304 } // namespace 305 306 /** 307 * Acquires a lock on a guard, returning 0 if the object has already been 308 * initialised, and 1 if it has not. If the object is already constructed then 309 * this function just needs to read a byte from memory and return. 310 */ 311 extern "C" int __cxa_guard_acquire(Guard *guard_object) 312 { 313 // Check if this is already initialised. If so, we don't have to do 314 // anything. 315 if (guard_object->is_initialised()) 316 { 317 return 0; 318 } 319 // Spin trying to acquire the lock. If we fail to acquire the lock the 320 // first time then another thread will *probably* initialise it, but if the 321 // constructor throws an exception then we may have to try again in this 322 // thread. 323 for (;;) 324 { 325 // Try to acquire the lock. 326 switch (guard_object->try_lock()) 327 { 328 // If we failed to acquire the lock but another thread has 329 // initialised the lock while we were waiting, return immediately 330 // indicating that initialisation is not required. 331 case GuardState::InitDone: 332 return 0; 333 // If we acquired the lock, return immediately to start 334 // initialisation. 335 case GuardState::InitLockSucceeded: 336 return 1; 337 // If we didn't acquire the lock, pause and retry. 338 case GuardState::InitLockFailed: 339 break; 340 } 341 sched_yield(); 342 } 343 } 344 345 /** 346 * Releases the lock without marking the object as initialised. This function 347 * is called if initialising a static causes an exception to be thrown. 348 */ 349 extern "C" void __cxa_guard_abort(Guard *guard_object) 350 { 351 guard_object->unlock(false); 352 } 353 354 /** 355 * Releases the guard and marks the object as initialised. This function is 356 * called after successful initialisation of a static. 357 */ 358 extern "C" void __cxa_guard_release(Guard *guard_object) 359 { 360 guard_object->unlock(true); 361 } 362