1 /* 2 * Copyright 2010-2012 PathScale, Inc. All rights reserved. 3 * Copyright 2021 David Chisnall. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS 16 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 /** 29 * guard.cc: Functions for thread-safe static initialisation. 30 * 31 * Static values in C++ can be initialised lazily their first use. This file 32 * contains functions that are used to ensure that two threads attempting to 33 * initialize the same static do not call the constructor twice. This is 34 * important because constructors can have side effects, so calling the 35 * constructor twice may be very bad. 36 * 37 * Statics that require initialisation are protected by a 64-bit value. Any 38 * platform that can do 32-bit atomic test and set operations can use this 39 * value as a low-overhead lock. Because statics (in most sane code) are 40 * accessed far more times than they are initialised, this lock implementation 41 * is heavily optimised towards the case where the static has already been 42 * initialised. 43 */ 44 #include "atomic.h" 45 #include <assert.h> 46 #include <pthread.h> 47 #include <stdint.h> 48 #include <stdlib.h> 49 50 // Older GCC doesn't define __LITTLE_ENDIAN__ 51 #ifndef __LITTLE_ENDIAN__ 52 // If __BYTE_ORDER__ is defined, use that instead 53 # ifdef __BYTE_ORDER__ 54 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 55 # define __LITTLE_ENDIAN__ 56 # endif 57 // x86 and ARM are the most common little-endian CPUs, so let's have a 58 // special case for them (ARM is already special cased). Assume everything 59 // else is big endian. 60 # elif defined(__x86_64) || defined(__i386) 61 # define __LITTLE_ENDIAN__ 62 # endif 63 #endif 64 65 /* 66 * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32) 67 * values with one bit defined to indicate that the guarded variable is and 68 * another bit to indicate that it's currently locked (initialisation in 69 * progress). The bit to use depends on the byte order of the target. 70 * 71 * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we 72 * treat the two halves of the 64-bit word as independent values and establish 73 * an ordering on them such that the guard word is never modified unless the 74 * lock word is in the locked state. This means that we can do double-checked 75 * locking by loading the guard word and, if it is not initialised, trying to 76 * transition the lock word from the unlocked to locked state, and then 77 * manipulate the guard word. 78 */ 79 namespace 80 { 81 /** 82 * The state of the guard variable when an attempt is made to lock it. 83 */ 84 enum class GuardState 85 { 86 /** 87 * The lock is not held but is not needed because initialisation is 88 * one. 89 */ 90 InitDone, 91 92 /** 93 * Initialisation is not done but the lock is held by the caller. 94 */ 95 InitLockSucceeded, 96 97 /** 98 * Attempting to acquire the lock failed. 99 */ 100 InitLockFailed 101 }; 102 103 /** 104 * Class encapsulating a single atomic word being used to represent the 105 * guard. The word size is defined by the type of `GuardWord`. The bit 106 * used to indicate the locked state is `1<<LockedBit`, the bit used to 107 * indicate the initialised state is `1<<InitBit`. 108 */ 109 template<typename GuardWord, int LockedBit, int InitBit> 110 struct SingleWordGuard 111 { 112 /** 113 * The value indicating that the lock bit is set (and no other bits). 114 */ 115 static constexpr GuardWord locked = static_cast<GuardWord>(1) 116 << LockedBit; 117 118 /** 119 * The value indicating that the initialised bit is set (and all other 120 * bits are zero). 121 */ 122 static constexpr GuardWord initialised = static_cast<GuardWord>(1) 123 << InitBit; 124 125 /** 126 * The guard variable. 127 */ 128 atomic<GuardWord> val; 129 130 public: 131 /** 132 * Release the lock and set the initialised state. In the single-word 133 * implementation here, these are both done by a single store. 134 */ 135 void unlock(bool isInitialised) 136 { 137 val.store(isInitialised ? initialised : 0, memory_order::release); 138 #ifndef NDEBUG 139 GuardWord init_state = initialised; 140 assert(*reinterpret_cast<uint8_t*>(&init_state) != 0); 141 #endif 142 } 143 144 /** 145 * Try to acquire the lock. This has a tri-state return, indicating 146 * either that the lock was acquired, it wasn't acquired because it was 147 * contended, or it wasn't acquired because the guarded variable is 148 * already initialised. 149 */ 150 GuardState try_lock() 151 { 152 GuardWord old = 0; 153 // Try to acquire the lock, assuming that we are in the state where 154 // the lock is not held and the variable is not initialised (so the 155 // expected value is 0). 156 if (val.compare_exchange(old, locked)) 157 { 158 return GuardState::InitLockSucceeded; 159 } 160 // If the CAS failed and the old value indicates that this is 161 // initialised, return that initialisation is done and skip further 162 // retries. 163 if (old == initialised) 164 { 165 return GuardState::InitDone; 166 } 167 // Otherwise, report failure. 168 return GuardState::InitLockFailed; 169 } 170 171 /** 172 * Check whether the guard indicates that the variable is initialised. 173 */ 174 bool is_initialised() 175 { 176 return (val.load(memory_order::acquire) & initialised) == 177 initialised; 178 } 179 }; 180 181 /** 182 * Class encapsulating using two 32-bit atomic values to represent a 64-bit 183 * guard variable. 184 */ 185 template<int LockedBit, int InitBit> 186 class DoubleWordGuard 187 { 188 /** 189 * The value of `lock_word` when the lock is held. 190 */ 191 static constexpr uint32_t locked = static_cast<uint32_t>(1) 192 << LockedBit; 193 194 /** 195 * The value of `init_word` when the guarded variable is initialised. 196 */ 197 static constexpr uint32_t initialised = static_cast<uint32_t>(1) 198 << InitBit; 199 200 /** 201 * The word used for the initialised flag. This is always the first 202 * word irrespective of endian because the generated code compares the 203 * first byte in memory against 0. 204 */ 205 atomic<uint32_t> init_word; 206 207 /** 208 * The word used for the lock. 209 */ 210 atomic<uint32_t> lock_word; 211 212 public: 213 /** 214 * Try to acquire the lock. This has a tri-state return, indicating 215 * either that the lock was acquired, it wasn't acquired because it was 216 * contended, or it wasn't acquired because the guarded variable is 217 * already initialised. 218 */ 219 GuardState try_lock() 220 { 221 uint32_t old = 0; 222 // Try to acquire the lock 223 if (lock_word.compare_exchange(old, locked)) 224 { 225 // If we succeeded, check if initialisation has happened. In 226 // this version, we don't have atomic manipulation of both the 227 // lock and initialised bits together. Instead, we have an 228 // ordering rule that the initialised bit is only ever updated 229 // with the lock held. 230 if (is_initialised()) 231 { 232 // If another thread did manage to initialise this, release 233 // the lock and notify the caller that initialisation is 234 // done. 235 lock_word.store(0, memory_order::release); 236 return GuardState::InitDone; 237 } 238 return GuardState::InitLockSucceeded; 239 } 240 return GuardState::InitLockFailed; 241 } 242 243 /** 244 * Set the initialised state and release the lock. In this 245 * implementation, this is ordered, not atomic: the initialise bit is 246 * set while the lock is held. 247 */ 248 void unlock(bool isInitialised) 249 { 250 init_word.store(isInitialised ? initialised : 0, 251 memory_order::release); 252 lock_word.store(0, memory_order::release); 253 assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised); 254 } 255 256 /** 257 * Return whether the guarded variable is initialised. 258 */ 259 bool is_initialised() 260 { 261 return (init_word.load(memory_order::acquire) & initialised) == 262 initialised; 263 } 264 }; 265 266 // Check that the two implementations are the correct size. 267 static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t), 268 "Single-word 32-bit guard must be 32 bits"); 269 static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t), 270 "Single-word 64-bit guard must be 64 bits"); 271 static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t), 272 "Double-word guard must be 64 bits"); 273 274 #ifdef __arm__ 275 /** 276 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words. 277 */ 278 using Guard = SingleWordGuard<uint32_t, 31, 0>; 279 #elif defined(_LP64) 280 # if defined(__LITTLE_ENDIAN__) 281 /** 282 * On little-endian 64-bit platforms the guard word is a single 64-bit 283 * atomic with the lock in the high bit and the initialised flag in the low 284 * bit. 285 */ 286 using Guard = SingleWordGuard<uint64_t, 63, 0>; 287 # else 288 /** 289 * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic 290 * with the lock in the low bit and the initialised bit in the highest 291 * byte. 292 */ 293 using Guard = SingleWordGuard<uint64_t, 0, 56>; 294 # endif 295 #else 296 # if defined(__LITTLE_ENDIAN__) 297 /** 298 * 32-bit platforms use the same layout as 64-bit. 299 */ 300 using Guard = DoubleWordGuard<31, 0>; 301 # else 302 /** 303 * 32-bit platforms use the same layout as 64-bit. 304 */ 305 using Guard = DoubleWordGuard<0, 24>; 306 # endif 307 #endif 308 309 } // namespace 310 311 /** 312 * Acquires a lock on a guard, returning 0 if the object has already been 313 * initialised, and 1 if it has not. If the object is already constructed then 314 * this function just needs to read a byte from memory and return. 315 */ 316 extern "C" int __cxa_guard_acquire(Guard *guard_object) 317 { 318 // Check if this is already initialised. If so, we don't have to do 319 // anything. 320 if (guard_object->is_initialised()) 321 { 322 return 0; 323 } 324 // Spin trying to acquire the lock. If we fail to acquire the lock the 325 // first time then another thread will *probably* initialise it, but if the 326 // constructor throws an exception then we may have to try again in this 327 // thread. 328 for (;;) 329 { 330 // Try to acquire the lock. 331 switch (guard_object->try_lock()) 332 { 333 // If we failed to acquire the lock but another thread has 334 // initialised the lock while we were waiting, return immediately 335 // indicating that initialisation is not required. 336 case GuardState::InitDone: 337 return 0; 338 // If we acquired the lock, return immediately to start 339 // initialisation. 340 case GuardState::InitLockSucceeded: 341 return 1; 342 // If we didn't acquire the lock, pause and retry. 343 case GuardState::InitLockFailed: 344 break; 345 } 346 sched_yield(); 347 } 348 } 349 350 /** 351 * Releases the lock without marking the object as initialised. This function 352 * is called if initialising a static causes an exception to be thrown. 353 */ 354 extern "C" void __cxa_guard_abort(Guard *guard_object) 355 { 356 guard_object->unlock(false); 357 } 358 359 /** 360 * Releases the guard and marks the object as initialised. This function is 361 * called after successful initialisation of a static. 362 */ 363 extern "C" void __cxa_guard_release(Guard *guard_object) 364 { 365 guard_object->unlock(true); 366 } 367