1 /*
2 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
3 * Copyright 2021 David Chisnall. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
16 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /**
29 * guard.cc: Functions for thread-safe static initialisation.
30 *
 * Static values in C++ can be initialised lazily on their first use. This file
32 * contains functions that are used to ensure that two threads attempting to
33 * initialize the same static do not call the constructor twice. This is
34 * important because constructors can have side effects, so calling the
35 * constructor twice may be very bad.
36 *
37 * Statics that require initialisation are protected by a 64-bit value. Any
38 * platform that can do 32-bit atomic test and set operations can use this
39 * value as a low-overhead lock. Because statics (in most sane code) are
40 * accessed far more times than they are initialised, this lock implementation
41 * is heavily optimised towards the case where the static has already been
42 * initialised.
43 */
44 #include "atomic.h"
45 #include <assert.h>
46 #include <pthread.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49
// Some toolchains (older GCC in particular) do not predefine
// __LITTLE_ENDIAN__, so derive it here when it is missing.
#if !defined(__LITTLE_ENDIAN__)
#  if defined(__BYTE_ORDER__)
// Preferred source: the compiler-provided byte-order macros.
#    if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#      define __LITTLE_ENDIAN__
#    endif
#  elif defined(__x86_64) || defined(__i386)
// No byte-order macros at all. x86 is the only family recognised as
// little endian by name here (Arm gets its own guard layout further down
// and does not depend on this macro); every other target is assumed to be
// big endian.
#    define __LITTLE_ENDIAN__
#  endif
#endif
64
65 /*
66 * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)
 * values with one bit defined to indicate that the guarded variable is
 * initialised and another bit to indicate that it's currently locked
 * (initialisation in progress). The bit to use depends on the byte order of
 * the target.
70 *
71 * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we
72 * treat the two halves of the 64-bit word as independent values and establish
73 * an ordering on them such that the guard word is never modified unless the
74 * lock word is in the locked state. This means that we can do double-checked
75 * locking by loading the guard word and, if it is not initialised, trying to
76 * transition the lock word from the unlocked to locked state, and then
77 * manipulate the guard word.
78 */
79 namespace
80 {
81 /**
82 * The state of the guard variable when an attempt is made to lock it.
83 */
enum class GuardState
{
	/// Initialisation is already done, so the lock is neither held by the
	/// caller nor needed.
	InitDone,

	/// Initialisation has not happened yet and the caller now holds the
	/// lock, so the caller is responsible for running the initialiser.
	InitLockSucceeded,

	/// The lock could not be acquired on this attempt.
	InitLockFailed
};
102
103 /**
104 * Class encapsulating a single atomic word being used to represent the
105 * guard. The word size is defined by the type of `GuardWord`. The bit
106 * used to indicate the locked state is `1<<LockedBit`, the bit used to
107 * indicate the initialised state is `1<<InitBit`.
108 */
109 template<typename GuardWord, int LockedBit, int InitBit>
110 struct SingleWordGuard
111 {
112 /**
113 * The value indicating that the lock bit is set (and no other bits).
114 */
115 static constexpr GuardWord locked = static_cast<GuardWord>(1)
116 << LockedBit;
117
118 /**
119 * The value indicating that the initialised bit is set (and all other
120 * bits are zero).
121 */
122 static constexpr GuardWord initialised = static_cast<GuardWord>(1)
123 << InitBit;
124
125 /**
126 * The guard variable.
127 */
128 atomic<GuardWord> val;
129
130 public:
131 /**
132 * Release the lock and set the initialised state. In the single-word
133 * implementation here, these are both done by a single store.
134 */
unlock__anone6a749c70111::SingleWordGuard135 void unlock(bool isInitialised)
136 {
137 val.store(isInitialised ? initialised : 0, memory_order::release);
138 #ifndef NDEBUG
139 GuardWord init_state = initialised;
140 assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);
141 #endif
142 }
143
144 /**
145 * Try to acquire the lock. This has a tri-state return, indicating
146 * either that the lock was acquired, it wasn't acquired because it was
147 * contended, or it wasn't acquired because the guarded variable is
148 * already initialised.
149 */
try_lock__anone6a749c70111::SingleWordGuard150 GuardState try_lock()
151 {
152 GuardWord old = 0;
153 // Try to acquire the lock, assuming that we are in the state where
154 // the lock is not held and the variable is not initialised (so the
155 // expected value is 0).
156 if (val.compare_exchange(old, locked))
157 {
158 return GuardState::InitLockSucceeded;
159 }
160 // If the CAS failed and the old value indicates that this is
161 // initialised, return that initialisation is done and skip further
162 // retries.
163 if (old == initialised)
164 {
165 return GuardState::InitDone;
166 }
167 // Otherwise, report failure.
168 return GuardState::InitLockFailed;
169 }
170
171 /**
172 * Check whether the guard indicates that the variable is initialised.
173 */
is_initialised__anone6a749c70111::SingleWordGuard174 bool is_initialised()
175 {
176 return (val.load(memory_order::acquire) & initialised) ==
177 initialised;
178 }
179 };
180
181 /**
182 * Class encapsulating using two 32-bit atomic values to represent a 64-bit
183 * guard variable.
184 */
185 template<int LockedBit, int InitBit>
186 class DoubleWordGuard
187 {
188 /**
189 * The value of `lock_word` when the lock is held.
190 */
191 static constexpr uint32_t locked = static_cast<uint32_t>(1)
192 << LockedBit;
193
194 /**
195 * The value of `init_word` when the guarded variable is initialised.
196 */
197 static constexpr uint32_t initialised = static_cast<uint32_t>(1)
198 << InitBit;
199
200 /**
201 * The word used for the initialised flag. This is always the first
202 * word irrespective of endian because the generated code compares the
203 * first byte in memory against 0.
204 */
205 atomic<uint32_t> init_word;
206
207 /**
208 * The word used for the lock.
209 */
210 atomic<uint32_t> lock_word;
211
212 public:
213 /**
214 * Try to acquire the lock. This has a tri-state return, indicating
215 * either that the lock was acquired, it wasn't acquired because it was
216 * contended, or it wasn't acquired because the guarded variable is
217 * already initialised.
218 */
try_lock()219 GuardState try_lock()
220 {
221 uint32_t old = 0;
222 // Try to acquire the lock
223 if (lock_word.compare_exchange(old, locked))
224 {
225 // If we succeeded, check if initialisation has happened. In
226 // this version, we don't have atomic manipulation of both the
227 // lock and initialised bits together. Instead, we have an
228 // ordering rule that the initialised bit is only ever updated
229 // with the lock held.
230 if (is_initialised())
231 {
232 // If another thread did manage to initialise this, release
233 // the lock and notify the caller that initialisation is
234 // done.
235 lock_word.store(0, memory_order::release);
236 return GuardState::InitDone;
237 }
238 return GuardState::InitLockSucceeded;
239 }
240 return GuardState::InitLockFailed;
241 }
242
243 /**
244 * Set the initialised state and release the lock. In this
245 * implementation, this is ordered, not atomic: the initialise bit is
246 * set while the lock is held.
247 */
unlock(bool isInitialised)248 void unlock(bool isInitialised)
249 {
250 init_word.store(isInitialised ? initialised : 0,
251 memory_order::release);
252 lock_word.store(0, memory_order::release);
253 assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);
254 }
255
256 /**
257 * Return whether the guarded variable is initialised.
258 */
is_initialised()259 bool is_initialised()
260 {
261 return (init_word.load(memory_order::acquire) & initialised) ==
262 initialised;
263 }
264 };
265
266 // Check that the two implementations are the correct size.
267 static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),
268 "Single-word 32-bit guard must be 32 bits");
269 static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),
270 "Single-word 64-bit guard must be 64 bits");
271 static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),
272 "Double-word guard must be 64 bits");
273
274 #ifdef __arm__
275 /**
276 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.
277 */
278 using Guard = SingleWordGuard<uint32_t, 31, 0>;
279 #elif defined(_LP64)
280 # if defined(__LITTLE_ENDIAN__)
281 /**
282 * On little-endian 64-bit platforms the guard word is a single 64-bit
283 * atomic with the lock in the high bit and the initialised flag in the low
284 * bit.
285 */
286 using Guard = SingleWordGuard<uint64_t, 63, 0>;
287 # else
288 /**
289 * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic
290 * with the lock in the low bit and the initialised bit in the highest
291 * byte.
292 */
293 using Guard = SingleWordGuard<uint64_t, 0, 56>;
294 # endif
295 #else
296 # if defined(__LITTLE_ENDIAN__)
297 /**
298 * 32-bit platforms use the same layout as 64-bit.
299 */
300 using Guard = DoubleWordGuard<31, 0>;
301 # else
302 /**
303 * 32-bit platforms use the same layout as 64-bit.
304 */
305 using Guard = DoubleWordGuard<0, 24>;
306 # endif
307 #endif
308
309 } // namespace
310
311 /**
312 * Acquires a lock on a guard, returning 0 if the object has already been
313 * initialised, and 1 if it has not. If the object is already constructed then
314 * this function just needs to read a byte from memory and return.
315 */
__cxa_guard_acquire(Guard * guard_object)316 extern "C" int __cxa_guard_acquire(Guard *guard_object)
317 {
318 // Check if this is already initialised. If so, we don't have to do
319 // anything.
320 if (guard_object->is_initialised())
321 {
322 return 0;
323 }
324 // Spin trying to acquire the lock. If we fail to acquire the lock the
325 // first time then another thread will *probably* initialise it, but if the
326 // constructor throws an exception then we may have to try again in this
327 // thread.
328 for (;;)
329 {
330 // Try to acquire the lock.
331 switch (guard_object->try_lock())
332 {
333 // If we failed to acquire the lock but another thread has
334 // initialised the lock while we were waiting, return immediately
335 // indicating that initialisation is not required.
336 case GuardState::InitDone:
337 return 0;
338 // If we acquired the lock, return immediately to start
339 // initialisation.
340 case GuardState::InitLockSucceeded:
341 return 1;
342 // If we didn't acquire the lock, pause and retry.
343 case GuardState::InitLockFailed:
344 break;
345 }
346 sched_yield();
347 }
348 }
349
350 /**
351 * Releases the lock without marking the object as initialised. This function
352 * is called if initialising a static causes an exception to be thrown.
353 */
__cxa_guard_abort(Guard * guard_object)354 extern "C" void __cxa_guard_abort(Guard *guard_object)
355 {
356 guard_object->unlock(false);
357 }
358
359 /**
360 * Releases the guard and marks the object as initialised. This function is
361 * called after successful initialisation of a static.
362 */
__cxa_guard_release(Guard * guard_object)363 extern "C" void __cxa_guard_release(Guard *guard_object)
364 {
365 guard_object->unlock(true);
366 }
367