xref: /freebsd/contrib/libcxxrt/guard.cc (revision 5c6935a645604b1e39b2cf6dbc23f66ec58f3e54)
1 /*
2  * Copyright 2010-2012 PathScale, Inc. All rights reserved.
3  * Copyright 2021 David Chisnall. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  *    this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  *    this list of conditions and the following disclaimer in the documentation
13  *    and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
16  * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
17  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /**
29  * guard.cc: Functions for thread-safe static initialisation.
30  *
 * Static values in C++ can be initialised lazily on their first use.  This
 * file contains functions that are used to ensure that two threads attempting
 * to initialise the same static do not call the constructor twice.  This is
 * important because constructors can have side effects, so calling the
 * constructor twice may be very bad.
36  *
37  * Statics that require initialisation are protected by a 64-bit value.  Any
38  * platform that can do 32-bit atomic test and set operations can use this
39  * value as a low-overhead lock.  Because statics (in most sane code) are
40  * accessed far more times than they are initialised, this lock implementation
41  * is heavily optimised towards the case where the static has already been
42  * initialised.
43  */
44 #include "atomic.h"
45 #include <assert.h>
46 #include <pthread.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49 
// Not every compiler predefines __LITTLE_ENDIAN__ (older GCC does not), so
// define it here whenever the target can be identified as little endian.
#if !defined(__LITTLE_ENDIAN__)
#	if defined(__BYTE_ORDER__)
// The compiler exposes its byte order directly: trust that.
#		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#			define __LITTLE_ENDIAN__
#		endif
#	elif defined(__x86_64) || defined(__i386)
// No __BYTE_ORDER__ available.  x86 is recognised explicitly as little
// endian; every other target reaching this point is assumed to be big
// endian and the macro is left undefined.
#		define __LITTLE_ENDIAN__
#	endif
#endif
64 
65 /*
 * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)
 * values with one bit defined to indicate that the guarded variable is
 * initialised and another bit to indicate that it's currently locked
 * (initialisation in progress).  The bit to use depends on the byte order of
 * the target.
70  *
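 * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we
 * treat the two halves of the 64-bit word as independent values: one half
 * holds the initialised flag and the other holds the lock, and the initialised
 * flag is only ever updated while the lock is held.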
73  */
74 namespace
75 {
76 	/**
77 	 * The state of the guard variable when an attempt is made to lock it.
78 	 */
79 	enum class GuardState
80 	{
81 		/**
82 		 * The lock is not held but is not needed because initialisation is
83 		 * one.
84 		 */
85 		InitDone,
86 
87 		/**
88 		 * Initialisation is not done but the lock is held by the caller.
89 		 */
90 		InitLockSucceeded,
91 
92 		/**
93 		 * Attempting to acquire the lock failed.
94 		 */
95 		InitLockFailed
96 	};
97 
98 	/**
99 	 * Class encapsulating a single atomic word being used to represent the
100 	 * guard.  The word size is defined by the type of `GuardWord`.  The bit
101 	 * used to indicate the locked state is `1<<LockedBit`, the bit used to
102 	 * indicate the initialised state is `1<<InitBit`.
103 	 */
104 	template<typename GuardWord, int LockedBit, int InitBit>
105 	struct SingleWordGuard
106 	{
107 		/**
108 		 * The value indicating that the lock bit is set (and no other bits).
109 		 */
110 		static constexpr GuardWord locked = static_cast<GuardWord>(1)
111 		                                    << LockedBit;
112 
113 		/**
114 		 * The value indicating that the initialised bit is set (and all other
115 		 * bits are zero).
116 		 */
117 		static constexpr GuardWord initialised = static_cast<GuardWord>(1)
118 		                                         << InitBit;
119 
120 		/**
121 		 * The guard variable.
122 		 */
123 		atomic<GuardWord> val;
124 
125 		public:
126 		/**
127 		 * Release the lock and set the initialised state.  In the single-word
128 		 * implementation here, these are both done by a single store.
129 		 */
130 		void unlock(bool isInitialised)
131 		{
132 			val.store(isInitialised ? initialised : 0, memory_order::release);
133 #ifndef NDEBUG
134 			GuardWord init_state = initialised;
135 			assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);
136 #endif
137 		}
138 
139 		/**
140 		 * Try to acquire the lock.  This has a tri-state return, indicating
141 		 * either that the lock was acquired, it wasn't acquired because it was
142 		 * contended, or it wasn't acquired because the guarded variable is
143 		 * already initialised.
144 		 */
145 		GuardState try_lock()
146 		{
147 			GuardWord old = 0;
148 			// Try to acquire the lock, assuming that we are in the state where
149 			// the lock is not held and the variable is not initialised (so the
150 			// expected value is 0).
151 			if (val.compare_exchange(old, locked))
152 			{
153 				return GuardState::InitLockSucceeded;
154 			}
155 			// If the CAS failed and the old value indicates that this is
156 			// initialised, return that initialisation is done and skip further
157 			// retries.
158 			if (old == initialised)
159 			{
160 				return GuardState::InitDone;
161 			}
162 			// Otherwise, report failure.
163 			return GuardState::InitLockFailed;
164 		}
165 
166 		/**
167 		 * Check whether the guard indicates that the variable is initialised.
168 		 */
169 		bool is_initialised()
170 		{
171 			return (val.load(memory_order::acquire) & initialised) ==
172 			       initialised;
173 		}
174 	};
175 
176 	/**
177 	 * Class encapsulating using two 32-bit atomic values to represent a 64-bit
178 	 * guard variable.
179 	 */
180 	template<int LockedBit, int InitBit>
181 	class DoubleWordGuard
182 	{
183 		/**
184 		 * The value of `lock_word` when the lock is held.
185 		 */
186 		static constexpr uint32_t locked = static_cast<uint32_t>(1)
187 		                                   << LockedBit;
188 
189 		/**
190 		 * The value of `init_word` when the guarded variable is initialised.
191 		 */
192 		static constexpr uint32_t initialised = static_cast<uint32_t>(1)
193 		                                        << InitBit;
194 
195 		/**
196 		 * The word used for the initialised flag.  This is always the first
197 		 * word irrespective of endian because the generated code compares the
198 		 * first byte in memory against 0.
199 		 */
200 		atomic<uint32_t> init_word;
201 
202 		/**
203 		 * The word used for the lock.
204 		 */
205 		atomic<uint32_t> lock_word;
206 
207 		public:
208 		/**
209 		 * Try to acquire the lock.  This has a tri-state return, indicating
210 		 * either that the lock was acquired, it wasn't acquired because it was
211 		 * contended, or it wasn't acquired because the guarded variable is
212 		 * already initialised.
213 		 */
214 		GuardState try_lock()
215 		{
216 			uint32_t old = 0;
217 			// Try to acquire the lock
218 			if (lock_word.compare_exchange(old, locked))
219 			{
220 				// If we succeeded, check if initialisation has happened.  In
221 				// this version, we don't have atomic manipulation of both the
222 				// lock and initialised bits together.  Instead, we have an
223 				// ordering rule that the initialised bit is only ever updated
224 				// with the lock held.
225 				if (is_initialised())
226 				{
227 					// If another thread did manage to initialise this, release
228 					// the lock and notify the caller that initialisation is
229 					// done.
230 					lock_word.store(initialised, memory_order::release);
231 					return GuardState::InitDone;
232 				}
233 				return GuardState::InitLockSucceeded;
234 			}
235 			return GuardState::InitLockFailed;
236 		}
237 
238 		/**
239 		 * Set the initialised state and release the lock.  In this
240 		 * implementation, this is ordered, not atomic: the initialise bit is
241 		 * set while the lock is held.
242 		 */
243 		void unlock(bool isInitialised)
244 		{
245 			init_word.store(isInitialised ? initialised : 0,
246 			                  memory_order::release);
247 			lock_word.store(0, memory_order::release);
248 			assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);
249 		}
250 
251 		/**
252 		 * Return whether the guarded variable is initialised.
253 		 */
254 		bool is_initialised()
255 		{
256 			return (init_word.load(memory_order::acquire) & initialised) ==
257 			       initialised;
258 		}
259 	};
260 
261 	// Check that the two implementations are the correct size.
262 	static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),
263 	              "Single-word 32-bit guard must be 32 bits");
264 	static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),
265 	              "Single-word 64-bit guard must be 64 bits");
266 	static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),
267 	              "Double-word guard must be 64 bits");
268 
269 #ifdef __arm__
270 	/**
271 	 * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.
272 	 */
273 	using Guard = SingleWordGuard<uint32_t, 31, 0>;
274 #elif defined(_LP64)
275 #	if defined(__LITTLE_ENDIAN__)
276 	/**
277 	 * On little-endian 64-bit platforms the guard word is a single 64-bit
278 	 * atomic with the lock in the high bit and the initialised flag in the low
279 	 * bit.
280 	 */
281 	using Guard = SingleWordGuard<uint64_t, 63, 0>;
282 #	else
283 	/**
284 	 * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic
285 	 * with the lock in the low bit and the initialised bit in the highest
286 	 * byte.
287 	 */
288 	using Guard = SingleWordGuard<uint64_t, 0, 56>;
289 #	endif
290 #else
291 #	if defined(__LITTLE_ENDIAN__)
292 	/**
293 	 * 32-bit platforms use the same layout as 64-bit.
294 	 */
295 	using Guard = DoubleWordGuard<31, 0>;
296 #	else
297 	/**
298 	 * 32-bit platforms use the same layout as 64-bit.
299 	 */
300 	using Guard = DoubleWordGuard<0, 24>;
301 #	endif
302 #endif
303 
304 } // namespace
305 
306 /**
307  * Acquires a lock on a guard, returning 0 if the object has already been
308  * initialised, and 1 if it has not.  If the object is already constructed then
309  * this function just needs to read a byte from memory and return.
310  */
311 extern "C" int __cxa_guard_acquire(Guard *guard_object)
312 {
313 	// Check if this is already initialised.  If so, we don't have to do
314 	// anything.
315 	if (guard_object->is_initialised())
316 	{
317 		return 0;
318 	}
319 	// Spin trying to acquire the lock.  If we fail to acquire the lock the
320 	// first time then another thread will *probably* initialise it, but if the
321 	// constructor throws an exception then we may have to try again in this
322 	// thread.
323 	for (;;)
324 	{
325 		// Try to acquire the lock.
326 		switch (guard_object->try_lock())
327 		{
328 			// If we failed to acquire the lock but another thread has
329 			// initialised the lock while we were waiting, return immediately
330 			// indicating that initialisation is not required.
331 			case GuardState::InitDone:
332 				return 0;
333 			// If we acquired the lock, return immediately to start
334 			// initialisation.
335 			case GuardState::InitLockSucceeded:
336 				return 1;
337 			// If we didn't acquire the lock, pause and retry.
338 			case GuardState::InitLockFailed:
339 				break;
340 		}
341 		sched_yield();
342 	}
343 }
344 
345 /**
346  * Releases the lock without marking the object as initialised.  This function
347  * is called if initialising a static causes an exception to be thrown.
348  */
349 extern "C" void __cxa_guard_abort(Guard *guard_object)
350 {
351 	guard_object->unlock(false);
352 }
353 
354 /**
355  * Releases the guard and marks the object as initialised.  This function is
356  * called after successful initialisation of a static.
357  */
358 extern "C" void __cxa_guard_release(Guard *guard_object)
359 {
360 	guard_object->unlock(true);
361 }
362