xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/emutls.c (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
//===---------- emutls.c - Implements __emutls_get_address ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "int_lib.h"

#ifdef __BIONIC__
// There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
// to round 2. We need to delay deallocation because:
//  - Android versions older than M lack __cxa_thread_atexit_impl, so apps
//    use a pthread key destructor to call C++ destructors.
//  - Apps might use __thread/thread_local variables in pthread destructors.
// We can't wait until the final two rounds, because jemalloc needs two rounds
// after the final malloc/free call to free its thread-specific data (see
// https://reviews.llvm.org/D46978#1107507).
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#endif

#if defined(_MSC_VER) && !defined(__clang__)
// MSVC raises a warning about a nonstandard extension being used for the
// zero-sized element in this array. Disable this for warn-as-error builds.
#pragma warning(push)
#pragma warning(disable : 4200)
#endif

typedef struct emutls_address_array {
  uintptr_t skip_destructor_rounds;
  uintptr_t size; // number of elements in the 'data' array
  void *data[];
} emutls_address_array;
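
// Each thread that touches emulated TLS owns one emutls_address_array,
// reached through the pthread key (POSIX) or TLS index (Windows) defined
// below. Slot data[i] holds that thread's copy of the TLS variable whose
// 1-based index is i + 1; see __emutls_control.object.index further down.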

#if defined(_MSC_VER) && !defined(__clang__)
#pragma warning(pop)
#endif

static void emutls_shutdown(emutls_address_array *array);

#ifndef _WIN32

#include <pthread.h>

static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_key_t emutls_pthread_key;
static bool emutls_key_created = false;

typedef unsigned int gcc_word __attribute__((mode(word)));
typedef unsigned int gcc_pointer __attribute__((mode(pointer)));

// Default is not to use posix_memalign, so systems like Android
// can use thread local data without heavier POSIX memory allocators.
#ifndef EMUTLS_USE_POSIX_MEMALIGN
#define EMUTLS_USE_POSIX_MEMALIGN 0
#endif

static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base;
#if EMUTLS_USE_POSIX_MEMALIGN
  if (posix_memalign(&base, align, size) != 0)
    abort();
#else
#define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *))
  char *object;
  if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
    abort();
  base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) &
                  ~(uintptr_t)(align - 1));

  ((void **)base)[-1] = object;
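  // Example, assuming a 64-bit target and align == 16: if malloc returns
  // 0x1004, base rounds up to 0x1010 and the raw pointer 0x1004 is stored in
  // the word just below base (at 0x1008), where emutls_memalign_free() will
  // find it.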
#endif
  return base;
}

static __inline void emutls_memalign_free(void *base) {
#if EMUTLS_USE_POSIX_MEMALIGN
  free(base);
#else
  // The address returned by malloc is stored in ((void **)base)[-1].
  free(((void **)base)[-1]);
#endif
}

static __inline void emutls_setspecific(emutls_address_array *value) {
  pthread_setspecific(emutls_pthread_key, (void *)value);
}

static __inline emutls_address_array *emutls_getspecific() {
  return (emutls_address_array *)pthread_getspecific(emutls_pthread_key);
}

static void emutls_key_destructor(void *ptr) {
  emutls_address_array *array = (emutls_address_array *)ptr;
  if (array->skip_destructor_rounds > 0) {
    // emutls is deallocated using a pthread key destructor. These
    // destructors are called in several rounds to accommodate destructor
    // functions that (re)initialize key values with pthread_setspecific.
    // Delay the emutls deallocation to accommodate other end-of-thread
    // cleanup tasks like calling thread_local destructors (e.g. the
    // __cxa_thread_atexit fallback in libc++abi).
    array->skip_destructor_rounds--;
    emutls_setspecific(array);
  } else {
    emutls_shutdown(array);
    free(ptr);
  }
}

static __inline void emutls_init(void) {
  if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
    abort();
  emutls_key_created = true;
}

static __inline void emutls_init_once(void) {
  static pthread_once_t once = PTHREAD_ONCE_INIT;
  pthread_once(&once, emutls_init);
}

static __inline void emutls_lock() { pthread_mutex_lock(&emutls_mutex); }

static __inline void emutls_unlock() { pthread_mutex_unlock(&emutls_mutex); }

#else // _WIN32

#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <windows.h>

static LPCRITICAL_SECTION emutls_mutex;
static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;

typedef uintptr_t gcc_word;
typedef void *gcc_pointer;

static void win_error(DWORD last_err, const char *hint) {
  char *buffer = NULL;
  if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                         FORMAT_MESSAGE_FROM_SYSTEM |
                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
                     NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
    fprintf(stderr, "Windows error: %s\n", buffer);
  } else {
    fprintf(stderr, "Unknown Windows error: %s\n", hint);
  }
  LocalFree(buffer);
}

static __inline void win_abort(DWORD last_err, const char *hint) {
  win_error(last_err, hint);
  abort();
}

static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base = _aligned_malloc(size, align);
  if (!base)
    win_abort(GetLastError(), "_aligned_malloc");
  return base;
}

static __inline void emutls_memalign_free(void *base) { _aligned_free(base); }

static void emutls_exit(void) {
  if (emutls_mutex) {
    DeleteCriticalSection(emutls_mutex);
    _aligned_free(emutls_mutex);
    emutls_mutex = NULL;
  }
  if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
    emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index));
    TlsFree(emutls_tls_index);
    emutls_tls_index = TLS_OUT_OF_INDEXES;
  }
}

static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
  (void)p0;
  (void)p1;
  (void)p2;
  emutls_mutex =
      (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
  if (!emutls_mutex) {
    win_error(GetLastError(), "_aligned_malloc");
    return FALSE;
  }
  InitializeCriticalSection(emutls_mutex);

  emutls_tls_index = TlsAlloc();
  if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
    emutls_exit();
    win_error(GetLastError(), "TlsAlloc");
    return FALSE;
  }
  atexit(&emutls_exit);
  return TRUE;
}

static __inline void emutls_init_once(void) {
  static INIT_ONCE once;
  InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
}

static __inline void emutls_lock() { EnterCriticalSection(emutls_mutex); }

static __inline void emutls_unlock() { LeaveCriticalSection(emutls_mutex); }

static __inline void emutls_setspecific(emutls_address_array *value) {
  if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0)
    win_abort(GetLastError(), "TlsSetValue");
}

static __inline emutls_address_array *emutls_getspecific() {
  LPVOID value = TlsGetValue(emutls_tls_index);
  if (value == NULL) {
    const DWORD err = GetLastError();
    if (err != ERROR_SUCCESS)
      win_abort(err, "TlsGetValue");
  }
  return (emutls_address_array *)value;
}

// Provide atomic load/store functions for emutls_get_index if built with MSVC.
#if !defined(__ATOMIC_RELEASE)
#include <intrin.h>

enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
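// These values match the __ATOMIC_* constants built into GCC and Clang, so
// the __atomic_load_n/__atomic_store_n calls in emutls_get_index compile
// unchanged with the shims below.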

static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
  assert(type == __ATOMIC_ACQUIRE);
  // These return the previous value - but since we do an OR with 0,
  // it's equivalent to a plain load.
#ifdef _WIN64
  return InterlockedOr64(ptr, 0);
#else
  return InterlockedOr(ptr, 0);
#endif
}

static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
  assert(type == __ATOMIC_RELEASE);
  InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}

#endif // __ATOMIC_RELEASE

#endif // _WIN32

static size_t emutls_num_object = 0; // number of allocated TLS objects

// Free the allocated TLS data
static void emutls_shutdown(emutls_address_array *array) {
  if (array) {
    uintptr_t i;
    for (i = 0; i < array->size; ++i) {
      if (array->data[i])
        emutls_memalign_free(array->data[i]);
    }
  }
}

// For every TLS variable xyz,
// there is one __emutls_control variable named __emutls_v.xyz.
// If xyz has non-zero initial value, __emutls_v.xyz's "value"
// will point to __emutls_t.xyz, which has the initial value.
typedef struct __emutls_control {
  // Must use gcc_word here, instead of size_t, to match GCC.  When
  // gcc_word is larger than size_t, the upper extra bits are all
  // zeros.  We can use variables of size_t to operate on size and
  // align.
  gcc_word size;  // size of the object in bytes
  gcc_word align; // alignment of the object in bytes
  union {
    uintptr_t index; // data[index-1] is the object address
    void *address;   // object address, when in single thread env
  } object;
  void *value; // null or non-zero initial value for the object
} __emutls_control;
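
// Illustrative sketch (not part of this file): for a definition such as
//   __thread int x = 5;
// the compiler emits an initializer object and a control variable roughly
// equivalent to
//   int __emutls_t.x = 5;
//   __emutls_control __emutls_v.x = {sizeof(int), _Alignof(int), {0},
//                                    &__emutls_t.x};
// and rewrites every access to x into
//   *(int *)__emutls_get_address(&__emutls_v.x);
// A zero-initialized variable gets a NULL 'value' and no __emutls_t object.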

// Emulated TLS objects are always allocated at run-time.
static __inline void *emutls_allocate_object(__emutls_control *control) {
  // Use standard C types, check with gcc's emutls.o.
  COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
  COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *));

  size_t size = control->size;
  size_t align = control->align;
  void *base;
  if (align < sizeof(void *))
    align = sizeof(void *);
  // Make sure that align is a power of 2.
  if ((align & (align - 1)) != 0)
    abort();

  base = emutls_memalign_alloc(align, size);
  if (control->value)
    memcpy(base, control->value, size);
  else
    memset(base, 0, size);
  return base;
}

// Returns control->object.index; set index if not allocated yet.
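// This is double-checked locking: a zero index means "not yet assigned",
// indices are handed out once under emutls_mutex, and the release store
// pairs with the acquire load on the fast path, so a thread that observes a
// nonzero index also observes the one-time setup done by emutls_init_once().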
static __inline uintptr_t emutls_get_index(__emutls_control *control) {
  uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
  if (!index) {
    emutls_init_once();
    emutls_lock();
    index = control->object.index;
    if (!index) {
      index = ++emutls_num_object;
      __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
    }
    emutls_unlock();
  }
  return index;
}

// Updates newly allocated thread local emutls_address_array.
static __inline void emutls_check_array_set_size(emutls_address_array *array,
                                                 uintptr_t size) {
  if (array == NULL)
    abort();
  array->size = size;
  emutls_setspecific(array);
}

// Returns the new 'data' array size, number of elements,
// which must be no smaller than the given index.
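// For example, with the two-word header used here, emutls_new_data_array_size(1)
// returns ((1 + 2 + 15) & ~15) - 2 == 14, and the matching emutls_asize(14)
// below works out to 16 words (128 bytes on a 64-bit target).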
static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
  // Need to allocate emutls_address_array with extra slots
  // to store the header.
  // Round up the emutls_address_array size to a multiple of 16 words.
  uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
  return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
}

// Returns the size in bytes required for an emutls_address_array with
// N elements in its data field.
static __inline uintptr_t emutls_asize(uintptr_t N) {
  return N * sizeof(void *) + sizeof(emutls_address_array);
}

// Returns the thread local emutls_address_array.
// Extends its size if necessary to hold address at index.
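// Newly added slots are zero-filled, so a NULL entry always means "not yet
// allocated"; any malloc/realloc failure aborts via
// emutls_check_array_set_size().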
static __inline emutls_address_array *
emutls_get_address_array(uintptr_t index) {
  emutls_address_array *array = emutls_getspecific();
  if (array == NULL) {
    uintptr_t new_size = emutls_new_data_array_size(index);
    array = (emutls_address_array *)malloc(emutls_asize(new_size));
    if (array) {
      memset(array->data, 0, new_size * sizeof(void *));
      array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
    }
    emutls_check_array_set_size(array, new_size);
  } else if (index > array->size) {
    uintptr_t orig_size = array->size;
    uintptr_t new_size = emutls_new_data_array_size(index);
    array = (emutls_address_array *)realloc(array, emutls_asize(new_size));
    if (array)
      memset(array->data + orig_size, 0,
             (new_size - orig_size) * sizeof(void *));
    emutls_check_array_set_size(array, new_size);
  }
  return array;
}

#ifndef _WIN32
// Our emulated TLS implementation relies on local state (e.g. for the pthread
// key), and if we duplicate this state across different shared libraries,
// accesses to the same TLS variable from different shared libraries will yield
// different results (see https://github.com/android/ndk/issues/1551 for an
// example). __emutls_get_address is the only external entry point for emulated
// TLS, and by making it default visibility and weak, we can rely on the dynamic
// linker to coalesce multiple copies at runtime and ensure a single unique copy
// of TLS state. This is a best effort; it won't work if the user is linking
// with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows,
// where the dynamic linker has no notion of coalescing weak symbols at runtime.
// A more robust solution would be to create a separate shared library for
// emulated TLS, to ensure a single copy of its state.
__attribute__((visibility("default"), weak))
#endif
void *__emutls_get_address(__emutls_control *control) {
  uintptr_t index = emutls_get_index(control);
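  // emutls_get_index() returns a 1-based index; pass it through as-is to size
  // the array, then post-decrement so the accesses below are 0-based.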
  emutls_address_array *array = emutls_get_address_array(index--);
  if (array->data[index] == NULL)
    array->data[index] = emutls_allocate_object(control);
  return array->data[index];
}

#ifdef __BIONIC__
// Called by Bionic on dlclose to delete the emutls pthread key.
__attribute__((visibility("hidden"))) void __emutls_unregister_key(void) {
  if (emutls_key_created) {
    pthread_key_delete(emutls_pthread_key);
    emutls_key_created = false;
  }
}
#endif