1 //===---------- emutls.c - Implements __emutls_get_address ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include <stdint.h> 10 #include <stdlib.h> 11 #include <string.h> 12 13 #include "int_lib.h" 14 15 #ifdef __BIONIC__ 16 // There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation 17 // to round 2. We need to delay deallocation because: 18 // - Android versions older than M lack __cxa_thread_atexit_impl, so apps 19 // use a pthread key destructor to call C++ destructors. 20 // - Apps might use __thread/thread_local variables in pthread destructors. 21 // We can't wait until the final two rounds, because jemalloc needs two rounds 22 // after the final malloc/free call to free its thread-specific data (see 23 // https://reviews.llvm.org/D46978#1107507). 24 #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1 25 #else 26 #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0 27 #endif 28 29 #if defined(_MSC_VER) && !defined(__clang__) 30 // MSVC raises a warning about a nonstandard extension being used for the 0 31 // sized element in this array. Disable this for warn-as-error builds. 32 #pragma warning(push) 33 #pragma warning(disable : 4200) 34 #endif 35 36 typedef struct emutls_address_array { 37 uintptr_t skip_destructor_rounds; 38 uintptr_t size; // number of elements in the 'data' array 39 void *data[]; 40 } emutls_address_array; 41 42 #if defined(_MSC_VER) && !defined(__clang__) 43 #pragma warning(pop) 44 #endif 45 46 static void emutls_shutdown(emutls_address_array *array); 47 48 #ifndef _WIN32 49 50 #include <pthread.h> 51 52 static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER; 53 static pthread_key_t emutls_pthread_key; 54 static bool emutls_key_created = false; 55 56 typedef unsigned int gcc_word __attribute__((mode(word))); 57 typedef unsigned int gcc_pointer __attribute__((mode(pointer))); 58 59 // Default is not to use posix_memalign, so systems like Android 60 // can use thread local data without heavier POSIX memory allocators. 61 #ifndef EMUTLS_USE_POSIX_MEMALIGN 62 #define EMUTLS_USE_POSIX_MEMALIGN 0 63 #endif 64 65 static __inline void *emutls_memalign_alloc(size_t align, size_t size) { 66 void *base; 67 #if EMUTLS_USE_POSIX_MEMALIGN 68 if (posix_memalign(&base, align, size) != 0) 69 abort(); 70 #else 71 #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *)) 72 char *object; 73 if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL) 74 abort(); 75 base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) & 76 ~(uintptr_t)(align - 1)); 77 78 ((void **)base)[-1] = object; 79 #endif 80 return base; 81 } 82 83 static __inline void emutls_memalign_free(void *base) { 84 #if EMUTLS_USE_POSIX_MEMALIGN 85 free(base); 86 #else 87 // The mallocated address is in ((void**)base)[-1] 88 free(((void **)base)[-1]); 89 #endif 90 } 91 92 static __inline void emutls_setspecific(emutls_address_array *value) { 93 pthread_setspecific(emutls_pthread_key, (void *)value); 94 } 95 96 static __inline emutls_address_array *emutls_getspecific() { 97 return (emutls_address_array *)pthread_getspecific(emutls_pthread_key); 98 } 99 100 static void emutls_key_destructor(void *ptr) { 101 emutls_address_array *array = (emutls_address_array *)ptr; 102 if (array->skip_destructor_rounds > 0) { 103 // emutls is deallocated using a pthread key destructor. These 104 // destructors are called in several rounds to accommodate destructor 105 // functions that (re)initialize key values with pthread_setspecific. 106 // Delay the emutls deallocation to accommodate other end-of-thread 107 // cleanup tasks like calling thread_local destructors (e.g. the 108 // __cxa_thread_atexit fallback in libc++abi). 109 array->skip_destructor_rounds--; 110 emutls_setspecific(array); 111 } else { 112 emutls_shutdown(array); 113 free(ptr); 114 } 115 } 116 117 static __inline void emutls_init(void) { 118 if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0) 119 abort(); 120 emutls_key_created = true; 121 } 122 123 static __inline void emutls_init_once(void) { 124 static pthread_once_t once = PTHREAD_ONCE_INIT; 125 pthread_once(&once, emutls_init); 126 } 127 128 static __inline void emutls_lock() { pthread_mutex_lock(&emutls_mutex); } 129 130 static __inline void emutls_unlock() { pthread_mutex_unlock(&emutls_mutex); } 131 132 #else // _WIN32 133 134 #include <assert.h> 135 #include <malloc.h> 136 #include <stdio.h> 137 #include <windows.h> 138 139 static LPCRITICAL_SECTION emutls_mutex; 140 static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES; 141 142 typedef uintptr_t gcc_word; 143 typedef void *gcc_pointer; 144 145 static void win_error(DWORD last_err, const char *hint) { 146 char *buffer = NULL; 147 if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | 148 FORMAT_MESSAGE_FROM_SYSTEM | 149 FORMAT_MESSAGE_MAX_WIDTH_MASK, 150 NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) { 151 fprintf(stderr, "Windows error: %s\n", buffer); 152 } else { 153 fprintf(stderr, "Unknown Windows error: %s\n", hint); 154 } 155 LocalFree(buffer); 156 } 157 158 static __inline void win_abort(DWORD last_err, const char *hint) { 159 win_error(last_err, hint); 160 abort(); 161 } 162 163 static __inline void *emutls_memalign_alloc(size_t align, size_t size) { 164 void *base = _aligned_malloc(size, align); 165 if (!base) 166 win_abort(GetLastError(), "_aligned_malloc"); 167 return base; 168 } 169 170 static __inline void emutls_memalign_free(void *base) { _aligned_free(base); } 171 172 static void emutls_exit(void) { 173 if (emutls_mutex) { 174 DeleteCriticalSection(emutls_mutex); 175 _aligned_free(emutls_mutex); 176 emutls_mutex = NULL; 177 } 178 if (emutls_tls_index != TLS_OUT_OF_INDEXES) { 179 emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index)); 180 TlsFree(emutls_tls_index); 181 emutls_tls_index = TLS_OUT_OF_INDEXES; 182 } 183 } 184 185 static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) { 186 (void)p0; 187 (void)p1; 188 (void)p2; 189 emutls_mutex = 190 (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16); 191 if (!emutls_mutex) { 192 win_error(GetLastError(), "_aligned_malloc"); 193 return FALSE; 194 } 195 InitializeCriticalSection(emutls_mutex); 196 197 emutls_tls_index = TlsAlloc(); 198 if (emutls_tls_index == TLS_OUT_OF_INDEXES) { 199 emutls_exit(); 200 win_error(GetLastError(), "TlsAlloc"); 201 return FALSE; 202 } 203 atexit(&emutls_exit); 204 return TRUE; 205 } 206 207 static __inline void emutls_init_once(void) { 208 static INIT_ONCE once; 209 InitOnceExecuteOnce(&once, emutls_init, NULL, NULL); 210 } 211 212 static __inline void emutls_lock() { EnterCriticalSection(emutls_mutex); } 213 214 static __inline void emutls_unlock() { LeaveCriticalSection(emutls_mutex); } 215 216 static __inline void emutls_setspecific(emutls_address_array *value) { 217 if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0) 218 win_abort(GetLastError(), "TlsSetValue"); 219 } 220 221 static __inline emutls_address_array *emutls_getspecific() { 222 LPVOID value = TlsGetValue(emutls_tls_index); 223 if (value == NULL) { 224 const DWORD err = GetLastError(); 225 if (err != ERROR_SUCCESS) 226 win_abort(err, "TlsGetValue"); 227 } 228 return (emutls_address_array *)value; 229 } 230 231 // Provide atomic load/store functions for emutls_get_index if built with MSVC. 232 #if !defined(__ATOMIC_RELEASE) 233 #include <intrin.h> 234 235 enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 }; 236 237 static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) { 238 assert(type == __ATOMIC_ACQUIRE); 239 // These return the previous value - but since we do an OR with 0, 240 // it's equivalent to a plain load. 241 #ifdef _WIN64 242 return InterlockedOr64(ptr, 0); 243 #else 244 return InterlockedOr(ptr, 0); 245 #endif 246 } 247 248 static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) { 249 assert(type == __ATOMIC_RELEASE); 250 InterlockedExchangePointer((void *volatile *)ptr, (void *)val); 251 } 252 253 #endif // __ATOMIC_RELEASE 254 255 #endif // _WIN32 256 257 static size_t emutls_num_object = 0; // number of allocated TLS objects 258 259 // Free the allocated TLS data 260 static void emutls_shutdown(emutls_address_array *array) { 261 if (array) { 262 uintptr_t i; 263 for (i = 0; i < array->size; ++i) { 264 if (array->data[i]) 265 emutls_memalign_free(array->data[i]); 266 } 267 } 268 } 269 270 // For every TLS variable xyz, 271 // there is one __emutls_control variable named __emutls_v.xyz. 272 // If xyz has non-zero initial value, __emutls_v.xyz's "value" 273 // will point to __emutls_t.xyz, which has the initial value. 274 typedef struct __emutls_control { 275 // Must use gcc_word here, instead of size_t, to match GCC. When 276 // gcc_word is larger than size_t, the upper extra bits are all 277 // zeros. We can use variables of size_t to operate on size and 278 // align. 279 gcc_word size; // size of the object in bytes 280 gcc_word align; // alignment of the object in bytes 281 union { 282 uintptr_t index; // data[index-1] is the object address 283 void *address; // object address, when in single thread env 284 } object; 285 void *value; // null or non-zero initial value for the object 286 } __emutls_control; 287 288 // Emulated TLS objects are always allocated at run-time. 289 static __inline void *emutls_allocate_object(__emutls_control *control) { 290 // Use standard C types, check with gcc's emutls.o. 291 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer)); 292 COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *)); 293 294 size_t size = control->size; 295 size_t align = control->align; 296 void *base; 297 if (align < sizeof(void *)) 298 align = sizeof(void *); 299 // Make sure that align is power of 2. 300 if ((align & (align - 1)) != 0) 301 abort(); 302 303 base = emutls_memalign_alloc(align, size); 304 if (control->value) 305 memcpy(base, control->value, size); 306 else 307 memset(base, 0, size); 308 return base; 309 } 310 311 // Returns control->object.index; set index if not allocated yet. 312 static __inline uintptr_t emutls_get_index(__emutls_control *control) { 313 uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE); 314 if (!index) { 315 emutls_init_once(); 316 emutls_lock(); 317 index = control->object.index; 318 if (!index) { 319 index = ++emutls_num_object; 320 __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE); 321 } 322 emutls_unlock(); 323 } 324 return index; 325 } 326 327 // Updates newly allocated thread local emutls_address_array. 328 static __inline void emutls_check_array_set_size(emutls_address_array *array, 329 uintptr_t size) { 330 if (array == NULL) 331 abort(); 332 array->size = size; 333 emutls_setspecific(array); 334 } 335 336 // Returns the new 'data' array size, number of elements, 337 // which must be no smaller than the given index. 338 static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) { 339 // Need to allocate emutls_address_array with extra slots 340 // to store the header. 341 // Round up the emutls_address_array size to multiple of 16. 342 uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *); 343 return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words; 344 } 345 346 // Returns the size in bytes required for an emutls_address_array with 347 // N number of elements for data field. 348 static __inline uintptr_t emutls_asize(uintptr_t N) { 349 return N * sizeof(void *) + sizeof(emutls_address_array); 350 } 351 352 // Returns the thread local emutls_address_array. 353 // Extends its size if necessary to hold address at index. 354 static __inline emutls_address_array * 355 emutls_get_address_array(uintptr_t index) { 356 emutls_address_array *array = emutls_getspecific(); 357 if (array == NULL) { 358 uintptr_t new_size = emutls_new_data_array_size(index); 359 array = (emutls_address_array *)malloc(emutls_asize(new_size)); 360 if (array) { 361 memset(array->data, 0, new_size * sizeof(void *)); 362 array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS; 363 } 364 emutls_check_array_set_size(array, new_size); 365 } else if (index > array->size) { 366 uintptr_t orig_size = array->size; 367 uintptr_t new_size = emutls_new_data_array_size(index); 368 array = (emutls_address_array *)realloc(array, emutls_asize(new_size)); 369 if (array) 370 memset(array->data + orig_size, 0, 371 (new_size - orig_size) * sizeof(void *)); 372 emutls_check_array_set_size(array, new_size); 373 } 374 return array; 375 } 376 377 #ifndef _WIN32 378 // Our emulated TLS implementation relies on local state (e.g. for the pthread 379 // key), and if we duplicate this state across different shared libraries, 380 // accesses to the same TLS variable from different shared libraries will yield 381 // different results (see https://github.com/android/ndk/issues/1551 for an 382 // example). __emutls_get_address is the only external entry point for emulated 383 // TLS, and by making it default visibility and weak, we can rely on the dynamic 384 // linker to coalesce multiple copies at runtime and ensure a single unique copy 385 // of TLS state. This is a best effort; it won't work if the user is linking 386 // with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows, 387 // where the dynamic linker has no notion of coalescing weak symbols at runtime. 388 // A more robust solution would be to create a separate shared library for 389 // emulated TLS, to ensure a single copy of its state. 390 __attribute__((visibility("default"), weak)) 391 #endif 392 void *__emutls_get_address(__emutls_control *control) { 393 uintptr_t index = emutls_get_index(control); 394 emutls_address_array *array = emutls_get_address_array(index--); 395 if (array->data[index] == NULL) 396 array->data[index] = emutls_allocate_object(control); 397 return array->data[index]; 398 } 399 400 #ifdef __BIONIC__ 401 // Called by Bionic on dlclose to delete the emutls pthread key. 402 __attribute__((visibility("hidden"))) void __emutls_unregister_key(void) { 403 if (emutls_key_created) { 404 pthread_key_delete(emutls_pthread_key); 405 emutls_key_created = false; 406 } 407 } 408 #endif 409