#define JEMALLOC_TSD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

/* TSD_INITIALIZER triggers "-Wmissing-field-initializers". */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
    ql_head(tsd_init_block_t) blocks;
    malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
    ql_head_initializer(blocks),
    MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
    false,
    TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
    tsd_t *tsd_list;
    bool found = false;
    /*
     * We don't know that tsd is nominal; it might not be safe to get data
     * out of it here.
     */
    malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
    ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
        if (tsd == tsd_list) {
            found = true;
            break;
        }
    }
    malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
    return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
    assert(!tsd_in_nominal_list(tsd));
    assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
    ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link);
    malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
    ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
    malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
    assert(tsd_in_nominal_list(tsd));
    assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
    malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
    ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
    malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_force_recompute(tsdn_t *tsdn) {
    /*
     * The stores to tsd->state here need to synchronize with the exchange
     * in tsd_slow_update.
     */
    atomic_fence(ATOMIC_RELEASE);
    malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
    tsd_t *remote_tsd;
    ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
        assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
            <= tsd_state_nominal_max);
        tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute,
            ATOMIC_RELAXED);
    }
    malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
    atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
    /*
     * We unconditionally force a recompute, even if the global slow count
     * was already positive.  If we didn't, then it would be possible for us
     * to return to the user, have the user synchronize externally with some
     * other thread, and then have that other thread not have picked up the
     * update yet (since the original incrementing thread might still be
     * making its way through the tsd list).
     */
    tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
    atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
    /* See the note in ..._inc(). */
    tsd_force_recompute(tsdn);
}

static bool
tsd_local_slow(tsd_t *tsd) {
    return !tsd_tcache_enabled_get(tsd)
        || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
    return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}
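
/*
 * Usage sketch (hypothetical caller, for illustration only): a feature that
 * needs every thread on the allocation slow path brackets its lifetime with
 * the counter above, roughly:
 *
 *     tsd_global_slow_inc(tsdn);  // flag all nominal tsds for recompute
 *     ... feature active; threads recompute to tsd_state_nominal_slow ...
 *     tsd_global_slow_dec(tsdn);  // threads recompute back on their next pass
 *
 * Increments may nest; the fast path is only recomputed back once every
 * increment has been matched by a decrement (i.e. the count returns to zero).
 */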

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
    if (!tsd_nominal(tsd)) {
        return tsd_state_get(tsd);
    }
    /* We're in *a* nominal state; but which one? */
    if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
        return tsd_state_nominal_slow;
    } else {
        return tsd_state_nominal;
    }
}

void
tsd_slow_update(tsd_t *tsd) {
    uint8_t old_state;
    do {
        uint8_t new_state = tsd_state_compute(tsd);
        old_state = tsd_atomic_exchange(&tsd->state, new_state,
            ATOMIC_ACQUIRE);
    } while (old_state == tsd_state_nominal_recompute);
}
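
/*
 * Sketch of the recompute handshake between tsd_force_recompute() and
 * tsd_slow_update() (an assumed interleaving, for illustration):
 *
 *   1. Thread A flips some slow-path-enabling state (e.g. the global slow
 *      count) and then stores tsd_state_nominal_recompute into every nominal
 *      tsd; the release fence orders the flip before those stores.
 *   2. Thread B next reaches tsd_fetch_slow()/tsd_slow_update(), recomputes
 *      its state, and exchanges it in with acquire ordering, so it observes
 *      A's flip.
 *   3. If the exchange returns tsd_state_nominal_recompute, another recompute
 *      request landed concurrently; the loop recomputes until the request has
 *      been consumed.
 */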

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
    /* Only the tsd module can change the state *to* recompute. */
    assert(new_state != tsd_state_nominal_recompute);
    uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
    if (old_state > tsd_state_nominal_max) {
        /*
         * Not currently in the nominal list, but it might need to be
         * inserted there.
         */
        assert(!tsd_in_nominal_list(tsd));
        tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
        if (new_state <= tsd_state_nominal_max) {
            tsd_add_nominal(tsd);
        }
    } else {
        /*
         * We're currently nominal.  If the new state is non-nominal,
         * great; we take ourselves off the list and just enter the new
         * state.
         */
        assert(tsd_in_nominal_list(tsd));
        if (new_state > tsd_state_nominal_max) {
            tsd_remove_nominal(tsd);
            tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
        } else {
            /*
             * This is the tricky case.  We're transitioning from one
             * nominal state to another.  The caller can't know about
             * any races that are occurring at the same time, so we
             * always have to recompute no matter what.
             */
            tsd_slow_update(tsd);
        }
    }
}

static bool
tsd_data_init(tsd_t *tsd) {
    /*
     * We initialize the rtree context first (before the tcache), since the
     * tcache initialization depends on it.
     */
    rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));

    /*
     * A nondeterministic seed based on the address of tsd reduces the
     * likelihood of lockstep non-uniform cache index utilization among
     * identical concurrent processes, but at the cost of test
     * repeatability.  For debug builds, instead use a deterministic seed.
     */
    *tsd_offset_statep_get(tsd) = config_debug ? 0 :
        (uint64_t)(uintptr_t)tsd;

    return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
    assert(!tsd_nominal(tsd));
    assert(!tsd_in_nominal_list(tsd));
    assert(*tsd_arenap_get_unsafe(tsd) == NULL);
    assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
    assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
    assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
    assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
    assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
    assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
        tsd_state_get(tsd) == tsd_state_minimal_initialized);
    /*
     * During reincarnation, there is no guarantee that the cleanup function
     * will be called (deallocation may happen after all tsd destructors).
     * We set up tsd in a way that no cleanup is needed.
     */
    rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
    *tsd_arenas_tdata_bypassp_get(tsd) = true;
    *tsd_tcache_enabledp_get_unsafe(tsd) = false;
    *tsd_reentrancy_levelp_get(tsd) = 1;
    assert_tsd_data_cleanup_done(tsd);

    return false;
}

tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
    assert(!tsd_fast(tsd));

    if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
        /*
         * On the slow path, but no work needed.  Note that we can't
         * necessarily *assert* that we're slow, because we might be
         * slow because of an asynchronous modification to global state,
         * which might be asynchronously modified *back*.
         */
    } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
        tsd_slow_update(tsd);
    } else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
        if (!minimal) {
            if (tsd_booted) {
                tsd_state_set(tsd, tsd_state_nominal);
                tsd_slow_update(tsd);
                /* Trigger cleanup handler registration. */
                tsd_set(tsd);
                tsd_data_init(tsd);
            }
        } else {
            tsd_state_set(tsd, tsd_state_minimal_initialized);
            tsd_set(tsd);
            tsd_data_init_nocleanup(tsd);
        }
    } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
        if (!minimal) {
            /* Switch to fully initialized. */
            tsd_state_set(tsd, tsd_state_nominal);
            assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
            (*tsd_reentrancy_levelp_get(tsd))--;
            tsd_slow_update(tsd);
            tsd_data_init(tsd);
        } else {
            assert_tsd_data_cleanup_done(tsd);
        }
    } else if (tsd_state_get(tsd) == tsd_state_purgatory) {
        tsd_state_set(tsd, tsd_state_reincarnated);
        tsd_set(tsd);
        tsd_data_init_nocleanup(tsd);
    } else {
        assert(tsd_state_get(tsd) == tsd_state_reincarnated);
    }

    return tsd;
}

void *
malloc_tsd_malloc(size_t size) {
    return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
    a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
    bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
    unsigned i;

    for (i = 0; i < ncleanups; i++) {
        pending[i] = true;
    }

    do {
        again = false;
        for (i = 0; i < ncleanups; i++) {
            if (pending[i]) {
                pending[i] = cleanups[i]();
                if (pending[i]) {
                    again = true;
                }
            }
        }
    } while (again);
}
#endif

void
malloc_tsd_cleanup_register(bool (*f)(void)) {
    assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
    cleanups[ncleanups] = f;
    ncleanups++;
}
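
/*
 * The cleanup protocol, as implemented above: each registered callback returns
 * true if it still has work pending and should be called again on a later
 * pass, and false once it is fully cleaned up; _malloc_thread_cleanup() keeps
 * looping until no callback reports pending work.  A hypothetical subsystem
 * would hook in roughly as follows (sketch only; the my_subsys_* names are
 * illustrative, not part of jemalloc):
 *
 *     static bool
 *     my_subsys_thread_cleanup(void) {
 *         ...release this thread's my_subsys state...
 *         return false;   // nothing left pending
 *     }
 *     ...during boot:  malloc_tsd_cleanup_register(&my_subsys_thread_cleanup);
 */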

static void
tsd_do_data_cleanup(tsd_t *tsd) {
    prof_tdata_cleanup(tsd);
    iarena_cleanup(tsd);
    arena_cleanup(tsd);
    arenas_tdata_cleanup(tsd);
    tcache_cleanup(tsd);
    witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
}

void
tsd_cleanup(void *arg) {
    tsd_t *tsd = (tsd_t *)arg;

    switch (tsd_state_get(tsd)) {
    case tsd_state_uninitialized:
        /* Do nothing. */
        break;
    case tsd_state_minimal_initialized:
        /* This implies the thread only did free() in its lifetime. */
        /* Fall through. */
    case tsd_state_reincarnated:
        /*
         * Reincarnated means another destructor deallocated memory
         * after this destructor was called.  Cleanup isn't required but
         * is still called for testing and completeness.
         */
        assert_tsd_data_cleanup_done(tsd);
        /* Fall through. */
    case tsd_state_nominal:
    case tsd_state_nominal_slow:
        tsd_do_data_cleanup(tsd);
        tsd_state_set(tsd, tsd_state_purgatory);
        tsd_set(tsd);
        break;
    case tsd_state_purgatory:
        /*
         * The previous time this destructor was called, we set the
         * state to tsd_state_purgatory so that other destructors
         * wouldn't cause re-creation of the tsd.  This time, do
         * nothing, and do not request another callback.
         */
        break;
    default:
        not_reached();
    }
#ifdef JEMALLOC_JET
    test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
    int *data = tsd_test_datap_get_unsafe(tsd);
    if (test_callback != NULL) {
        test_callback(data);
    }
#endif
}

tsd_t *
malloc_tsd_boot0(void) {
    tsd_t *tsd;

    ncleanups = 0;
    if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
        WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
        return NULL;
    }
    if (tsd_boot0()) {
        return NULL;
    }
    tsd = tsd_fetch();
    *tsd_arenas_tdata_bypassp_get(tsd) = true;
    return tsd;
}

void
malloc_tsd_boot1(void) {
    tsd_boot1();
    tsd_t *tsd = tsd_fetch();
    /* malloc_slow has been set properly.  Update tsd_slow. */
    tsd_slow_update(tsd);
    *tsd_arenas_tdata_bypassp_get(tsd) = false;
}

#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
    switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
    case DLL_THREAD_ATTACH:
        isthreaded = true;
        break;
#endif
    case DLL_THREAD_DETACH:
        _malloc_thread_cleanup();
        break;
    default:
        break;
    }
    return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
    pthread_t self = pthread_self();
    tsd_init_block_t *iter;

    /* Check whether this thread has already inserted into the list. */
    malloc_mutex_lock(TSDN_NULL, &head->lock);
    ql_foreach(iter, &head->blocks, link) {
        if (iter->thread == self) {
            malloc_mutex_unlock(TSDN_NULL, &head->lock);
            return iter->data;
        }
    }
    /* Insert block into list. */
    ql_elm_new(block, link);
    block->thread = self;
    ql_tail_insert(&head->blocks, block, link);
    malloc_mutex_unlock(TSDN_NULL, &head->lock);
    return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
    malloc_mutex_lock(TSDN_NULL, &head->lock);
    ql_remove(&head->blocks, block, link);
    malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
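
/*
 * Intended call pattern (sketch, based on the non-TLS wrapper path in the tsd
 * headers): initialization code that might re-enter the allocator brackets
 * itself with the two functions above, roughly:
 *
 *     tsd_init_block_t block;
 *     void *data = tsd_init_check_recursion(&tsd_init_head, &block);
 *     if (data != NULL) {
 *         return data;    // recursive entry on this thread; reuse the
 *                         // in-progress data instead of recursing further
 *     }
 *     block.data = ...;   // perform the (possibly allocating) initialization
 *     tsd_init_finish(&tsd_init_head, &block);
 */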
#endif

void
tsd_prefork(tsd_t *tsd) {
    malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
    malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
    malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
    ql_new(&tsd_nominal_tsds);

    if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
        tsd_add_nominal(tsd);
    }
}
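
/*
 * Note on the fork hooks above (sketch of the assumed calling convention):
 * the allocator's fork machinery is expected to call tsd_prefork() in the
 * parent before fork(), and tsd_postfork_parent()/tsd_postfork_child()
 * afterwards.  In the child only the forking thread survives, so the nominal
 * list is reset and repopulated with just that thread's tsd (if it is still
 * in a nominal state).
 */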