#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializers" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
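/*
 * Broadcast a recompute request: under the nominal-list lock, every remote
 * tsd's state is overwritten with tsd_state_nominal_recompute, so the owning
 * thread notices on its next state check and calls tsd_slow_update() to pick
 * a fresh nominal / nominal_slow state.
 */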
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for
	 * us to return to the user, have the user synchronize externally with
	 * some other thread, and then have that other thread not have picked
	 * up the update yet (since the original incrementing thread might
	 * still be making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}
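/*
 * Recompute this thread's nominal state and publish it.  The exchange is
 * retried whenever the previous value was tsd_state_nominal_recompute: a
 * concurrent tsd_force_recompute() may have flagged this tsd after the inputs
 * to tsd_state_compute() were read, so the computed state could be stale.
 */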
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability.  For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup
	 * function will be called (deallocation may happen after all tsd
	 * destructors).  We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}
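/*
 * Slow-path counterpart of tsd_fetch(); reached when the cached state is not
 * one of the fast nominal states.  Depending on the current state this either
 * finishes a pending recompute request, performs first-time (full or minimal)
 * initialization, upgrades a minimally initialized tsd to nominal, or
 * reincarnates a tsd whose destructor has already run.
 */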
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global
		 * state, which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}
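/*
 * Thread-exit cleanup hook.  For a nominal tsd, the per-thread data is torn
 * down, the state is set to purgatory, and tsd_set() is called again; if the
 * destructor fires once more (now in purgatory), nothing further is done and
 * no additional callback is requested.
 */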
418 */ 419 break; 420 default: 421 not_reached(); 422 } 423 #ifdef JEMALLOC_JET 424 test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); 425 int *data = tsd_test_datap_get_unsafe(tsd); 426 if (test_callback != NULL) { 427 test_callback(data); 428 } 429 #endif 430 } 431 432 tsd_t * 433 malloc_tsd_boot0(void) { 434 tsd_t *tsd; 435 436 #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) 437 ncleanups = 0; 438 #endif 439 if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", 440 WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { 441 return NULL; 442 } 443 if (tsd_boot0()) { 444 return NULL; 445 } 446 tsd = tsd_fetch(); 447 return tsd; 448 } 449 450 void 451 malloc_tsd_boot1(void) { 452 tsd_boot1(); 453 tsd_t *tsd = tsd_fetch(); 454 /* malloc_slow has been set properly. Update tsd_slow. */ 455 tsd_slow_update(tsd); 456 } 457 458 #ifdef _WIN32 459 static BOOL WINAPI 460 _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { 461 switch (fdwReason) { 462 #ifdef JEMALLOC_LAZY_LOCK 463 case DLL_THREAD_ATTACH: 464 isthreaded = true; 465 break; 466 #endif 467 case DLL_THREAD_DETACH: 468 _malloc_thread_cleanup(); 469 break; 470 default: 471 break; 472 } 473 return true; 474 } 475 476 /* 477 * We need to be able to say "read" here (in the "pragma section"), but have 478 * hooked "read". We won't read for the rest of the file, so we can get away 479 * with unhooking. 480 */ 481 #ifdef read 482 # undef read 483 #endif 484 485 #ifdef _MSC_VER 486 # ifdef _M_IX86 487 # pragma comment(linker, "/INCLUDE:__tls_used") 488 # pragma comment(linker, "/INCLUDE:_tls_callback") 489 # else 490 # pragma comment(linker, "/INCLUDE:_tls_used") 491 # pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) 492 # endif 493 # pragma section(".CRT$XLY",long,read) 494 #endif 495 JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) 496 BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, 497 DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; 498 #endif 499 500 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ 501 !defined(_WIN32)) 502 void * 503 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { 504 pthread_t self = pthread_self(); 505 tsd_init_block_t *iter; 506 507 /* Check whether this thread has already inserted into the list. */ 508 malloc_mutex_lock(TSDN_NULL, &head->lock); 509 ql_foreach(iter, &head->blocks, link) { 510 if (iter->thread == self) { 511 malloc_mutex_unlock(TSDN_NULL, &head->lock); 512 return iter->data; 513 } 514 } 515 /* Insert block into list. */ 516 ql_elm_new(block, link); 517 block->thread = self; 518 ql_tail_insert(&head->blocks, block, link); 519 malloc_mutex_unlock(TSDN_NULL, &head->lock); 520 return NULL; 521 } 522 523 void 524 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { 525 malloc_mutex_lock(TSDN_NULL, &head->lock); 526 ql_remove(&head->blocks, block, link); 527 malloc_mutex_unlock(TSDN_NULL, &head->lock); 528 } 529 #endif 530 531 void 532 tsd_prefork(tsd_t *tsd) { 533 malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 534 } 535 536 void 537 tsd_postfork_parent(tsd_t *tsd) { 538 malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 539 } 540 541 void 542 tsd_postfork_child(tsd_t *tsd) { 543 malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); 544 ql_new(&tsd_nominal_tsds); 545 546 if (tsd_state_get(tsd) <= tsd_state_nominal_max) { 547 tsd_add_nominal(tsd); 548 } 549 } 550