#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializers". */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

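/*
 * Backing storage for tsd, selected per platform: a TLS variable when the C
 * library provides a thread cleanup hook (JEMALLOC_MALLOC_THREAD_CLEANUP), a
 * TLS variable plus a pthread key under JEMALLOC_TLS, a Windows TLS slot
 * (DWORD index) plus a boot wrapper on _WIN32, and a pthread-key-only wrapper
 * as the portable fallback.  Each variant also defines tsd_booted, which
 * tsd_fetch_slow() consults before attempting full initialization.
 */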
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t	tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

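/*
 * Linear scan of the nominal list under its lock.  This is only used from
 * assertions in this file, so the O(n) cost is confined to debug builds.
 */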
static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}

static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

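/*
 * Flag every tsd on the nominal list as tsd_state_nominal_recompute so that
 * each owning thread re-derives its fast/slow state (via tsd_slow_update())
 * the next time it fetches tsd.
 */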
static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive.  If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void
tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}

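/*
 * A thread is locally slow if its tcache is disabled or it is inside a
 * reentrant allocation; the process is globally slow while any
 * slow-path-enabling feature holds a reference on tsd_global_slow_count.
 */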
static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}

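/*
 * Re-derive this thread's own nominal state.  The exchange loop retries for
 * as long as a concurrent tsd_force_recompute() re-marks the state as
 * tsd_state_nominal_recompute, so no recompute request is lost.
 */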
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

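/*
 * Transition tsd to new_state while keeping nominal-list membership
 * consistent: entering a nominal state adds the tsd to the list, leaving one
 * removes it, and a nominal-to-nominal change defers to tsd_slow_update() so
 * that a concurrent recompute request is not clobbered.
 */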
void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal.  If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case.  We're transitioning from
			 * one nominal state to another.  The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability.  For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);

	return false;
}

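/*
 * Slow path of tsd_fetch(): called whenever the cached state is not one of
 * the fast nominal states.  Handles pending recomputes, first-time (full or
 * minimal) initialization, upgrading a minimal tsd to a full one, and
 * reincarnation after the destructor has already run (purgatory).
 */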
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}

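/*
 * Allocation for tsd wrapper objects goes through arena 0 (a0malloc), with
 * the size rounded up to a cacheline multiple; this path is usable during
 * early bootstrap, before the rest of the allocator is fully initialized.
 */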
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];

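/*
 * Thread-exit hook used when there is no pthread destructor (libc cleanup
 * hook or Windows thread detach).  Registered cleanups return true to request
 * another pass, so the loop keeps iterating until every cleanup reports that
 * it is finished.
 */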
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}

#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}

#endif

static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

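/*
 * Destructor invoked at thread exit (via the pthread key, libc hook, or
 * Windows TLS callback).  A live tsd is torn down and parked in purgatory so
 * that later-running destructors which allocate cannot resurrect it
 * indefinitely; a second invocation in purgatory is a no-op.
 */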
void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its lifetime. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after this destructor had already run.  Cleanup isn't
		 * required but is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd.  This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}

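/*
 * Bootstrap: boot0 initializes the nominal-list lock and the tsd machinery
 * itself and returns the bootstrap thread's tsd (NULL on failure); boot1 runs
 * after malloc_slow has been finalized and refreshes the slow-path state.
 */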
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly.  Update tsd_slow. */
	tsd_slow_update(tsd);
}

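/*
 * On Windows there is no pthread destructor, so a TLS callback is planted in
 * the CRT's TLS callback section (.CRT$XLY below) to run
 * _malloc_thread_cleanup() on DLL_THREAD_DETACH.
 */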
#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
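/*
 * Pthread-key-only fallback bootstrap helper: initializing tsd on this path
 * can recurse back into tsd_fetch() (the wrapper itself has to be allocated).
 * tsd_init_check_recursion() notices that the calling thread already has a
 * block on the list and returns the in-progress data instead of recursing;
 * tsd_init_finish() removes the block once initialization completes.
 */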
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif

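/*
 * Fork handlers for the nominal-list lock.  In the child only the forking
 * thread survives, so the list is reset and the surviving tsd is re-added if
 * it is still in a nominal state.
 */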
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}