xref: /freebsd/contrib/jemalloc/src/tsd.c (revision a2464ee12761660f50d0b6f59f233949ebcacc87)
1 #define JEMALLOC_TSD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/mutex.h"
7 #include "jemalloc/internal/rtree.h"
8 
9 /******************************************************************************/
10 /* Data. */
11 
/* Number of slots of cleanups[] filled by malloc_tsd_cleanup_register(). */
static unsigned ncleanups;
/* Registered cleanup callbacks; holds at most MALLOC_TSD_CLEANUPS_MAX. */
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
14 
/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

/*
 * Exactly one of the four branches below is compiled.  Each branch defines the
 * storage used for tsd in that configuration, plus the tsd_booted flag (which
 * starts out false in all of them).
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
/* tsd object plus a separate per-thread "initialized" flag. */
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
/* tsd object plus a pthread key. */
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
/* Windows: a DWORD slot plus a statically initialized boot wrapper. */
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	/* List of tsd_init_block_t entries; see tsd_init_check_recursion(). */
	ql_head(tsd_init_block_t) blocks;
	/* Protects blocks. */
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t	tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

/* Statically initialized wrapper around a tsd (flag false, TSD_INITIALIZER). */
tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP
57 
58 /******************************************************************************/
59 
/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
/* Guards tsd_nominal_tsds; initialized in malloc_tsd_boot0(). */
static malloc_mutex_t tsd_nominal_tsds_lock;

/*
 * How many slow-path-enabling features are turned on.  Incremented and
 * decremented by tsd_global_slow_inc()/tsd_global_slow_dec() and read by
 * tsd_global_slow().
 */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);
67 
68 static bool
69 tsd_in_nominal_list(tsd_t *tsd) {
70 	tsd_t *tsd_list;
71 	bool found = false;
72 	/*
73 	 * We don't know that tsd is nominal; it might not be safe to get data
74 	 * out of it here.
75 	 */
76 	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
77 	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
78 		if (tsd == tsd_list) {
79 			found = true;
80 			break;
81 		}
82 	}
83 	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
84 	return found;
85 }
86 
87 static void
88 tsd_add_nominal(tsd_t *tsd) {
89 	assert(!tsd_in_nominal_list(tsd));
90 	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
91 	ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link);
92 	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
93 	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
94 	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
95 }
96 
97 static void
98 tsd_remove_nominal(tsd_t *tsd) {
99 	assert(tsd_in_nominal_list(tsd));
100 	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
101 	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
102 	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
103 	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
104 }
105 
106 static void
107 tsd_force_recompute(tsdn_t *tsdn) {
108 	/*
109 	 * The stores to tsd->state here need to synchronize with the exchange
110 	 * in tsd_slow_update.
111 	 */
112 	atomic_fence(ATOMIC_RELEASE);
113 	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
114 	tsd_t *remote_tsd;
115 	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
116 		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
117 		    <= tsd_state_nominal_max);
118 		tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute,
119 		    ATOMIC_RELAXED);
120 	}
121 	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
122 }
123 
124 void
125 tsd_global_slow_inc(tsdn_t *tsdn) {
126 	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
127 	/*
128 	 * We unconditionally force a recompute, even if the global slow count
129 	 * was already positive.  If we didn't, then it would be possible for us
130 	 * to return to the user, have the user synchronize externally with some
131 	 * other thread, and then have that other thread not have picked up the
132 	 * update yet (since the original incrementing thread might still be
133 	 * making its way through the tsd list).
134 	 */
135 	tsd_force_recompute(tsdn);
136 }
137 
138 void tsd_global_slow_dec(tsdn_t *tsdn) {
139 	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
140 	/* See the note in ..._inc(). */
141 	tsd_force_recompute(tsdn);
142 }
143 
144 static bool
145 tsd_local_slow(tsd_t *tsd) {
146 	return !tsd_tcache_enabled_get(tsd)
147 	    || tsd_reentrancy_level_get(tsd) > 0;
148 }
149 
150 bool
151 tsd_global_slow() {
152 	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
153 }
154 
155 /******************************************************************************/
156 
157 static uint8_t
158 tsd_state_compute(tsd_t *tsd) {
159 	if (!tsd_nominal(tsd)) {
160 		return tsd_state_get(tsd);
161 	}
162 	/* We're in *a* nominal state; but which one? */
163 	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
164 		return tsd_state_nominal_slow;
165 	} else {
166 		return tsd_state_nominal;
167 	}
168 }
169 
170 void
171 tsd_slow_update(tsd_t *tsd) {
172 	uint8_t old_state;
173 	do {
174 		uint8_t new_state = tsd_state_compute(tsd);
175 		old_state = tsd_atomic_exchange(&tsd->state, new_state,
176 		    ATOMIC_ACQUIRE);
177 	} while (old_state == tsd_state_nominal_recompute);
178 }
179 
180 void
181 tsd_state_set(tsd_t *tsd, uint8_t new_state) {
182 	/* Only the tsd module can change the state *to* recompute. */
183 	assert(new_state != tsd_state_nominal_recompute);
184 	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
185 	if (old_state > tsd_state_nominal_max) {
186 		/*
187 		 * Not currently in the nominal list, but it might need to be
188 		 * inserted there.
189 		 */
190 		assert(!tsd_in_nominal_list(tsd));
191 		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
192 		if (new_state <= tsd_state_nominal_max) {
193 			tsd_add_nominal(tsd);
194 		}
195 	} else {
196 		/*
197 		 * We're currently nominal.  If the new state is non-nominal,
198 		 * great; we take ourselves off the list and just enter the new
199 		 * state.
200 		 */
201 		assert(tsd_in_nominal_list(tsd));
202 		if (new_state > tsd_state_nominal_max) {
203 			tsd_remove_nominal(tsd);
204 			tsd_atomic_store(&tsd->state, new_state,
205 			    ATOMIC_RELAXED);
206 		} else {
207 			/*
208 			 * This is the tricky case.  We're transitioning from
209 			 * one nominal state to another.  The caller can't know
210 			 * about any races that are occuring at the same time,
211 			 * so we always have to recompute no matter what.
212 			 */
213 			tsd_slow_update(tsd);
214 		}
215 	}
216 }
217 
218 static bool
219 tsd_data_init(tsd_t *tsd) {
220 	/*
221 	 * We initialize the rtree context first (before the tcache), since the
222 	 * tcache initialization depends on it.
223 	 */
224 	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
225 
226 	/*
227 	 * A nondeterministic seed based on the address of tsd reduces
228 	 * the likelihood of lockstep non-uniform cache index
229 	 * utilization among identical concurrent processes, but at the
230 	 * cost of test repeatability.  For debug builds, instead use a
231 	 * deterministic seed.
232 	 */
233 	*tsd_offset_statep_get(tsd) = config_debug ? 0 :
234 	    (uint64_t)(uintptr_t)tsd;
235 
236 	return tsd_tcache_enabled_data_init(tsd);
237 }
238 
/*
 * Sanity check made purely of assertions: tsd must look like it needs no
 * further data cleanup.  It has to be non-nominal and off the nominal list,
 * its arena, iarena, arenas_tdata and prof_tdata pointers must be NULL,
 * arenas_tdata_bypass must be true, and tcache_enabled must be false.
 */
static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
	assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}
250 
/*
 * Initialize tsd so that no later cleanup is required.  Only valid in the
 * reincarnated or minimal_initialized state (asserted).  Always returns false.
 */
static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	/* Bypass arenas_tdata and keep the tcache off. */
	*tsd_arenas_tdata_bypassp_get(tsd) = true;
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	/* A reentrancy level above zero makes tsd_local_slow() report true. */
	*tsd_reentrancy_levelp_get(tsd) = 1;
	assert_tsd_data_cleanup_done(tsd);

	return false;
}
268 
/*
 * Slow-path handling for a tsd that is not in the fast state (asserted via
 * !tsd_fast()).  Depending on the current state this may recompute the nominal
 * state, perform first-time initialization, upgrade a minimally initialized
 * tsd, or reincarnate a tsd that is in purgatory.
 *
 * minimal: when true, an uninitialized tsd is only brought to
 *     tsd_state_minimal_initialized (set up so that no cleanup is needed);
 *     when false, it is fully initialized, provided tsd_booted is set.
 *
 * Returns the tsd pointer that was passed in.
 */
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed.  Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		/* Another thread asked for a recompute; do it now. */
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			/* Full initialization is skipped until tsd_booted is set. */
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			/*
			 * Undo the reentrancy level of 1 that
			 * tsd_data_init_nocleanup() installed.
			 */
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			/* Already minimally initialized; nothing to do. */
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		/* Used again after tsd_cleanup() ran: reincarnate. */
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		/* The only remaining possibility. */
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}
317 
/*
 * Allocate size bytes, rounded up with CACHELINE_CEILING(), through
 * a0malloc().  Returns whatever a0malloc() returns.
 */
void *
malloc_tsd_malloc(size_t size) {
	size_t padded_size = CACHELINE_CEILING(size);

	return a0malloc(padded_size);
}
322 
/*
 * Release memory through a0dalloc(), the deallocation counterpart of the
 * a0malloc() call used by malloc_tsd_malloc().
 */
void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}
327 
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
/*
 * Run the registered cleanup callbacks.  A callback that returns true is
 * treated as still pending and is called again on the next pass; passes
 * continue until a full pass leaves nothing pending.
 */
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX];
	bool again = true;
	unsigned i;

	/* Every registered callback starts out pending. */
	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	while (again) {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (!pending[i]) {
				continue;
			}
			pending[i] = cleanups[i]();
			again = again || pending[i];
		}
	}
}
#endif
354 
355 void
356 malloc_tsd_cleanup_register(bool (*f)(void)) {
357 	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
358 	cleanups[ncleanups] = f;
359 	ncleanups++;
360 }
361 
/*
 * Invoke the per-subsystem cleanup routines for tsd, in this fixed order:
 * prof tdata, iarena, arena, arenas tdata, tcache, and finally witnesses.
 * NOTE(review): the order is presumably significant; confirm before changing.
 */
static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	arenas_tdata_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
}
371 
372 void
373 tsd_cleanup(void *arg) {
374 	tsd_t *tsd = (tsd_t *)arg;
375 
376 	switch (tsd_state_get(tsd)) {
377 	case tsd_state_uninitialized:
378 		/* Do nothing. */
379 		break;
380 	case tsd_state_minimal_initialized:
381 		/* This implies the thread only did free() in its life time. */
382 		/* Fall through. */
383 	case tsd_state_reincarnated:
384 		/*
385 		 * Reincarnated means another destructor deallocated memory
386 		 * after the destructor was called.  Cleanup isn't required but
387 		 * is still called for testing and completeness.
388 		 */
389 		assert_tsd_data_cleanup_done(tsd);
390 		/* Fall through. */
391 	case tsd_state_nominal:
392 	case tsd_state_nominal_slow:
393 		tsd_do_data_cleanup(tsd);
394 		tsd_state_set(tsd, tsd_state_purgatory);
395 		tsd_set(tsd);
396 		break;
397 	case tsd_state_purgatory:
398 		/*
399 		 * The previous time this destructor was called, we set the
400 		 * state to tsd_state_purgatory so that other destructors
401 		 * wouldn't cause re-creation of the tsd.  This time, do
402 		 * nothing, and do not request another callback.
403 		 */
404 		break;
405 	default:
406 		not_reached();
407 	}
408 #ifdef JEMALLOC_JET
409 	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
410 	int *data = tsd_test_datap_get_unsafe(tsd);
411 	if (test_callback != NULL) {
412 		test_callback(data);
413 	}
414 #endif
415 }
416 
417 tsd_t *
418 malloc_tsd_boot0(void) {
419 	tsd_t *tsd;
420 
421 	ncleanups = 0;
422 	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
423 	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
424 		return NULL;
425 	}
426 	if (tsd_boot0()) {
427 		return NULL;
428 	}
429 	tsd = tsd_fetch();
430 	*tsd_arenas_tdata_bypassp_get(tsd) = true;
431 	return tsd;
432 }
433 
434 void
435 malloc_tsd_boot1(void) {
436 	tsd_boot1();
437 	tsd_t *tsd = tsd_fetch();
438 	/* malloc_slow has been set properly.  Update tsd_slow. */
439 	tsd_slow_update(tsd);
440 	*tsd_arenas_tdata_bypassp_get(tsd) = false;
441 }
442 
443 #ifdef _WIN32
444 static BOOL WINAPI
445 _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
446 	switch (fdwReason) {
447 #ifdef JEMALLOC_LAZY_LOCK
448 	case DLL_THREAD_ATTACH:
449 		isthreaded = true;
450 		break;
451 #endif
452 	case DLL_THREAD_DETACH:
453 		_malloc_thread_cleanup();
454 		break;
455 	default:
456 		break;
457 	}
458 	return true;
459 }
460 
/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

/*
 * With MSVC, ask the linker to keep the TLS directory symbol and the callback
 * pointer (the symbol spelling differs between _M_IX86 and other targets), and
 * declare the ".CRT$XLY" section as long and read.
 */
#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
/*
 * Constant pointer to _tls_callback, placed in the ".CRT$XLY" section and
 * marked used.  NOTE(review): presumably this is how the runtime discovers and
 * invokes the callback on thread attach/detach; confirm against the Windows
 * TLS callback documentation.
 */
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL	(WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
483 #endif
484 
485 #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
486     !defined(_WIN32))
487 void *
488 tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
489 	pthread_t self = pthread_self();
490 	tsd_init_block_t *iter;
491 
492 	/* Check whether this thread has already inserted into the list. */
493 	malloc_mutex_lock(TSDN_NULL, &head->lock);
494 	ql_foreach(iter, &head->blocks, link) {
495 		if (iter->thread == self) {
496 			malloc_mutex_unlock(TSDN_NULL, &head->lock);
497 			return iter->data;
498 		}
499 	}
500 	/* Insert block into list. */
501 	ql_elm_new(block, link);
502 	block->thread = self;
503 	ql_tail_insert(&head->blocks, block, link);
504 	malloc_mutex_unlock(TSDN_NULL, &head->lock);
505 	return NULL;
506 }
507 
508 void
509 tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
510 	malloc_mutex_lock(TSDN_NULL, &head->lock);
511 	ql_remove(&head->blocks, block, link);
512 	malloc_mutex_unlock(TSDN_NULL, &head->lock);
513 }
514 #endif
515 
516 void
517 tsd_prefork(tsd_t *tsd) {
518 	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
519 }
520 
521 void
522 tsd_postfork_parent(tsd_t *tsd) {
523 	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
524 }
525 
526 void
527 tsd_postfork_child(tsd_t *tsd) {
528 	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
529 	ql_new(&tsd_nominal_tsds);
530 
531 	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
532 		tsd_add_nominal(tsd);
533 	}
534 }
535