xref: /freebsd/contrib/jemalloc/src/tcache.c (revision c43cad87172039ccf38172129c79755ea79e6102)
1 #include "jemalloc/internal/jemalloc_preamble.h"
2 #include "jemalloc/internal/jemalloc_internal_includes.h"
3 
4 #include "jemalloc/internal/assert.h"
5 #include "jemalloc/internal/mutex.h"
6 #include "jemalloc/internal/safety_check.h"
7 #include "jemalloc/internal/san.h"
8 #include "jemalloc/internal/sc.h"
9 
10 /******************************************************************************/
11 /* Data. */
12 
13 bool opt_tcache = true;
14 
15 /* tcache_maxclass is set to 32KB by default.  */
16 size_t opt_tcache_max = ((size_t)1) << 15;
17 
18 /* Reasonable defaults for min and max values. */
19 unsigned opt_tcache_nslots_small_min = 20;
20 unsigned opt_tcache_nslots_small_max = 200;
21 unsigned opt_tcache_nslots_large = 20;
22 
23 /*
24  * We attempt to make the number of slots in a tcache bin for a given size class
25  * equal to the number of objects in a slab times some multiplier.  By default,
26  * the multiplier is 2 (i.e. we set the maximum number of objects in the tcache
27  * to twice the number of objects in a slab).
28  * This is bounded by some other constraints as well, like the fact that it
29  * must be even, must not exceed opt_tcache_nslots_small_max, etc.
30  */
31 ssize_t	opt_lg_tcache_nslots_mul = 1;
32 
33 /*
34  * Number of allocation bytes between tcache incremental GCs.  Again, this
35  * default just seems to work well; more tuning is possible.
36  */
37 size_t opt_tcache_gc_incr_bytes = 65536;
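
/*
 * Editor's note -- illustrative arithmetic, assuming the default above: with
 * opt_tcache_gc_incr_bytes == 65536, a thread that allocates roughly 1 MiB
 * triggers about 1048576 / 65536 == 16 GC events, and each event visits a
 * single cache bin (see tcache_event() below), so a full sweep of all nhbins
 * bins takes nhbins events.
 */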
38 
39 /*
40  * With default settings, we may end up flushing small bins frequently with
41  * small flush amounts.  To limit this tendency, we can set a number of bytes to
42  * "delay" by.  If we try to flush N M-byte items, we decrease that size-class's
43  * delay by N * M.  So, if delay is 1024 and we're looking at the 64-byte size
44  * class, we won't do any flushing until we've been asked to flush 1024/64 == 16
45  * items.  This can happen in any configuration (i.e. being asked to flush 16
46  * items once, or 4 items 4 times).
47  *
48  * Practically, this is stored as a count of items in a uint8_t, so the
49  * effective maximum value for a size class is 255 * sz.
50  */
51 size_t opt_tcache_gc_delay_bytes = 0;
52 
53 /*
54  * When a cache bin is flushed because it's full, how much of it do we flush?
55  * By default, we flush half the maximum number of items.
56  */
57 unsigned opt_lg_tcache_flush_small_div = 1;
58 unsigned opt_lg_tcache_flush_large_div = 1;
59 
60 cache_bin_info_t	*tcache_bin_info;
61 
62 /* Total stack size required (per tcache).  Includes the padding above. */
63 static size_t tcache_bin_alloc_size;
64 static size_t tcache_bin_alloc_alignment;
65 
66 /* Number of cache bins enabled, including both large and small. */
67 unsigned		nhbins;
68 /* Max size class to be cached (can be small or large). */
69 size_t			tcache_maxclass;
70 
71 tcaches_t		*tcaches;
72 
73 /* Index of first element within tcaches that has never been used. */
74 static unsigned		tcaches_past;
75 
76 /* Head of singly linked list tracking available tcaches elements. */
77 static tcaches_t	*tcaches_avail;
78 
79 /* Protects tcaches{,_past,_avail}. */
80 static malloc_mutex_t	tcaches_mtx;
81 
82 /******************************************************************************/
83 
84 size_t
85 tcache_salloc(tsdn_t *tsdn, const void *ptr) {
86 	return arena_salloc(tsdn, ptr);
87 }
88 
89 uint64_t
90 tcache_gc_new_event_wait(tsd_t *tsd) {
91 	return opt_tcache_gc_incr_bytes;
92 }
93 
94 uint64_t
95 tcache_gc_postponed_event_wait(tsd_t *tsd) {
96 	return TE_MIN_START_WAIT;
97 }
98 
99 uint64_t
100 tcache_gc_dalloc_new_event_wait(tsd_t *tsd) {
101 	return opt_tcache_gc_incr_bytes;
102 }
103 
104 uint64_t
105 tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) {
106 	return TE_MIN_START_WAIT;
107 }
108 
109 static uint8_t
110 tcache_gc_item_delay_compute(szind_t szind) {
111 	assert(szind < SC_NBINS);
112 	size_t sz = sz_index2size(szind);
113 	size_t item_delay = opt_tcache_gc_delay_bytes / sz;
114 	size_t delay_max = ZU(1)
115 	    << (sizeof(((tcache_slow_t *)NULL)->bin_flush_delay_items[0]) * 8);
116 	if (item_delay >= delay_max) {
117 		item_delay = delay_max - 1;
118 	}
119 	return (uint8_t)item_delay;
120 }
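
/*
 * Editor's note -- a worked example, using the sample values from the comment
 * above opt_tcache_gc_delay_bytes: with a delay of 1024 bytes and the 64-byte
 * size class, item_delay == 1024 / 64 == 16.  Because the delay is stored as
 * a uint8_t item count, delay_max == 1 << 8 == 256, and any larger result is
 * clamped to 255 (i.e. at most 255 * sz bytes of delay per size class).
 */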
121 
122 static void
123 tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
124     szind_t szind) {
125 	/* Aim to flush 3/4 of items below low-water. */
126 	assert(szind < SC_NBINS);
127 
128 	cache_bin_t *cache_bin = &tcache->bins[szind];
129 	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
130 	    &tcache_bin_info[szind]);
131 	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
132 	    &tcache_bin_info[szind]);
133 	assert(!tcache_slow->bin_refilled[szind]);
134 
135 	size_t nflush = low_water - (low_water >> 2);
136 	if (nflush < tcache_slow->bin_flush_delay_items[szind]) {
137 		/* Workaround for a conversion warning. */
138 		uint8_t nflush_uint8 = (uint8_t)nflush;
139 		assert(sizeof(tcache_slow->bin_flush_delay_items[0]) ==
140 		    sizeof(nflush_uint8));
141 		tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8;
142 		return;
143 	} else {
144 		tcache_slow->bin_flush_delay_items[szind]
145 		    = tcache_gc_item_delay_compute(szind);
146 	}
147 
148 	tcache_bin_flush_small(tsd, tcache, cache_bin, szind,
149 	    (unsigned)(ncached - nflush));
150 
151 	/*
152 	 * Halve the fill count (i.e. bump lg_fill_div), while keeping
153 	 * lg_fill_div small enough that the fill count stays at least 1.
154 	 */
155 	if ((cache_bin_info_ncached_max(&tcache_bin_info[szind])
156 	    >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) {
157 		tcache_slow->lg_fill_div[szind]++;
158 	}
159 }
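
/*
 * Editor's note -- a small worked example of the flush above (assuming the
 * delay mechanism does not suppress it): if low_water == 16, then
 * nflush == 16 - (16 >> 2) == 12, i.e. 3/4 of the items that sat below the
 * low-water mark are flushed and ncached drops by 12.  The subsequent
 * lg_fill_div bump halves the refill size for this bin; see
 * tcache_alloc_small_hard(), which fills ncached_max >> lg_fill_div.
 */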
160 
161 static void
162 tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
163     szind_t szind) {
164 	/* Like the small GC; flush 3/4 of untouched items. */
165 	assert(szind >= SC_NBINS);
166 	cache_bin_t *cache_bin = &tcache->bins[szind];
167 	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
168 	    &tcache_bin_info[szind]);
169 	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
170 	    &tcache_bin_info[szind]);
171 	tcache_bin_flush_large(tsd, tcache, cache_bin, szind,
172 	    (unsigned)(ncached - low_water + (low_water >> 2)));
173 }
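
/*
 * Editor's note: the call above keeps rem == ncached - low_water +
 * (low_water >> 2) items, i.e. it flushes low_water - (low_water >> 2) items
 * -- the same "3/4 of what sat below low-water" policy as tcache_gc_small(),
 * just without the fill-size adjustment.
 */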
174 
175 static void
176 tcache_event(tsd_t *tsd) {
177 	tcache_t *tcache = tcache_get(tsd);
178 	if (tcache == NULL) {
179 		return;
180 	}
181 
182 	tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
183 	szind_t szind = tcache_slow->next_gc_bin;
184 	bool is_small = (szind < SC_NBINS);
185 	cache_bin_t *cache_bin = &tcache->bins[szind];
186 
187 	tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small);
188 
189 	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin,
190 	    &tcache_bin_info[szind]);
191 	if (low_water > 0) {
192 		if (is_small) {
193 			tcache_gc_small(tsd, tcache_slow, tcache, szind);
194 		} else {
195 			tcache_gc_large(tsd, tcache_slow, tcache, szind);
196 		}
197 	} else if (is_small && tcache_slow->bin_refilled[szind]) {
198 		assert(low_water == 0);
199 		/*
200 		 * Double the fill count for small bins, making sure that
201 		 * lg_fill_div stays greater than 0.
202 		 */
203 		if (tcache_slow->lg_fill_div[szind] > 1) {
204 			tcache_slow->lg_fill_div[szind]--;
205 		}
206 		tcache_slow->bin_refilled[szind] = false;
207 	}
208 	cache_bin_low_water_set(cache_bin);
209 
210 	tcache_slow->next_gc_bin++;
211 	if (tcache_slow->next_gc_bin == nhbins) {
212 		tcache_slow->next_gc_bin = 0;
213 	}
214 }
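
/*
 * Editor's note -- cadence implied by the function above: each GC event
 * handles exactly one bin and then advances next_gc_bin, wrapping at nhbins,
 * so a given bin is revisited roughly every nhbins * opt_tcache_gc_incr_bytes
 * allocated bytes (the dalloc-driven GC events below also call tcache_event()
 * and can make that more frequent).
 */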
215 
216 void
217 tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) {
218 	assert(elapsed == TE_INVALID_ELAPSED);
219 	tcache_event(tsd);
220 }
221 
222 void
223 tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) {
224 	assert(elapsed == TE_INVALID_ELAPSED);
225 	tcache_event(tsd);
226 }
227 
228 void *
229 tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena,
230     tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind,
231     bool *tcache_success) {
232 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
233 	void *ret;
234 
235 	assert(tcache_slow->arena != NULL);
236 	unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind])
237 	    >> tcache_slow->lg_fill_div[binind];
238 	arena_cache_bin_fill_small(tsdn, arena, cache_bin,
239 	    &tcache_bin_info[binind], binind, nfill);
240 	tcache_slow->bin_refilled[binind] = true;
241 	ret = cache_bin_alloc(cache_bin, tcache_success);
242 
243 	return ret;
244 }
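
/*
 * Editor's note -- example of the fill sizing above, assuming a bin with
 * ncached_max == 200: with lg_fill_div == 1 (the initial value set in
 * tcache_init()) a cache miss refills 200 >> 1 == 100 objects; each GC-driven
 * increment of lg_fill_div halves that again (50, 25, ...), and a later
 * low_water == 0 observation in tcache_event() lets the fill count grow back.
 */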
245 
246 static const void *
247 tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) {
248 	cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx;
249 	return arr->ptr[ind];
250 }
251 
252 static void
253 tcache_bin_flush_metadata_visitor(void *szind_sum_ctx,
254     emap_full_alloc_ctx_t *alloc_ctx) {
255 	size_t *szind_sum = (size_t *)szind_sum_ctx;
256 	*szind_sum -= alloc_ctx->szind;
257 	util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t));
258 }
259 
260 JEMALLOC_NOINLINE static void
261 tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind,
262     size_t nptrs, emap_batch_lookup_result_t *edatas) {
263 	bool found_mismatch = false;
264 	for (size_t i = 0; i < nptrs; i++) {
265 		szind_t true_szind = edata_szind_get(edatas[i].edata);
266 		if (true_szind != szind) {
267 			found_mismatch = true;
268 			safety_check_fail_sized_dealloc(
269 			    /* current_dealloc */ false,
270 			    /* ptr */ tcache_bin_flush_ptr_getter(arr, i),
271 			    /* true_size */ sz_index2size(true_szind),
272 			    /* input_size */ sz_index2size(szind));
273 		}
274 	}
275 	assert(found_mismatch);
276 }
277 
278 static void
279 tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr,
280     szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) {
281 
282 	/*
283 	 * This gets compiled away when config_opt_safety_checks is false.
284 	 * Checks for sized deallocation bugs, failing early rather than
285 	 * corrupting metadata.
286 	 */
287 	size_t szind_sum = binind * nflush;
288 	emap_edata_lookup_batch(tsd, &arena_emap_global, nflush,
289 	    &tcache_bin_flush_ptr_getter, (void *)arr,
290 	    &tcache_bin_flush_metadata_visitor, (void *)&szind_sum,
291 	    edatas);
292 	if (config_opt_safety_checks && unlikely(szind_sum != 0)) {
293 		tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas);
294 	}
295 }
296 
297 JEMALLOC_ALWAYS_INLINE bool
298 tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind,
299     unsigned cur_binshard, bool small) {
300 	if (small) {
301 		return edata_arena_ind_get(edata) == cur_arena_ind
302 		    && edata_binshard_get(edata) == cur_binshard;
303 	} else {
304 		return edata_arena_ind_get(edata) == cur_arena_ind;
305 	}
306 }
307 
308 JEMALLOC_ALWAYS_INLINE void
309 tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
310     szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) {
311 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
312 	/*
313 	 * A couple lookup calls take tsdn; declare it once for convenience
314 	 * instead of calling tsd_tsdn(tsd) all the time.
315 	 */
316 	tsdn_t *tsdn = tsd_tsdn(tsd);
317 
318 	if (small) {
319 		assert(binind < SC_NBINS);
320 	} else {
321 		assert(binind < nhbins);
322 	}
323 	arena_t *tcache_arena = tcache_slow->arena;
324 	assert(tcache_arena != NULL);
325 
326 	/*
327 	 * Variable length array must have > 0 length; the last element is never
328 	 * touched (it's just included to satisfy the no-zero-length rule).
329 	 */
330 	VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1);
331 	tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata);
332 
333 	/*
334 	 * The slabs for which we freed the last remaining object (and so need
335 	 * to free the slab itself).
336 	 * Used only if small == true.
337 	 */
338 	unsigned dalloc_count = 0;
339 	VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1);
340 
341 	/*
342 	 * We're about to grab a bunch of locks.  If one of them happens to be
343 	 * the one guarding the arena-level stats counters that we flush our
344 	 * thread-local stats to, merge those stats within that critical section.
345 	 */
346 	bool merged_stats = false;
347 	while (nflush > 0) {
348 		/* Lock the arena, or bin, associated with the first object. */
349 		edata_t *edata = item_edata[0].edata;
350 		unsigned cur_arena_ind = edata_arena_ind_get(edata);
351 		arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false);
352 
353 		/*
354 		 * These assignments are always overwritten when small is true,
355 		 * and their values are always ignored when small is false, but
356 		 * to avoid the technical UB when we pass them as parameters, we
357 		 * need to initialize them.
358 		 */
359 		unsigned cur_binshard = 0;
360 		bin_t *cur_bin = NULL;
361 		if (small) {
362 			cur_binshard = edata_binshard_get(edata);
363 			cur_bin = arena_get_bin(cur_arena, binind,
364 			    cur_binshard);
365 			assert(cur_binshard < bin_infos[binind].n_shards);
366 			/*
367 			 * If you're looking at profiles, you might think this
368 			 * is a good place to prefetch the bin stats, which are
369 			 * often a cache miss.  This turns out not to be
370 			 * helpful on the workloads we've looked at; moving the
371 			 * bin stats next to the lock seems to do better.
372 			 */
373 		}
374 
375 		if (small) {
376 			malloc_mutex_lock(tsdn, &cur_bin->lock);
377 		}
378 		if (!small && !arena_is_auto(cur_arena)) {
379 			malloc_mutex_lock(tsdn, &cur_arena->large_mtx);
380 		}
381 
382 		/*
383 		 * If we acquired the right lock and have some stats to flush,
384 		 * flush them.
385 		 */
386 		if (config_stats && tcache_arena == cur_arena
387 		    && !merged_stats) {
388 			merged_stats = true;
389 			if (small) {
390 				cur_bin->stats.nflushes++;
391 				cur_bin->stats.nrequests +=
392 				    cache_bin->tstats.nrequests;
393 				cache_bin->tstats.nrequests = 0;
394 			} else {
395 				arena_stats_large_flush_nrequests_add(tsdn,
396 				    &tcache_arena->stats, binind,
397 				    cache_bin->tstats.nrequests);
398 				cache_bin->tstats.nrequests = 0;
399 			}
400 		}
401 
402 		/*
403 		 * Large allocations need special prep done.  Afterwards, we can
404 		 * drop the large lock.
405 		 */
406 		if (!small) {
407 			for (unsigned i = 0; i < nflush; i++) {
408 				void *ptr = ptrs->ptr[i];
409 				edata = item_edata[i].edata;
410 				assert(ptr != NULL && edata != NULL);
411 
412 				if (tcache_bin_flush_match(edata, cur_arena_ind,
413 				    cur_binshard, small)) {
414 					large_dalloc_prep_locked(tsdn,
415 					    edata);
416 				}
417 			}
418 		}
419 		if (!small && !arena_is_auto(cur_arena)) {
420 			malloc_mutex_unlock(tsdn, &cur_arena->large_mtx);
421 		}
422 
423 		/* Deallocate whatever we can. */
424 		unsigned ndeferred = 0;
425 		/* Init only to avoid used-uninitialized warning. */
426 		arena_dalloc_bin_locked_info_t dalloc_bin_info = {0};
427 		if (small) {
428 			arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind);
429 		}
430 		for (unsigned i = 0; i < nflush; i++) {
431 			void *ptr = ptrs->ptr[i];
432 			edata = item_edata[i].edata;
433 			assert(ptr != NULL && edata != NULL);
434 			if (!tcache_bin_flush_match(edata, cur_arena_ind,
435 			    cur_binshard, small)) {
436 				/*
437 				 * The object was allocated either via a
438 				 * different arena, or a different bin in this
439 				 * arena.  Either way, stash the object so that
440 				 * it can be handled in a future pass.
441 				 */
442 				ptrs->ptr[ndeferred] = ptr;
443 				item_edata[ndeferred].edata = edata;
444 				ndeferred++;
445 				continue;
446 			}
447 			if (small) {
448 				if (arena_dalloc_bin_locked_step(tsdn,
449 				    cur_arena, cur_bin, &dalloc_bin_info,
450 				    binind, edata, ptr)) {
451 					dalloc_slabs[dalloc_count] = edata;
452 					dalloc_count++;
453 				}
454 			} else {
455 				if (large_dalloc_safety_checks(edata, ptr,
456 				    binind)) {
457 					/* See the comment in isfree. */
458 					continue;
459 				}
460 				large_dalloc_finish(tsdn, edata);
461 			}
462 		}
463 
464 		if (small) {
465 			arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin,
466 			    &dalloc_bin_info);
467 			malloc_mutex_unlock(tsdn, &cur_bin->lock);
468 		}
469 		arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred);
470 		nflush = ndeferred;
471 	}
472 
473 	/* Handle all deferred slab dalloc. */
474 	assert(small || dalloc_count == 0);
475 	for (unsigned i = 0; i < dalloc_count; i++) {
476 		edata_t *slab = dalloc_slabs[i];
477 		arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab);
478 
479 	}
480 
481 	if (config_stats && !merged_stats) {
482 		if (small) {
483 			/*
484 			 * The flush loop didn't happen to flush to this
485 			 * thread's arena, so the stats didn't get merged.
486 			 * Manually do so now.
487 			 */
488 			bin_t *bin = arena_bin_choose(tsdn, tcache_arena,
489 			    binind, NULL);
490 			malloc_mutex_lock(tsdn, &bin->lock);
491 			bin->stats.nflushes++;
492 			bin->stats.nrequests += cache_bin->tstats.nrequests;
493 			cache_bin->tstats.nrequests = 0;
494 			malloc_mutex_unlock(tsdn, &bin->lock);
495 		} else {
496 			arena_stats_large_flush_nrequests_add(tsdn,
497 			    &tcache_arena->stats, binind,
498 			    cache_bin->tstats.nrequests);
499 			cache_bin->tstats.nrequests = 0;
500 		}
501 	}
502 
503 }
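
/*
 * Editor's note -- shape of the flush loop above, as a rough sketch rather
 * than a literal restatement of the code:
 *
 *	while (nflush > 0) {
 *		lock the owning bin (small) or, for non-auto arenas, the
 *		    arena's large_mtx (large) of item 0;
 *		merge thread-local stats if this is the tcache's own arena;
 *		for each remaining item:
 *			if it belongs to the locked bin/arena, free it;
 *			else keep it for a later pass (ndeferred++);
 *		unlock; nflush = ndeferred;
 *	}
 *	free any slabs that became empty during the passes (small only).
 *
 * Each pass takes one lock and retires every item guarded by it, so the
 * number of passes equals the number of distinct (arena, bin shard) targets
 * (distinct arenas, for large bins) present in the flushed pointers.
 */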
504 
505 JEMALLOC_ALWAYS_INLINE void
506 tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
507     szind_t binind, unsigned rem, bool small) {
508 	tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small);
509 
510 	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin,
511 	    &tcache_bin_info[binind]);
512 	assert((cache_bin_sz_t)rem <= ncached);
513 	unsigned nflush = ncached - rem;
514 
515 	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush);
516 	cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind],
517 	    &ptrs, nflush);
518 
519 	tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush,
520 	    small);
521 
522 	cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs,
523 	    ncached - rem);
524 }
525 
526 void
527 tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
528     szind_t binind, unsigned rem) {
529 	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true);
530 }
531 
532 void
533 tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
534     szind_t binind, unsigned rem) {
535 	tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false);
536 }
537 
538 /*
539  * Flushing stashed items happens on 1) tcache fill, 2) tcache flush, or 3) a
540  * tcache GC event.  This makes sure that the stashed items do not hold memory
541  * for too long, and new buffers can only be allocated when nothing is stashed.
542  *
543  * The downside is that the time between stash and flush may be relatively
544  * short, especially when the request rate is high.  This lowers the chance of
545  * detecting a write-after-free -- however that is a delayed detection anyway,
546  * and is less of a focus than the memory overhead.
547  */
548 void
549 tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin,
550     szind_t binind, bool is_small) {
551 	cache_bin_info_t *info = &tcache_bin_info[binind];
552 	/*
553 	 * The two below are for assertion only.  The content of original cached
554 	 * items remain unchanged -- the stashed items reside on the other end
555 	 * of the stack.  Checking the stack head and ncached to verify.
556 	 */
557 	void *head_content = *cache_bin->stack_head;
558 	cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin,
559 	    info);
560 
561 	cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info);
562 	assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info));
563 	if (nstashed == 0) {
564 		return;
565 	}
566 
567 	CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed);
568 	cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs,
569 	    nstashed);
570 	san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind));
571 	tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed,
572 	    is_small);
573 	cache_bin_finish_flush_stashed(cache_bin, info);
574 
575 	assert(cache_bin_nstashed_get_local(cache_bin, info) == 0);
576 	assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached);
577 	assert(head_content == *cache_bin->stack_head);
578 }
579 
580 void
581 tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
582     tcache_t *tcache, arena_t *arena) {
583 	assert(tcache_slow->arena == NULL);
584 	tcache_slow->arena = arena;
585 
586 	if (config_stats) {
587 		/* Link into list of extant tcaches. */
588 		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
589 
590 		ql_elm_new(tcache_slow, link);
591 		ql_tail_insert(&arena->tcache_ql, tcache_slow, link);
592 		cache_bin_array_descriptor_init(
593 		    &tcache_slow->cache_bin_array_descriptor, tcache->bins);
594 		ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
595 		    &tcache_slow->cache_bin_array_descriptor, link);
596 
597 		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
598 	}
599 }
600 
601 static void
602 tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
603     tcache_t *tcache) {
604 	arena_t *arena = tcache_slow->arena;
605 	assert(arena != NULL);
606 	if (config_stats) {
607 		/* Unlink from list of extant tcaches. */
608 		malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
609 		if (config_debug) {
610 			bool in_ql = false;
611 			tcache_slow_t *iter;
612 			ql_foreach(iter, &arena->tcache_ql, link) {
613 				if (iter == tcache_slow) {
614 					in_ql = true;
615 					break;
616 				}
617 			}
618 			assert(in_ql);
619 		}
620 		ql_remove(&arena->tcache_ql, tcache_slow, link);
621 		ql_remove(&arena->cache_bin_array_descriptor_ql,
622 		    &tcache_slow->cache_bin_array_descriptor, link);
623 		tcache_stats_merge(tsdn, tcache_slow->tcache, arena);
624 		malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
625 	}
626 	tcache_slow->arena = NULL;
627 }
628 
629 void
630 tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow,
631     tcache_t *tcache, arena_t *arena) {
632 	tcache_arena_dissociate(tsdn, tcache_slow, tcache);
633 	tcache_arena_associate(tsdn, tcache_slow, tcache, arena);
634 }
635 
636 bool
637 tsd_tcache_enabled_data_init(tsd_t *tsd) {
638 	/* Called upon tsd initialization. */
639 	tsd_tcache_enabled_set(tsd, opt_tcache);
640 	tsd_slow_update(tsd);
641 
642 	if (opt_tcache) {
643 		/* Trigger tcache init. */
644 		tsd_tcache_data_init(tsd);
645 	}
646 
647 	return false;
648 }
649 
650 static void
651 tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
652     void *mem) {
653 	tcache->tcache_slow = tcache_slow;
654 	tcache_slow->tcache = tcache;
655 
656 	memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t)));
657 	tcache_slow->next_gc_bin = 0;
658 	tcache_slow->arena = NULL;
659 	tcache_slow->dyn_alloc = mem;
660 
661 	/*
662 	 * We reserve cache bins for all small size classes, even if some may
663 	 * not get used (i.e. bins with index >= nhbins).  This allows the fast
664 	 * and common paths to access cache bin metadata safely without worrying
665 	 * about which ones are disabled.
666 	 */
667 	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
668 	memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins);
669 
670 	size_t cur_offset = 0;
671 	cache_bin_preincrement(tcache_bin_info, nhbins, mem,
672 	    &cur_offset);
673 	for (unsigned i = 0; i < nhbins; i++) {
674 		if (i < SC_NBINS) {
675 			tcache_slow->lg_fill_div[i] = 1;
676 			tcache_slow->bin_refilled[i] = false;
677 			tcache_slow->bin_flush_delay_items[i]
678 			    = tcache_gc_item_delay_compute(i);
679 		}
680 		cache_bin_t *cache_bin = &tcache->bins[i];
681 		cache_bin_init(cache_bin, &tcache_bin_info[i], mem,
682 		    &cur_offset);
683 	}
684 	/*
685 	 * For small size classes beyond tcache_maxclass (i.e. when nhbins <
686 	 * SC_NBINS), the cache bins are initialized to a state that safely and
687 	 * efficiently fails all fastpath alloc / free, so that no additional
688 	 * check around nhbins is needed on the fastpath.
689 	 */
690 	for (unsigned i = nhbins; i < SC_NBINS; i++) {
691 		/* Disabled small bins. */
692 		cache_bin_t *cache_bin = &tcache->bins[i];
693 		void *fake_stack = mem;
694 		size_t fake_offset = 0;
695 
696 		cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack,
697 		    &fake_offset);
698 		assert(tcache_small_bin_disabled(i, cache_bin));
699 	}
700 
701 	cache_bin_postincrement(tcache_bin_info, nhbins, mem,
702 	    &cur_offset);
703 	/* Sanity check that the whole stack is used. */
704 	assert(cur_offset == tcache_bin_alloc_size);
705 }
706 
707 /* Initialize auto tcache (embedded in TSD). */
708 bool
709 tsd_tcache_data_init(tsd_t *tsd) {
710 	tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd);
711 	tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
712 
713 	assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
714 	size_t alignment = tcache_bin_alloc_alignment;
715 	size_t size = sz_sa2u(tcache_bin_alloc_size, alignment);
716 
717 	void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL,
718 	    true, arena_get(TSDN_NULL, 0, true));
719 	if (mem == NULL) {
720 		return true;
721 	}
722 
723 	tcache_init(tsd, tcache_slow, tcache, mem);
724 	/*
725 	 * Initialization is a bit tricky here.  After malloc init is done, all
726 	 * threads can rely on arena_choose and associate tcache accordingly.
727 	 * However, the thread that does actual malloc bootstrapping relies on
728 	 * functional tsd, and it can only rely on a0.  In that case, we
729 	 * associate its tcache to a0 temporarily, and later on
730 	 * arena_choose_hard() will re-associate properly.
731 	 */
732 	tcache_slow->arena = NULL;
733 	arena_t *arena;
734 	if (!malloc_initialized()) {
735 		/* If in initialization, assign to a0. */
736 		arena = arena_get(tsd_tsdn(tsd), 0, false);
737 		tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
738 		    arena);
739 	} else {
740 		arena = arena_choose(tsd, NULL);
741 		/* This may happen if thread.tcache.enabled is used. */
742 		if (tcache_slow->arena == NULL) {
743 			tcache_arena_associate(tsd_tsdn(tsd), tcache_slow,
744 			    tcache, arena);
745 		}
746 	}
747 	assert(arena == tcache_slow->arena);
748 
749 	return false;
750 }
751 
752 /* Create a manual tcache for the tcache.create mallctl. */
753 tcache_t *
754 tcache_create_explicit(tsd_t *tsd) {
755 	/*
756 	 * We place the cache bin stacks, then the tcache_t, then a pointer to
757 	 * the beginning of the whole allocation (for freeing).  This makes sure
758 	 * the cache bins have the requested alignment.
759 	 */
760 	size_t size = tcache_bin_alloc_size + sizeof(tcache_t)
761 	    + sizeof(tcache_slow_t);
762 	/* Naturally align the pointer stacks. */
763 	size = PTR_CEILING(size);
764 	size = sz_sa2u(size, tcache_bin_alloc_alignment);
765 
766 	void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment,
767 	    true, NULL, true, arena_get(TSDN_NULL, 0, true));
768 	if (mem == NULL) {
769 		return NULL;
770 	}
771 	tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size);
772 	tcache_slow_t *tcache_slow =
773 	    (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t));
774 	tcache_init(tsd, tcache_slow, tcache, mem);
775 
776 	tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache,
777 	    arena_ichoose(tsd, NULL));
778 
779 	return tcache;
780 }
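
/*
 * Editor's note -- layout of the single allocation made above (offsets are
 * relative to mem; the alignment comes from tcache_bin_alloc_alignment):
 *
 *	mem + 0				cache bin stacks
 *	mem + tcache_bin_alloc_size	tcache_t
 *	mem + tcache_bin_alloc_size
 *	    + sizeof(tcache_t)		tcache_slow_t
 *
 * tcache_init() records mem in tcache_slow->dyn_alloc so the whole block can
 * later be freed with a single idalloctm() call in tcache_destroy().
 */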
781 
782 static void
783 tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
784 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
785 	assert(tcache_slow->arena != NULL);
786 
787 	for (unsigned i = 0; i < nhbins; i++) {
788 		cache_bin_t *cache_bin = &tcache->bins[i];
789 		if (i < SC_NBINS) {
790 			tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0);
791 		} else {
792 			tcache_bin_flush_large(tsd, tcache, cache_bin, i, 0);
793 		}
794 		if (config_stats) {
795 			assert(cache_bin->tstats.nrequests == 0);
796 		}
797 	}
798 }
799 
800 void
801 tcache_flush(tsd_t *tsd) {
802 	assert(tcache_available(tsd));
803 	tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
804 }
805 
806 static void
807 tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
808 	tcache_slow_t *tcache_slow = tcache->tcache_slow;
809 	tcache_flush_cache(tsd, tcache);
810 	arena_t *arena = tcache_slow->arena;
811 	tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache);
812 
813 	if (tsd_tcache) {
814 		cache_bin_t *cache_bin = &tcache->bins[0];
815 		cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]);
816 	}
817 	idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true,
818 	    true);
819 
820 	/*
821 	 * The deallocation and tcache flush above may not trigger decay since
822 	 * we are on the tcache shutdown path (potentially with non-nominal
823 	 * tsd).  Manually trigger decay to avoid pathological cases.  Also
824 	 * include arena 0 because the tcache array is allocated from it.
825 	 */
826 	arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false),
827 	    false, false);
828 
829 	if (arena_nthreads_get(arena, false) == 0 &&
830 	    !background_thread_enabled()) {
831 		/* Force purging when no threads are assigned to the arena anymore. */
832 		arena_decay(tsd_tsdn(tsd), arena,
833 		    /* is_background_thread */ false, /* all */ true);
834 	} else {
835 		arena_decay(tsd_tsdn(tsd), arena,
836 		    /* is_background_thread */ false, /* all */ false);
837 	}
838 }
839 
840 /* For auto tcache (embedded in TSD) only. */
841 void
842 tcache_cleanup(tsd_t *tsd) {
843 	tcache_t *tcache = tsd_tcachep_get(tsd);
844 	if (!tcache_available(tsd)) {
845 		assert(tsd_tcache_enabled_get(tsd) == false);
846 		assert(cache_bin_still_zero_initialized(&tcache->bins[0]));
847 		return;
848 	}
849 	assert(tsd_tcache_enabled_get(tsd));
850 	assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
851 
852 	tcache_destroy(tsd, tcache, true);
853 	if (config_debug) {
854 		/*
855 		 * For debug testing only, we want to pretend we're still in the
856 		 * zero-initialized state.
857 		 */
858 		memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins);
859 	}
860 }
861 
862 void
863 tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
864 	cassert(config_stats);
865 
866 	/* Merge and reset tcache stats. */
867 	for (unsigned i = 0; i < nhbins; i++) {
868 		cache_bin_t *cache_bin = &tcache->bins[i];
869 		if (i < SC_NBINS) {
870 			bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL);
871 			malloc_mutex_lock(tsdn, &bin->lock);
872 			bin->stats.nrequests += cache_bin->tstats.nrequests;
873 			malloc_mutex_unlock(tsdn, &bin->lock);
874 		} else {
875 			arena_stats_large_flush_nrequests_add(tsdn,
876 			    &arena->stats, i, cache_bin->tstats.nrequests);
877 		}
878 		cache_bin->tstats.nrequests = 0;
879 	}
880 }
881 
882 static bool
883 tcaches_create_prep(tsd_t *tsd, base_t *base) {
884 	bool err;
885 
886 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
887 
888 	if (tcaches == NULL) {
889 		tcaches = base_alloc(tsd_tsdn(tsd), base,
890 		    sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE);
891 		if (tcaches == NULL) {
892 			err = true;
893 			goto label_return;
894 		}
895 	}
896 
897 	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) {
898 		err = true;
899 		goto label_return;
900 	}
901 
902 	err = false;
903 label_return:
904 	return err;
905 }
906 
907 bool
908 tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) {
909 	witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
910 
911 	bool err;
912 
913 	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
914 
915 	if (tcaches_create_prep(tsd, base)) {
916 		err = true;
917 		goto label_return;
918 	}
919 
920 	tcache_t *tcache = tcache_create_explicit(tsd);
921 	if (tcache == NULL) {
922 		err = true;
923 		goto label_return;
924 	}
925 
926 	tcaches_t *elm;
927 	if (tcaches_avail != NULL) {
928 		elm = tcaches_avail;
929 		tcaches_avail = tcaches_avail->next;
930 		elm->tcache = tcache;
931 		*r_ind = (unsigned)(elm - tcaches);
932 	} else {
933 		elm = &tcaches[tcaches_past];
934 		elm->tcache = tcache;
935 		*r_ind = tcaches_past;
936 		tcaches_past++;
937 	}
938 
939 	err = false;
940 label_return:
941 	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
942 	witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
943 	return err;
944 }
945 
946 static tcache_t *
947 tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) {
948 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
949 
950 	if (elm->tcache == NULL) {
951 		return NULL;
952 	}
953 	tcache_t *tcache = elm->tcache;
954 	if (allow_reinit) {
955 		elm->tcache = TCACHES_ELM_NEED_REINIT;
956 	} else {
957 		elm->tcache = NULL;
958 	}
959 
960 	if (tcache == TCACHES_ELM_NEED_REINIT) {
961 		return NULL;
962 	}
963 	return tcache;
964 }
965 
966 void
967 tcaches_flush(tsd_t *tsd, unsigned ind) {
968 	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
969 	tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true);
970 	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
971 	if (tcache != NULL) {
972 		/* Destroy the tcache; recreate in tcaches_get() if needed. */
973 		tcache_destroy(tsd, tcache, false);
974 	}
975 }
976 
977 void
978 tcaches_destroy(tsd_t *tsd, unsigned ind) {
979 	malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
980 	tcaches_t *elm = &tcaches[ind];
981 	tcache_t *tcache = tcaches_elm_remove(tsd, elm, false);
982 	elm->next = tcaches_avail;
983 	tcaches_avail = elm;
984 	malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
985 	if (tcache != NULL) {
986 		tcache_destroy(tsd, tcache, false);
987 	}
988 }
989 
990 static unsigned
991 tcache_ncached_max_compute(szind_t szind) {
992 	if (szind >= SC_NBINS) {
993 		assert(szind < nhbins);
994 		return opt_tcache_nslots_large;
995 	}
996 	unsigned slab_nregs = bin_infos[szind].nregs;
997 
998 	/* We may modify these values; start with the opt versions. */
999 	unsigned nslots_small_min = opt_tcache_nslots_small_min;
1000 	unsigned nslots_small_max = opt_tcache_nslots_small_max;
1001 
1002 	/*
1003 	 * Clamp values to meet our constraints -- even, nonzero, min < max, and
1004 	 * suitable for a cache bin size.
1005 	 */
1006 	if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) {
1007 		nslots_small_max = CACHE_BIN_NCACHED_MAX;
1008 	}
1009 	if (nslots_small_min % 2 != 0) {
1010 		nslots_small_min++;
1011 	}
1012 	if (nslots_small_max % 2 != 0) {
1013 		nslots_small_max--;
1014 	}
1015 	if (nslots_small_min < 2) {
1016 		nslots_small_min = 2;
1017 	}
1018 	if (nslots_small_max < 2) {
1019 		nslots_small_max = 2;
1020 	}
1021 	if (nslots_small_min > nslots_small_max) {
1022 		nslots_small_min = nslots_small_max;
1023 	}
1024 
1025 	unsigned candidate;
1026 	if (opt_lg_tcache_nslots_mul < 0) {
1027 		candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul);
1028 	} else {
1029 		candidate = slab_nregs << opt_lg_tcache_nslots_mul;
1030 	}
1031 	if (candidate % 2 != 0) {
1032 		/*
1033 		 * We need the candidate size to be even -- we assume that we
1034 		 * can divide by two and get a positive number (e.g. when
1035 		 * flushing).
1036 		 */
1037 		++candidate;
1038 	}
1039 	if (candidate <= nslots_small_min) {
1040 		return nslots_small_min;
1041 	} else if (candidate <= nslots_small_max) {
1042 		return candidate;
1043 	} else {
1044 		return nslots_small_max;
1045 	}
1046 }
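
/*
 * Editor's note -- worked examples of the computation above, using the
 * default options (nslots_small_min == 20, nslots_small_max == 200,
 * lg_tcache_nslots_mul == 1):
 *
 *	slab_nregs == 512: candidate == 1024, clamped down to 200.
 *	slab_nregs == 64:  candidate == 128, within [20, 200], so 128.
 *	slab_nregs == 8:   candidate == 16, raised to the minimum, 20.
 *
 * Large size classes skip all of this and simply use opt_tcache_nslots_large
 * (20 by default).
 */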
1047 
1048 bool
1049 tcache_boot(tsdn_t *tsdn, base_t *base) {
1050 	tcache_maxclass = sz_s2u(opt_tcache_max);
1051 	assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT);
1052 	nhbins = sz_size2index(tcache_maxclass) + 1;
1053 
1054 	if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
1055 	    malloc_mutex_rank_exclusive)) {
1056 		return true;
1057 	}
1058 
1059 	/* Initialize tcache_bin_info.  See comments in tcache_init(). */
1060 	unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins;
1061 	size_t size = n_reserved_bins * sizeof(cache_bin_info_t);
1062 	tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size,
1063 	    CACHELINE);
1064 	if (tcache_bin_info == NULL) {
1065 		return true;
1066 	}
1067 
1068 	for (szind_t i = 0; i < nhbins; i++) {
1069 		unsigned ncached_max = tcache_ncached_max_compute(i);
1070 		cache_bin_info_init(&tcache_bin_info[i], ncached_max);
1071 	}
1072 	for (szind_t i = nhbins; i < SC_NBINS; i++) {
1073 		/* Disabled small bins. */
1074 		cache_bin_info_init(&tcache_bin_info[i], 0);
1075 		assert(tcache_small_bin_disabled(i, NULL));
1076 	}
1077 
1078 	cache_bin_info_compute_alloc(tcache_bin_info, nhbins,
1079 	    &tcache_bin_alloc_size, &tcache_bin_alloc_alignment);
1080 
1081 	return false;
1082 }
1083 
1084 void
1085 tcache_prefork(tsdn_t *tsdn) {
1086 	malloc_mutex_prefork(tsdn, &tcaches_mtx);
1087 }
1088 
1089 void
1090 tcache_postfork_parent(tsdn_t *tsdn) {
1091 	malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
1092 }
1093 
1094 void
1095 tcache_postfork_child(tsdn_t *tsdn) {
1096 	malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
1097 }
1098 
1099 void tcache_assert_initialized(tcache_t *tcache) {
1100 	assert(!cache_bin_still_zero_initialized(&tcache->bins[0]));
1101 }
1102