/*
 * xref: /freebsd/contrib/jemalloc/src/tcache.c
 * (revision 63d1fd5970ec814904aa0f4580b10a0d302d08b2)
 */
#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

unsigned		nhbins;
size_t			tcache_maxclass;

tcaches_t		*tcaches;

/* Index of first element within tcaches that has never been used. */
static unsigned		tcaches_past;

/* Head of singly linked list tracking available tcaches elements. */
static tcaches_t	*tcaches_avail;

/******************************************************************************/

/* Return the usable size of the allocation pointed to by ptr. */
size_t
tcache_salloc(tsdn_t *tsdn, const void *ptr)
{

	return (arena_salloc(tsdn, ptr, false));
}

void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache)
{
	szind_t binind = tcache->next_gc_bin;
	tcache_bin_t *tbin = &tcache->tbins[binind];
	tcache_bin_info_t *tbin_info = &tcache_bin_info[binind];

	if (tbin->low_water > 0) {
		/*
		 * Flush (ceiling) 3/4 of the objects below the low water mark.
		 */
		if (binind < NBINS) {
			tcache_bin_flush_small(tsd, tcache, tbin, binind,
			    tbin->ncached - tbin->low_water + (tbin->low_water
			    >> 2));
		} else {
			tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
			    - tbin->low_water + (tbin->low_water >> 2), tcache);
		}
		/*
		 * Reduce fill count by 2X.  Limit lg_fill_div such that the
		 * fill count is always at least 1.
		 */
		if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1)
			tbin->lg_fill_div++;
	} else if (tbin->low_water < 0) {
		/*
		 * Increase fill count by 2X.  Make sure lg_fill_div stays
		 * greater than 0.
		 */
		if (tbin->lg_fill_div > 1)
			tbin->lg_fill_div--;
	}
	tbin->low_water = tbin->ncached;

	tcache->next_gc_bin++;
	if (tcache->next_gc_bin == nhbins)
		tcache->next_gc_bin = 0;
}

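/*
 * Worked example of the GC math in tcache_event_hard() (editor's sketch with
 * illustrative values, not taken from the source): suppose a bin has
 * ncached_max == 32, ncached == 20, low_water == 8, and lg_fill_div == 1.
 * The flush call keeps
 *
 *	rem = ncached - low_water + (low_water >> 2) = 20 - 8 + 2 = 14
 *
 * objects, i.e. it flushes 6 of the 8 objects that sat unused below the low
 * water mark, which is the ceiling of 3/4 of low_water.  Because
 * (ncached_max >> (lg_fill_div + 1)) == 8 >= 1, lg_fill_div is bumped to 2,
 * so the next refill brings in ncached_max >> 2 == 8 objects instead of 16.
 */
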
/*
 * Slow path for small allocation: the bin's stack was empty, so refill it
 * from the arena and retry the stack pop.
 */
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
    tcache_bin_t *tbin, szind_t binind, bool *tcache_success)
{
	void *ret;

	arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ?
	    tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin, tcache_success);

	return (ret);
}

void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin,
    szind_t binind, unsigned rem)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    *(tbin->avail - 1));
		arena_t *bin_arena = extent_node_arena_get(&chunk->node);
		arena_bin_t *bin = &bin_arena->bins[binind];

		if (config_prof && bin_arena == arena) {
			if (arena_prof_accum(tsd_tsdn(tsd), arena,
			    tcache->prof_accumbytes))
				prof_idump(tsd_tsdn(tsd));
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		if (config_stats && bin_arena == arena) {
			assert(!merged_stats);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (extent_node_arena_get(&chunk->node) == bin_arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> LG_PAGE;
				arena_chunk_map_bits_t *bitselm =
				    arena_bitselm_get_mutable(chunk, pageind);
				arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
				    bin_arena, chunk, ptr, bitselm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
		arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &arena->bins[binind];
		malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

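/*
 * Illustration of the deferred-flush pattern used by tcache_bin_flush_small()
 * and tcache_bin_flush_large() (editor's sketch, not upstream documentation).
 * tbin->avail points just past the bin's pointer stack, and cached objects
 * occupy the slots below it:
 *
 *	avail[-1], avail[-2], ..., avail[-ncached]
 *
 * Each pass of the outer loop locks the bin (or arena) that owns avail[-1],
 * frees every object in the current batch that belongs to that owner, and
 * compacts the remaining objects back into avail[-1 .. -ndeferred] so the
 * next pass can lock the next owner.  The flush therefore takes one lock
 * acquisition per distinct owning arena rather than one per object.
 */
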
void
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind,
    unsigned rem, tcache_t *tcache)
{
	arena_t *arena;
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	arena = arena_choose(tsd, NULL);
	assert(arena != NULL);
	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    *(tbin->avail - 1));
		arena_t *locked_arena = extent_node_arena_get(&chunk->node);
		UNUSED bool idump;

		if (config_prof)
			idump = false;
		malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock);
		if ((config_prof || config_stats) && locked_arena == arena) {
			if (config_prof) {
				idump = arena_prof_accum_locked(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = *(tbin->avail - 1 - i);
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (extent_node_arena_get(&chunk->node) ==
			    locked_arena) {
				arena_dalloc_large_junked_locked(tsd_tsdn(tsd),
				    locked_arena, chunk, ptr);
			} else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				*(tbin->avail - 1 - ndeferred) = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock);
		if (config_prof && idump)
			prof_idump(tsd_tsdn(tsd));
		arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush -
		    ndeferred);
	}
	if (config_stats && !merged_stats) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
	}

	memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
	    sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

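/*
 * Editor's note on the large-object flush above: unlike the small path, which
 * serializes on a per-bin lock, this function takes the owning arena's lock,
 * and any profile dump signalled by arena_prof_accum_locked() is only recorded
 * in idump and performed after the lock is dropped, presumably so that
 * prof_idump() never runs while an arena lock is held.
 */
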
static void
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{

	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

static void
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{

	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(tsdn, &arena->lock);
		if (config_debug) {
			bool in_ql = false;
			tcache_t *iter;
			ql_foreach(iter, &arena->tcache_ql, link) {
				if (iter == tcache) {
					in_ql = true;
					break;
				}
			}
			assert(in_ql);
		}
		ql_remove(&arena->tcache_ql, tcache, link);
		tcache_stats_merge(tsdn, tcache, arena);
		malloc_mutex_unlock(tsdn, &arena->lock);
	}
}

void
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena,
    arena_t *newarena)
{

	tcache_arena_dissociate(tsdn, tcache, oldarena);
	tcache_arena_associate(tsdn, tcache, newarena);
}

/*
 * Slow path for tcache_get(): create this thread's tcache on first use, or
 * return NULL if tcaches are disabled or no arena can be chosen.
 */
tcache_t *
tcache_get_hard(tsd_t *tsd)
{
	arena_t *arena;

	if (!tcache_enabled_get()) {
		if (tsd_nominal(tsd))
			tcache_enabled_set(false); /* Memoize. */
		return (NULL);
	}
	arena = arena_choose(tsd, NULL);
	if (unlikely(arena == NULL))
		return (NULL);
	return (tcache_create(tsd_tsdn(tsd), arena));
}

tcache_t *
tcache_create(tsdn_t *tsdn, arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/* Avoid false cacheline sharing. */
	size = sa2u(size, CACHELINE);

	tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true,
	    arena_get(TSDN_NULL, 0, true));
	if (tcache == NULL)
		return (NULL);

	tcache_arena_associate(tsdn, tcache, arena);

	ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);

	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
		/*
		 * avail points past the available space.  Allocations will
		 * access the slots toward higher addresses (for the benefit of
		 * prefetch).
		 */
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
	}

	return (tcache);
}

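/*
 * Layout sketch of the single allocation built by tcache_create() (editor's
 * illustration, not upstream documentation):
 *
 *	+------------------------------+ <-- tcache
 *	| tcache_t header and          |
 *	| tbins[0 .. nhbins-1] metadata|
 *	+------------------------------+ <-- PTR_CEILING(header size)
 *	| bin 0 pointer stack          |
 *	| (ncached_max[0] slots)       |
 *	+------------------------------+ <-- tbins[0].avail
 *	| bin 1 pointer stack          |
 *	| ...                          |
 *	+------------------------------+ <-- tbins[1].avail
 *
 * Each tbins[i].avail points just past its own stack, and cached objects are
 * pushed toward lower addresses, which is why the flush functions read
 * *(tbin->avail - 1 - i).
 */
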
static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache)
{
	arena_t *arena;
	unsigned i;

	arena = arena_choose(tsd, NULL);
	tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena);

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tsd, tcache, tbin, i, 0);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tsd, tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - NBINS].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0 &&
	    arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes))
		prof_idump(tsd_tsdn(tsd));

	idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true);
}

void
tcache_cleanup(tsd_t *tsd)
{
	tcache_t *tcache;

	if (!config_tcache)
		return;

	if ((tcache = tsd_tcache_get(tsd)) != NULL) {
		tcache_destroy(tsd, tcache);
		tsd_tcache_set(tsd, NULL);
	}
}

void
tcache_enabled_cleanup(tsd_t *tsd)
{

	/* Do nothing. */
}

void
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	cassert(config_stats);

	malloc_mutex_assert_owner(tsdn, &arena->lock);

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(tsdn, &bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(tsdn, &bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

bool
tcaches_create(tsd_t *tsd, unsigned *r_ind)
{
	arena_t *arena;
	tcache_t *tcache;
	tcaches_t *elm;

	if (tcaches == NULL) {
		tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) *
		    (MALLOCX_TCACHE_MAX+1));
		if (tcaches == NULL)
			return (true);
	}

	if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)
		return (true);
	arena = arena_ichoose(tsd, NULL);
	if (unlikely(arena == NULL))
		return (true);
	tcache = tcache_create(tsd_tsdn(tsd), arena);
	if (tcache == NULL)
		return (true);

	if (tcaches_avail != NULL) {
		elm = tcaches_avail;
		tcaches_avail = tcaches_avail->next;
		elm->tcache = tcache;
		*r_ind = (unsigned)(elm - tcaches);
	} else {
		elm = &tcaches[tcaches_past];
		elm->tcache = tcache;
		*r_ind = tcaches_past;
		tcaches_past++;
	}

	return (false);
}

static void
tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm)
{

	if (elm->tcache == NULL)
		return;
	tcache_destroy(tsd, elm->tcache);
	elm->tcache = NULL;
}

void
tcaches_flush(tsd_t *tsd, unsigned ind)
{

	tcaches_elm_flush(tsd, &tcaches[ind]);
}

void
tcaches_destroy(tsd_t *tsd, unsigned ind)
{
	tcaches_t *elm = &tcaches[ind];
	tcaches_elm_flush(tsd, elm);
	elm->next = tcaches_avail;
	tcaches_avail = elm;
}

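/*
 * Editor's sketch of the explicit tcache index management implemented by
 * tcaches_create()/tcaches_flush()/tcaches_destroy() above (not upstream
 * documentation):
 *
 *	tcaches[0 .. MALLOCX_TCACHE_MAX]   flat array, allocated lazily
 *	tcaches_past                       first slot that has never been used
 *	tcaches_avail --> elm --> elm ...  singly linked free list of
 *	                                   previously destroyed slots
 *
 * Creation prefers recycling a slot from tcaches_avail and only consumes a
 * fresh slot at tcaches_past when the free list is empty; destruction flushes
 * the slot's tcache and pushes the slot back onto tcaches_avail so its index
 * can be handed out again.
 */
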
bool
tcache_boot(tsdn_t *tsdn)
{
	unsigned i;

	/*
	 * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is
	 * known.
	 */
	if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS)
		tcache_maxclass = SMALL_MAXCLASS;
	else if ((ZU(1) << opt_lg_tcache_max) > large_maxclass)
		tcache_maxclass = large_maxclass;
	else
		tcache_maxclass = (ZU(1) << opt_lg_tcache_max);

	nhbins = size2index(tcache_maxclass) + 1;

	/* Initialize tcache_bin_info. */
	tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins *
	    sizeof(tcache_bin_info_t));
	if (tcache_bin_info == NULL)
		return (true);
	stack_nelms = 0;
	for (i = 0; i < NBINS; i++) {
		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MIN;
		} else if ((arena_bin_info[i].nregs << 1) <=
		    TCACHE_NSLOTS_SMALL_MAX) {
			tcache_bin_info[i].ncached_max =
			    (arena_bin_info[i].nregs << 1);
		} else {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MAX;
		}
		stack_nelms += tcache_bin_info[i].ncached_max;
	}
	for (; i < nhbins; i++) {
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
		stack_nelms += tcache_bin_info[i].ncached_max;
	}

	return (false);
}
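
/*
 * Worked example of the ncached_max sizing in tcache_boot() (editor's sketch
 * with illustrative constants, not values taken from the source): the small
 * bins effectively get
 *
 *	ncached_max = min(max(nregs << 1, TCACHE_NSLOTS_SMALL_MIN),
 *	    TCACHE_NSLOTS_SMALL_MAX)
 *
 * so with TCACHE_NSLOTS_SMALL_MIN == 20 and TCACHE_NSLOTS_SMALL_MAX == 200, a
 * bin whose runs hold nregs == 8 regions gets 20 slots, nregs == 64 gets 128
 * slots, and nregs == 512 is capped at 200 slots.  Every bin index at or
 * above NBINS (the large classes up to tcache_maxclass) gets a flat
 * TCACHE_NSLOTS_LARGE slots.
 */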