#define	JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_internal.h"

/******************************************************************************/
/* Data. */

malloc_tsd_data(, tcache, tcache_t *, NULL)
malloc_tsd_data(, tcache_enabled, tcache_enabled_t, tcache_enabled_default)

bool	opt_tcache = true;
ssize_t	opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

tcache_bin_info_t	*tcache_bin_info;
static unsigned		stack_nelms; /* Total stack elms per tcache. */

size_t			nhbins;
size_t			tcache_maxclass;

/******************************************************************************/

size_t
tcache_salloc(const void *ptr)
{

	return (arena_salloc(ptr, false));
}

void *
tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
	void *ret;

	arena_tcache_fill_small(tcache->arena, tbin, binind,
	    config_prof ? tcache->prof_accumbytes : 0);
	if (config_prof)
		tcache->prof_accumbytes = 0;
	ret = tcache_alloc_easy(tbin);

	return (ret);
}

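/*
 * Flush cached small objects from tbin until only rem objects remain.  Each
 * pass locks the arena bin that owns the first stashed object, returns every
 * object belonging to that arena, and defers the rest to a later pass.  Bin
 * stats are merged into this thread's arena either during the pass that
 * locks it or in the catch-up step at the end.
 */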
void
tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < NBINS);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena bin associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;
		arena_bin_t *bin = &arena->bins[binind];

		if (config_prof && arena == tcache->arena) {
			malloc_mutex_lock(&arena->lock);
			arena_prof_accum(arena, tcache->prof_accumbytes);
			malloc_mutex_unlock(&arena->lock);
			tcache->prof_accumbytes = 0;
		}

		malloc_mutex_lock(&bin->lock);
		if (config_stats && arena == tcache->arena) {
			assert(merged_stats == false);
			merged_stats = true;
			bin->stats.nflushes++;
			bin->stats.nrequests += tbin->tstats.nrequests;
			tbin->tstats.nrequests = 0;
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena) {
				size_t pageind = ((uintptr_t)ptr -
				    (uintptr_t)chunk) >> LG_PAGE;
				arena_chunk_map_t *mapelm =
				    &chunk->map[pageind-map_bias];
				if (config_fill && opt_junk) {
					arena_alloc_junk_small(ptr,
					    &arena_bin_info[binind], true);
				}
				arena_dalloc_bin(arena, chunk, ptr, mapelm);
			} else {
				/*
				 * This object was allocated via a different
				 * arena bin than the one that is currently
				 * locked.  Stash the object, so that it can be
				 * handled in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&bin->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_bin_t *bin = &tcache->arena->bins[binind];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nflushes++;
		bin->stats.nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&bin->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

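/*
 * Flush cached large objects from tbin until only rem objects remain.  This
 * mirrors tcache_bin_flush_small(), except that large deallocation is
 * serialized by the arena lock rather than a per-bin lock, and request stats
 * are recorded in the arena's large-object counters.
 */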
void
tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem,
    tcache_t *tcache)
{
	void *ptr;
	unsigned i, nflush, ndeferred;
	bool merged_stats = false;

	assert(binind < nhbins);
	assert(rem <= tbin->ncached);

	for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
		/* Lock the arena associated with the first object. */
		arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(
		    tbin->avail[0]);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		if ((config_prof || config_stats) && arena == tcache->arena) {
			if (config_prof) {
				arena_prof_accum(arena,
				    tcache->prof_accumbytes);
				tcache->prof_accumbytes = 0;
			}
			if (config_stats) {
				merged_stats = true;
				arena->stats.nrequests_large +=
				    tbin->tstats.nrequests;
				arena->stats.lstats[binind - NBINS].nrequests +=
				    tbin->tstats.nrequests;
				tbin->tstats.nrequests = 0;
			}
		}
		ndeferred = 0;
		for (i = 0; i < nflush; i++) {
			ptr = tbin->avail[i];
			assert(ptr != NULL);
			chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
			if (chunk->arena == arena)
				arena_dalloc_large(arena, chunk, ptr);
			else {
				/*
				 * This object was allocated via a different
				 * arena than the one that is currently locked.
				 * Stash the object, so that it can be handled
				 * in a future pass.
				 */
				tbin->avail[ndeferred] = ptr;
				ndeferred++;
			}
		}
		malloc_mutex_unlock(&arena->lock);
	}
	if (config_stats && merged_stats == false) {
		/*
		 * The flush loop didn't happen to flush to this thread's
		 * arena, so the stats didn't get merged.  Manually do so now.
		 */
		arena_t *arena = tcache->arena;
		malloc_mutex_lock(&arena->lock);
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		arena->stats.lstats[binind - NBINS].nrequests +=
		    tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
		malloc_mutex_unlock(&arena->lock);
	}

	memmove(tbin->avail, &tbin->avail[tbin->ncached - rem],
	    rem * sizeof(void *));
	tbin->ncached = rem;
	if ((int)tbin->ncached < tbin->low_water)
		tbin->low_water = tbin->ncached;
}

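/*
 * Bind tcache to arena; with stats enabled, also link it into the arena's
 * list of extant tcaches.
 */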
void
tcache_arena_associate(tcache_t *tcache, arena_t *arena)
{

	if (config_stats) {
		/* Link into list of extant tcaches. */
		malloc_mutex_lock(&arena->lock);
		ql_elm_new(tcache, link);
		ql_tail_insert(&arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&arena->lock);
	}
	tcache->arena = arena;
}

void
tcache_arena_dissociate(tcache_t *tcache)
{

	if (config_stats) {
		/* Unlink from list of extant tcaches. */
		malloc_mutex_lock(&tcache->arena->lock);
		ql_remove(&tcache->arena->tcache_ql, tcache, link);
		malloc_mutex_unlock(&tcache->arena->lock);
		tcache_stats_merge(tcache, tcache->arena);
	}
}

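/*
 * Allocate and initialize a tcache for arena.  The tcache header and the
 * per-bin pointer stacks share one contiguous allocation: the stacks begin at
 * the first pointer-aligned offset past the header, and the total size is
 * rounded up to a cacheline multiple (e.g. with 64-byte cachelines, a
 * 100-byte layout rounds up to 128 bytes) to avoid false cacheline sharing.
 */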
tcache_t *
tcache_create(arena_t *arena)
{
	tcache_t *tcache;
	size_t size, stack_offset;
	unsigned i;

	size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins);
	/* Naturally align the pointer stacks. */
	size = PTR_CEILING(size);
	stack_offset = size;
	size += stack_nelms * sizeof(void *);
	/*
	 * Round up to the nearest multiple of the cacheline size, in order to
	 * avoid the possibility of false cacheline sharing.
	 *
	 * That this works relies on the same logic as in ipalloc(), but we
	 * cannot directly call ipalloc() here due to tcache bootstrapping
	 * issues.
	 */
	size = (size + CACHELINE_MASK) & (-CACHELINE);

	if (size <= SMALL_MAXCLASS)
		tcache = (tcache_t *)arena_malloc_small(arena, size, true);
	else if (size <= tcache_maxclass)
		tcache = (tcache_t *)arena_malloc_large(arena, size, true);
	else
		tcache = (tcache_t *)icalloc(size);

	if (tcache == NULL)
		return (NULL);

	tcache_arena_associate(tcache, arena);

	assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
	for (i = 0; i < nhbins; i++) {
		tcache->tbins[i].lg_fill_div = 1;
		tcache->tbins[i].avail = (void **)((uintptr_t)tcache +
		    (uintptr_t)stack_offset);
		stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
	}

	tcache_tsd_set(&tcache);

	return (tcache);
}

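/*
 * Tear down tcache: flush all small and large bins, merge any remaining
 * per-thread stats and profiling bytes into the arena, then deallocate the
 * tcache according to the size class it was allocated from.
 */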
void
tcache_destroy(tcache_t *tcache)
{
	unsigned i;
	size_t tcache_size;

	tcache_arena_dissociate(tcache);

	for (i = 0; i < NBINS; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_small(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			arena_bin_t *bin = &arena->bins[i];
			malloc_mutex_lock(&bin->lock);
			bin->stats.nrequests += tbin->tstats.nrequests;
			malloc_mutex_unlock(&bin->lock);
		}
	}

	for (; i < nhbins; i++) {
		tcache_bin_t *tbin = &tcache->tbins[i];
		tcache_bin_flush_large(tbin, i, 0, tcache);

		if (config_stats && tbin->tstats.nrequests != 0) {
			arena_t *arena = tcache->arena;
			malloc_mutex_lock(&arena->lock);
			arena->stats.nrequests_large += tbin->tstats.nrequests;
			arena->stats.lstats[i - NBINS].nrequests +=
			    tbin->tstats.nrequests;
			malloc_mutex_unlock(&arena->lock);
		}
	}

	if (config_prof && tcache->prof_accumbytes > 0) {
		malloc_mutex_lock(&tcache->arena->lock);
		arena_prof_accum(tcache->arena, tcache->prof_accumbytes);
		malloc_mutex_unlock(&tcache->arena->lock);
	}

	tcache_size = arena_salloc(tcache, false);
	if (tcache_size <= SMALL_MAXCLASS) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;
		size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
		    LG_PAGE;
		arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias];
		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
		    (uintptr_t)((pageind - (mapelm->bits >> LG_PAGE)) <<
		    LG_PAGE));
		arena_bin_t *bin = run->bin;

		malloc_mutex_lock(&bin->lock);
		arena_dalloc_bin(arena, chunk, tcache, mapelm);
		malloc_mutex_unlock(&bin->lock);
	} else if (tcache_size <= tcache_maxclass) {
		arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
		arena_t *arena = chunk->arena;

		malloc_mutex_lock(&arena->lock);
		arena_dalloc_large(arena, chunk, tcache);
		malloc_mutex_unlock(&arena->lock);
	} else
		idalloc(tcache);
}

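/*
 * TSD destructor.  The sentinel values distinguish a disabled tcache, a
 * tcache re-created by another destructor after this one already ran
 * (reincarnated), and a tcache that has already been torn down (purgatory);
 * a live tcache is destroyed and the slot is left in purgatory.
 */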
void
tcache_thread_cleanup(void *arg)
{
	tcache_t *tcache = *(tcache_t **)arg;

	if (tcache == TCACHE_STATE_DISABLED) {
		/* Do nothing. */
	} else if (tcache == TCACHE_STATE_REINCARNATED) {
		/*
		 * Another destructor called an allocator function after this
		 * destructor was called.  Reset tcache to
		 * TCACHE_STATE_PURGATORY in order to receive another callback.
		 */
		tcache = TCACHE_STATE_PURGATORY;
		tcache_tsd_set(&tcache);
	} else if (tcache == TCACHE_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to TCACHE_STATE_PURGATORY so that other destructors wouldn't
		 * cause re-creation of the tcache.  This time, do nothing, so
		 * that the destructor will not be called again.
		 */
	} else if (tcache != NULL) {
		assert(tcache != TCACHE_STATE_PURGATORY);
		tcache_destroy(tcache);
		tcache = TCACHE_STATE_PURGATORY;
		tcache_tsd_set(&tcache);
	}
}

void
tcache_stats_merge(tcache_t *tcache, arena_t *arena)
{
	unsigned i;

	/* Merge and reset tcache stats. */
	for (i = 0; i < NBINS; i++) {
		arena_bin_t *bin = &arena->bins[i];
		tcache_bin_t *tbin = &tcache->tbins[i];
		malloc_mutex_lock(&bin->lock);
		bin->stats.nrequests += tbin->tstats.nrequests;
		malloc_mutex_unlock(&bin->lock);
		tbin->tstats.nrequests = 0;
	}

	for (; i < nhbins; i++) {
		malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS];
		tcache_bin_t *tbin = &tcache->tbins[i];
		arena->stats.nrequests_large += tbin->tstats.nrequests;
		lstats->nrequests += tbin->tstats.nrequests;
		tbin->tstats.nrequests = 0;
	}
}

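/*
 * First-phase bootstrap: derive tcache_maxclass from opt_lg_tcache_max, size
 * nhbins accordingly, and allocate tcache_bin_info, which records how many
 * objects each bin's pointer stack can hold.  Returns true on error.
 */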
bool
tcache_boot0(void)
{
	unsigned i;

	/*
	 * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is
	 * known.
	 */
	if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS)
		tcache_maxclass = SMALL_MAXCLASS;
	else if ((1U << opt_lg_tcache_max) > arena_maxclass)
		tcache_maxclass = arena_maxclass;
	else
		tcache_maxclass = (1U << opt_lg_tcache_max);

	nhbins = NBINS + (tcache_maxclass >> LG_PAGE);

	/* Initialize tcache_bin_info. */
	tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
	    sizeof(tcache_bin_info_t));
	if (tcache_bin_info == NULL)
		return (true);
	stack_nelms = 0;
	for (i = 0; i < NBINS; i++) {
		if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
			tcache_bin_info[i].ncached_max =
			    (arena_bin_info[i].nregs << 1);
		} else {
			tcache_bin_info[i].ncached_max =
			    TCACHE_NSLOTS_SMALL_MAX;
		}
		stack_nelms += tcache_bin_info[i].ncached_max;
	}
	for (; i < nhbins; i++) {
		tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
		stack_nelms += tcache_bin_info[i].ncached_max;
	}

	return (false);
}

bool
tcache_boot1(void)
{

	if (tcache_tsd_boot() || tcache_enabled_tsd_boot())
		return (true);

	return (false);
}