xref: /freebsd/contrib/jemalloc/src/sec.c (revision c43cad87172039ccf38172129c79755ea79e6102)
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/sec.h"

static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
    bool *deferred_work_generated);
static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool *deferred_work_generated);
static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated);

static void
sec_bin_init(sec_bin_t *bin) {
	bin->being_batch_filled = false;
	bin->bytes_cur = 0;
	edata_list_active_init(&bin->freelist);
}

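/*
 * Initialize an SEC (small extent cache): carve the shard array and the
 * per-shard bin arrays out of a single base allocation, then install the
 * pai_t entry points.  Returns true on error, per the usual jemalloc
 * convention.
 */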
bool
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
    const sec_opts_t *opts) {
	assert(opts->max_alloc >= PAGE);

	size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
	pszind_t npsizes = sz_psz2ind(max_alloc) + 1;

	size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
	size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
	size_t sz_alloc = sz_shards + sz_bins;
	void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
	if (dynalloc == NULL) {
		return true;
	}
	sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
	sec->shards = shard_cur;
	sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
	/* Just for asserts, below. */
	sec_bin_t *bin_start = bin_cur;

	for (size_t i = 0; i < opts->nshards; i++) {
		sec_shard_t *shard = shard_cur;
		shard_cur++;
		bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
		    WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
		if (err) {
			return true;
		}
		shard->enabled = true;
		shard->bins = bin_cur;
		for (pszind_t j = 0; j < npsizes; j++) {
			sec_bin_init(&shard->bins[j]);
			bin_cur++;
		}
		shard->bytes_cur = 0;
		shard->to_flush_next = 0;
	}
	/*
	 * Should have exactly matched the bin_start to the first unused byte
	 * after the shards.
	 */
	assert((void *)shard_cur == (void *)bin_start);
	/* And the last bin to use up the last bytes of the allocation. */
	assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));
	sec->fallback = fallback;

	sec->opts = *opts;
	sec->npsizes = npsizes;

	/*
	 * Initialize these last so that an improper use of an SEC whose
	 * initialization failed will segfault in an easy-to-spot way.
	 */
	sec->pai.alloc = &sec_alloc;
	sec->pai.alloc_batch = &pai_alloc_batch_default;
	sec->pai.expand = &sec_expand;
	sec->pai.shrink = &sec_shrink;
	sec->pai.dalloc = &sec_dalloc;
	sec->pai.dalloc_batch = &pai_dalloc_batch_default;

	return false;
}

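/*
 * Pick the shard this thread should use.  The index is chosen lazily on first
 * use and cached in TSD; without TSD available, fall back to shard 0.
 */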
static sec_shard_t *
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
	/*
	 * Eventually, we should implement affinity, tracking source shard using
	 * the edata_t's newly freed up fields.  For now, just randomly
	 * distribute across all shards.
	 */
	if (tsdn_null(tsdn)) {
		return &sec->shards[0];
	}
	tsd_t *tsd = tsdn_tsd(tsdn);
	uint8_t *idxp = tsd_sec_shardp_get(tsd);
	if (*idxp == (uint8_t)-1) {
		/*
		 * First use; initialize using the trick from Daniel Lemire's
		 * "A fast alternative to the modulo reduction".  Use a 64-bit
		 * number to store 32 bits, since we'll deliberately overflow
		 * when we multiply by the number of shards.
		 */
		uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32);
		uint32_t idx =
		    (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
		assert(idx < (uint32_t)sec->opts.nshards);
		*idxp = (uint8_t)idx;
	}
	return &sec->shards[*idxp];
}

/*
 * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
 * empty cache, we'll try to fill it, which can push the shard over its limit.
 */
static void
sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	while (shard->bytes_cur > sec->opts.bytes_after_flush) {
		/* Pick a victim. */
		sec_bin_t *bin = &shard->bins[shard->to_flush_next];

		/* Update our victim-picking state. */
		shard->to_flush_next++;
		if (shard->to_flush_next == sec->npsizes) {
			shard->to_flush_next = 0;
		}

		assert(shard->bytes_cur >= bin->bytes_cur);
		if (bin->bytes_cur != 0) {
			shard->bytes_cur -= bin->bytes_cur;
			bin->bytes_cur = 0;
			edata_list_active_concat(&to_flush, &bin->freelist);
		}
		/*
		 * Either bin->bytes_cur was 0, in which case we didn't touch
		 * the bin list but it should be empty anyways (or else we
		 * missed a bytes_cur update on a list modification), or it
		 * *wasn't* 0 and we emptied it ourselves.  Either way, it
		 * should be empty now.
		 */
		assert(edata_list_active_empty(&bin->freelist));
	}

	malloc_mutex_unlock(tsdn, &shard->mtx);
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

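/*
 * Try to satisfy an allocation from a bin's freelist.  Pops the head of the
 * list (if any) and debits the bin's and shard's byte counts; returns NULL if
 * the shard is disabled or the bin is empty.
 */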
static edata_t *
sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	if (!shard->enabled) {
		return NULL;
	}
	edata_t *edata = edata_list_active_first(&bin->freelist);
	if (edata != NULL) {
		edata_list_active_remove(&bin->freelist, edata);
		assert(edata_size_get(edata) <= bin->bytes_cur);
		bin->bytes_cur -= edata_size_get(edata);
		assert(edata_size_get(edata) <= shard->bytes_cur);
		shard->bytes_cur -= edata_size_get(edata);
	}
	return edata;
}

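/*
 * Refill an empty bin: request 1 + batch_fill_extra extents of the given size
 * from the fallback in one batch, hand the first one back to the caller, and
 * cache the rest.  If caching them pushes the shard past max_bytes, flush
 * down to bytes_after_flush before returning.
 */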
static edata_t *
sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin, size_t size) {
	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);

	edata_list_active_t result;
	edata_list_active_init(&result);
	bool deferred_work_generated = false;
	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);

	edata_t *ret = edata_list_active_first(&result);
	if (ret != NULL) {
		edata_list_active_remove(&result, ret);
	}

	malloc_mutex_lock(tsdn, &shard->mtx);
	bin->being_batch_filled = false;
	/*
	 * Handle the easy case first: nothing to cache.  Note that this can
	 * only happen in case of OOM, since sec_alloc checks the expected
	 * number of allocs, and doesn't bother going down the batch_fill
	 * pathway if there won't be anything left to cache.  So to be in this
	 * code path, we must have asked for > 1 alloc, but gotten at most 1
	 * back.
	 */
	if (nalloc <= 1) {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return ret;
	}

	size_t new_cached_bytes = (nalloc - 1) * size;

	edata_list_active_concat(&bin->freelist, &result);
	bin->bytes_cur += new_cached_bytes;
	shard->bytes_cur += new_cached_bytes;

	if (shard->bytes_cur > sec->opts.max_bytes) {
		sec_flush_some_and_unlock(tsdn, sec, shard);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}

	return ret;
}

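/*
 * pai_t alloc hook.  Requests the cache can't usefully serve (zeroed, aligned
 * beyond a page, larger than max_alloc, or when the SEC has no shards) go
 * straight to the fallback; everything else is served from the picked shard's
 * bin, with a batch fill on a miss when batch_fill_extra is nonzero.
 */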
static edata_t *
sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
	assert((size & PAGE_MASK) == 0);
	assert(!guarded);

	sec_t *sec = (sec_t *)self;

	if (zero || alignment > PAGE || sec->opts.nshards == 0
	    || size > sec->opts.max_alloc) {
		return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
		    /* guarded */ false, frequent_reuse,
		    deferred_work_generated);
	}
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);

	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	sec_bin_t *bin = &shard->bins[pszind];
	bool do_batch_fill = false;

	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
	if (edata == NULL) {
		if (!bin->being_batch_filled
		    && sec->opts.batch_fill_extra > 0) {
			bin->being_batch_filled = true;
			do_batch_fill = true;
		}
	}
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (edata == NULL) {
		if (do_batch_fill) {
			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
			    size);
		} else {
			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
			    zero, /* guarded */ false, frequent_reuse,
			    deferred_work_generated);
		}
	}
	return edata;
}

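/*
 * Expand and shrink are not cached operations; they pass straight through to
 * the fallback allocator.
 */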
static bool
sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool zero, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
	    deferred_work_generated);
}

static bool
sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
	    deferred_work_generated);
}

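/*
 * Flush every bin in the shard back to the fallback allocator, zeroing the
 * byte accounting along the way.  Called with the shard mutex held.
 */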
static void
sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	shard->bytes_cur = 0;
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	for (pszind_t i = 0; i < sec->npsizes; i++) {
		sec_bin_t *bin = &shard->bins[i];
		bin->bytes_cur = 0;
		edata_list_active_concat(&to_flush, &bin->freelist);
	}

	/*
	 * Ordinarily we would try to avoid doing the batch deallocation while
	 * holding the shard mutex, but the flush_all pathways only happen when
	 * we're disabling the HPA or resetting the arena, both of which are
	 * rare pathways.
	 */
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

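/*
 * Cache a freed extent in its size class's bin.  If that pushes the shard
 * past max_bytes, flush some bins back to the fallback before returning.
 */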
static void
sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    edata_t *edata) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	assert(shard->bytes_cur <= sec->opts.max_bytes);
	size_t size = edata_size_get(edata);
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);
	/*
	 * Prepending here results in LIFO allocation per bin, which seems
	 * reasonable.
	 */
	sec_bin_t *bin = &shard->bins[pszind];
	edata_list_active_prepend(&bin->freelist, edata);
	bin->bytes_cur += size;
	shard->bytes_cur += size;
	if (shard->bytes_cur > sec->opts.max_bytes) {
		/*
		 * We've exceeded the shard limit.  We make a nod in the
		 * direction of fragmentation avoidance: rather than flushing
		 * only the bin we just deallocated into, we flush bins
		 * round-robin until the shard is back under
		 * bytes_after_flush.  This spreads the flushing across size
		 * classes without requiring advanced cache balancing
		 * strategies; the flushed extents are handed back to the
		 * fallback allocator only after the shard lock is dropped.
		 */
		sec_flush_some_and_unlock(tsdn, sec, shard);
		malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}
}

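/*
 * pai_t dalloc hook.  Extents the cache won't hold (too large, or no shards
 * configured) and extents freed into a disabled shard go straight back to the
 * fallback; everything else is cached in the picked shard.
 */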
static void
sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	if (sec->opts.nshards == 0
	    || edata_size_get(edata) > sec->opts.max_alloc) {
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
		return;
	}
	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	malloc_mutex_lock(tsdn, &shard->mtx);
	if (shard->enabled) {
		sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
	}
}

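/*
 * sec_flush empties every shard; sec_disable additionally marks each shard
 * disabled so that subsequent allocations and deallocations bypass the cache.
 */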
void
sec_flush(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_disable(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec->shards[i].enabled = false;
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
	size_t sum = 0;
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		/*
		 * We could save these lock acquisitions by making bytes_cur
		 * atomic, but stats collection is rare anyways and we expect
		 * the number and type of stats to get more interesting.
		 */
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sum += sec->shards[i].bytes_cur;
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
	stats->bytes += sum;
}

void
sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
    mutex_prof_data_t *mutex_prof_data) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		malloc_mutex_prof_accum(tsdn, mutex_prof_data,
		    &sec->shards[i].mtx);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

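/*
 * Fork handlers.  The per-shard mutexes are the only locks the SEC itself
 * owns, so they are all that needs handling across fork.
 */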
void
sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
	}
}