xref: /freebsd/contrib/jemalloc/src/hpa.c (revision c43cad87172039ccf38172129c79755ea79e6102)
1*c43cad87SWarner Losh #include "jemalloc/internal/jemalloc_preamble.h"
2*c43cad87SWarner Losh #include "jemalloc/internal/jemalloc_internal_includes.h"
3*c43cad87SWarner Losh 
4*c43cad87SWarner Losh #include "jemalloc/internal/hpa.h"
5*c43cad87SWarner Losh 
6*c43cad87SWarner Losh #include "jemalloc/internal/fb.h"
7*c43cad87SWarner Losh #include "jemalloc/internal/witness.h"
8*c43cad87SWarner Losh 
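/*
 * Size of the address-space reservation ("eden") from which
 * hpa_central_extract() carves HUGEPAGE-sized pageslabs; a fresh reservation
 * is mapped only once the current one has been fully handed out.
 */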
9*c43cad87SWarner Losh #define HPA_EDEN_SIZE (128 * HUGEPAGE)
10*c43cad87SWarner Losh 
11*c43cad87SWarner Losh static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
12*c43cad87SWarner Losh     size_t alignment, bool zero, bool guarded, bool frequent_reuse,
13*c43cad87SWarner Losh     bool *deferred_work_generated);
14*c43cad87SWarner Losh static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size,
15*c43cad87SWarner Losh     size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
16*c43cad87SWarner Losh static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
17*c43cad87SWarner Losh     size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
18*c43cad87SWarner Losh static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
19*c43cad87SWarner Losh     size_t old_size, size_t new_size, bool *deferred_work_generated);
20*c43cad87SWarner Losh static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
21*c43cad87SWarner Losh     bool *deferred_work_generated);
22*c43cad87SWarner Losh static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self,
23*c43cad87SWarner Losh     edata_list_active_t *list, bool *deferred_work_generated);
24*c43cad87SWarner Losh static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self);
25*c43cad87SWarner Losh 
26*c43cad87SWarner Losh bool
27*c43cad87SWarner Losh hpa_supported() {
28*c43cad87SWarner Losh #ifdef _WIN32
29*c43cad87SWarner Losh 	/*
30*c43cad87SWarner Losh 	 * At least until the API and implementation are somewhat settled, we
31*c43cad87SWarner Losh 	 * don't want to try to debug the VM subsystem on the hardest-to-test
32*c43cad87SWarner Losh 	 * platform.
33*c43cad87SWarner Losh 	 */
34*c43cad87SWarner Losh 	return false;
35*c43cad87SWarner Losh #endif
36*c43cad87SWarner Losh 	if (!pages_can_hugify) {
37*c43cad87SWarner Losh 		return false;
38*c43cad87SWarner Losh 	}
39*c43cad87SWarner Losh 	/*
40*c43cad87SWarner Losh 	 * We fundamentally rely on an address-space-hungry growth strategy for
41*c43cad87SWarner Losh 	 * hugepages.
42*c43cad87SWarner Losh 	 */
43*c43cad87SWarner Losh 	if (LG_SIZEOF_PTR != 3) {
44*c43cad87SWarner Losh 		return false;
45*c43cad87SWarner Losh 	}
46*c43cad87SWarner Losh 	/*
47*c43cad87SWarner Losh 	 * If we couldn't detect the value of HUGEPAGE, HUGEPAGE_PAGES becomes
48*c43cad87SWarner Losh 	 * this sentinel value -- see the comment in pages.h.
49*c43cad87SWarner Losh 	 */
50*c43cad87SWarner Losh 	if (HUGEPAGE_PAGES == 1) {
51*c43cad87SWarner Losh 		return false;
52*c43cad87SWarner Losh 	}
53*c43cad87SWarner Losh 	return true;
54*c43cad87SWarner Losh }
55*c43cad87SWarner Losh 
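/* Cheap sanity check performed at the entry points of the shard API. */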
56*c43cad87SWarner Losh static void
57*c43cad87SWarner Losh hpa_do_consistency_checks(hpa_shard_t *shard) {
58*c43cad87SWarner Losh 	assert(shard->base != NULL);
59*c43cad87SWarner Losh }
60*c43cad87SWarner Losh 
61*c43cad87SWarner Losh bool
62*c43cad87SWarner Losh hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) {
63*c43cad87SWarner Losh 	/* malloc_conf processing should have filtered out these cases. */
64*c43cad87SWarner Losh 	assert(hpa_supported());
65*c43cad87SWarner Losh 	bool err;
66*c43cad87SWarner Losh 	err = malloc_mutex_init(&central->grow_mtx, "hpa_central_grow",
67*c43cad87SWarner Losh 	    WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive);
68*c43cad87SWarner Losh 	if (err) {
69*c43cad87SWarner Losh 		return true;
70*c43cad87SWarner Losh 	}
71*c43cad87SWarner Losh 	err = malloc_mutex_init(&central->mtx, "hpa_central",
72*c43cad87SWarner Losh 	    WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive);
73*c43cad87SWarner Losh 	if (err) {
74*c43cad87SWarner Losh 		return true;
75*c43cad87SWarner Losh 	}
76*c43cad87SWarner Losh 	central->base = base;
77*c43cad87SWarner Losh 	central->eden = NULL;
78*c43cad87SWarner Losh 	central->eden_len = 0;
79*c43cad87SWarner Losh 	central->age_counter = 0;
80*c43cad87SWarner Losh 	central->hooks = *hooks;
81*c43cad87SWarner Losh 	return false;
82*c43cad87SWarner Losh }
83*c43cad87SWarner Losh 
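/* Allocate the hpdata_t metadata for a new pageslab from the central base. */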
84*c43cad87SWarner Losh static hpdata_t *
85*c43cad87SWarner Losh hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) {
86*c43cad87SWarner Losh 	return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t),
87*c43cad87SWarner Losh 	    CACHELINE);
88*c43cad87SWarner Losh }
89*c43cad87SWarner Losh 
90*c43cad87SWarner Losh hpdata_t *
91*c43cad87SWarner Losh hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size,
92*c43cad87SWarner Losh     bool *oom) {
93*c43cad87SWarner Losh 	/* Don't yet support big allocations; these should get filtered out. */
94*c43cad87SWarner Losh 	assert(size <= HUGEPAGE);
95*c43cad87SWarner Losh 	/*
96*c43cad87SWarner Losh 	 * Should only try to extract from the central allocator if the local
97*c43cad87SWarner Losh 	 * shard is exhausted.  We should hold the grow_mtx on that shard.
98*c43cad87SWarner Losh 	 */
99*c43cad87SWarner Losh 	witness_assert_positive_depth_to_rank(
100*c43cad87SWarner Losh 	    tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW);
101*c43cad87SWarner Losh 
102*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &central->grow_mtx);
103*c43cad87SWarner Losh 	*oom = false;
104*c43cad87SWarner Losh 
105*c43cad87SWarner Losh 	hpdata_t *ps = NULL;
106*c43cad87SWarner Losh 
107*c43cad87SWarner Losh 	/* Is eden a perfect fit? */
108*c43cad87SWarner Losh 	if (central->eden != NULL && central->eden_len == HUGEPAGE) {
109*c43cad87SWarner Losh 		ps = hpa_alloc_ps(tsdn, central);
110*c43cad87SWarner Losh 		if (ps == NULL) {
111*c43cad87SWarner Losh 			*oom = true;
112*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &central->grow_mtx);
113*c43cad87SWarner Losh 			return NULL;
114*c43cad87SWarner Losh 		}
115*c43cad87SWarner Losh 		hpdata_init(ps, central->eden, central->age_counter++);
116*c43cad87SWarner Losh 		central->eden = NULL;
117*c43cad87SWarner Losh 		central->eden_len = 0;
118*c43cad87SWarner Losh 		malloc_mutex_unlock(tsdn, &central->grow_mtx);
119*c43cad87SWarner Losh 		return ps;
120*c43cad87SWarner Losh 	}
121*c43cad87SWarner Losh 
122*c43cad87SWarner Losh 	/*
123*c43cad87SWarner Losh 	 * We're about to try to allocate from eden by splitting.  If eden is
124*c43cad87SWarner Losh 	 * NULL, we have to allocate it too.  Otherwise, we just have to
125*c43cad87SWarner Losh 	 * allocate an hpdata_t for the new pageslab.
126*c43cad87SWarner Losh 	 */
127*c43cad87SWarner Losh 	if (central->eden == NULL) {
128*c43cad87SWarner Losh 		/*
129*c43cad87SWarner Losh 		 * During development, we're primarily concerned with systems
130*c43cad87SWarner Losh 		 * with overcommit.  Eventually, we should be more careful here.
131*c43cad87SWarner Losh 		 */
132*c43cad87SWarner Losh 		bool commit = true;
133*c43cad87SWarner Losh 		/* Allocate address space, bailing if we fail. */
134*c43cad87SWarner Losh 		void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE,
135*c43cad87SWarner Losh 		    &commit);
136*c43cad87SWarner Losh 		if (new_eden == NULL) {
137*c43cad87SWarner Losh 			*oom = true;
138*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &central->grow_mtx);
139*c43cad87SWarner Losh 			return NULL;
140*c43cad87SWarner Losh 		}
141*c43cad87SWarner Losh 		ps = hpa_alloc_ps(tsdn, central);
142*c43cad87SWarner Losh 		if (ps == NULL) {
143*c43cad87SWarner Losh 			pages_unmap(new_eden, HPA_EDEN_SIZE);
144*c43cad87SWarner Losh 			*oom = true;
145*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &central->grow_mtx);
146*c43cad87SWarner Losh 			return NULL;
147*c43cad87SWarner Losh 		}
148*c43cad87SWarner Losh 		central->eden = new_eden;
149*c43cad87SWarner Losh 		central->eden_len = HPA_EDEN_SIZE;
150*c43cad87SWarner Losh 	} else {
151*c43cad87SWarner Losh 		/* Eden is already nonempty; only need the hpdata_t for ps. */
152*c43cad87SWarner Losh 		ps = hpa_alloc_ps(tsdn, central);
153*c43cad87SWarner Losh 		if (ps == NULL) {
154*c43cad87SWarner Losh 			*oom = true;
155*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &central->grow_mtx);
156*c43cad87SWarner Losh 			return NULL;
157*c43cad87SWarner Losh 		}
158*c43cad87SWarner Losh 	}
159*c43cad87SWarner Losh 	assert(ps != NULL);
160*c43cad87SWarner Losh 	assert(central->eden != NULL);
161*c43cad87SWarner Losh 	assert(central->eden_len > HUGEPAGE);
162*c43cad87SWarner Losh 	assert(central->eden_len % HUGEPAGE == 0);
163*c43cad87SWarner Losh 	assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden);
164*c43cad87SWarner Losh 
165*c43cad87SWarner Losh 	hpdata_init(ps, central->eden, central->age_counter++);
166*c43cad87SWarner Losh 
167*c43cad87SWarner Losh 	char *eden_char = (char *)central->eden;
168*c43cad87SWarner Losh 	eden_char += HUGEPAGE;
169*c43cad87SWarner Losh 	central->eden = (void *)eden_char;
170*c43cad87SWarner Losh 	central->eden_len -= HUGEPAGE;
171*c43cad87SWarner Losh 
172*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &central->grow_mtx);
173*c43cad87SWarner Losh 
174*c43cad87SWarner Losh 	return ps;
175*c43cad87SWarner Losh }
176*c43cad87SWarner Losh 
177*c43cad87SWarner Losh bool
178*c43cad87SWarner Losh hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
179*c43cad87SWarner Losh     base_t *base, edata_cache_t *edata_cache, unsigned ind,
180*c43cad87SWarner Losh     const hpa_shard_opts_t *opts) {
181*c43cad87SWarner Losh 	/* malloc_conf processing should have filtered out these cases. */
182*c43cad87SWarner Losh 	assert(hpa_supported());
183*c43cad87SWarner Losh 	bool err;
184*c43cad87SWarner Losh 	err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow",
185*c43cad87SWarner Losh 	    WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive);
186*c43cad87SWarner Losh 	if (err) {
187*c43cad87SWarner Losh 		return true;
188*c43cad87SWarner Losh 	}
189*c43cad87SWarner Losh 	err = malloc_mutex_init(&shard->mtx, "hpa_shard",
190*c43cad87SWarner Losh 	    WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive);
191*c43cad87SWarner Losh 	if (err) {
192*c43cad87SWarner Losh 		return true;
193*c43cad87SWarner Losh 	}
194*c43cad87SWarner Losh 
195*c43cad87SWarner Losh 	assert(edata_cache != NULL);
196*c43cad87SWarner Losh 	shard->central = central;
197*c43cad87SWarner Losh 	shard->base = base;
198*c43cad87SWarner Losh 	edata_cache_fast_init(&shard->ecf, edata_cache);
199*c43cad87SWarner Losh 	psset_init(&shard->psset);
200*c43cad87SWarner Losh 	shard->age_counter = 0;
201*c43cad87SWarner Losh 	shard->ind = ind;
202*c43cad87SWarner Losh 	shard->emap = emap;
203*c43cad87SWarner Losh 
204*c43cad87SWarner Losh 	shard->opts = *opts;
205*c43cad87SWarner Losh 
206*c43cad87SWarner Losh 	shard->npending_purge = 0;
207*c43cad87SWarner Losh 	nstime_init_zero(&shard->last_purge);
208*c43cad87SWarner Losh 
209*c43cad87SWarner Losh 	shard->stats.npurge_passes = 0;
210*c43cad87SWarner Losh 	shard->stats.npurges = 0;
211*c43cad87SWarner Losh 	shard->stats.nhugifies = 0;
212*c43cad87SWarner Losh 	shard->stats.ndehugifies = 0;
213*c43cad87SWarner Losh 
214*c43cad87SWarner Losh 	/*
215*c43cad87SWarner Losh 	 * Fill these in last, so that if an hpa_shard gets used despite
216*c43cad87SWarner Losh 	 * initialization failing, we'll at least crash instead of just
217*c43cad87SWarner Losh 	 * operating on corrupted data.
218*c43cad87SWarner Losh 	 */
219*c43cad87SWarner Losh 	shard->pai.alloc = &hpa_alloc;
220*c43cad87SWarner Losh 	shard->pai.alloc_batch = &hpa_alloc_batch;
221*c43cad87SWarner Losh 	shard->pai.expand = &hpa_expand;
222*c43cad87SWarner Losh 	shard->pai.shrink = &hpa_shrink;
223*c43cad87SWarner Losh 	shard->pai.dalloc = &hpa_dalloc;
224*c43cad87SWarner Losh 	shard->pai.dalloc_batch = &hpa_dalloc_batch;
225*c43cad87SWarner Losh 	shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work;
226*c43cad87SWarner Losh 
227*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
228*c43cad87SWarner Losh 
229*c43cad87SWarner Losh 	return false;
230*c43cad87SWarner Losh }
231*c43cad87SWarner Losh 
232*c43cad87SWarner Losh /*
233*c43cad87SWarner Losh  * Note that the stats functions here follow the usual stats naming conventions;
234*c43cad87SWarner Losh  * "merge" obtains the stats from some live object instance, while "accum"
235*c43cad87SWarner Losh  * only combines the stats from one stats object to another.  Hence the lack of
236*c43cad87SWarner Losh  * locking here.
237*c43cad87SWarner Losh  */
238*c43cad87SWarner Losh static void
239*c43cad87SWarner Losh hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst,
240*c43cad87SWarner Losh     hpa_shard_nonderived_stats_t *src) {
241*c43cad87SWarner Losh 	dst->npurge_passes += src->npurge_passes;
242*c43cad87SWarner Losh 	dst->npurges += src->npurges;
243*c43cad87SWarner Losh 	dst->nhugifies += src->nhugifies;
244*c43cad87SWarner Losh 	dst->ndehugifies += src->ndehugifies;
245*c43cad87SWarner Losh }
246*c43cad87SWarner Losh 
247*c43cad87SWarner Losh void
248*c43cad87SWarner Losh hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) {
249*c43cad87SWarner Losh 	psset_stats_accum(&dst->psset_stats, &src->psset_stats);
250*c43cad87SWarner Losh 	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats,
251*c43cad87SWarner Losh 	    &src->nonderived_stats);
252*c43cad87SWarner Losh }
253*c43cad87SWarner Losh 
254*c43cad87SWarner Losh void
255*c43cad87SWarner Losh hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
256*c43cad87SWarner Losh     hpa_shard_stats_t *dst) {
257*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
258*c43cad87SWarner Losh 
259*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->grow_mtx);
260*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
261*c43cad87SWarner Losh 	psset_stats_accum(&dst->psset_stats, &shard->psset.stats);
262*c43cad87SWarner Losh 	hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats);
263*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
264*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
265*c43cad87SWarner Losh }
266*c43cad87SWarner Losh 
267*c43cad87SWarner Losh static bool
268*c43cad87SWarner Losh hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) {
269*c43cad87SWarner Losh 	/*
270*c43cad87SWarner Losh 	 * Note that this needs to be >= rather than just >, because of the
271*c43cad87SWarner Losh 	 * important special case in which the hugification threshold is exactly
272*c43cad87SWarner Losh 	 * HUGEPAGE.
273*c43cad87SWarner Losh 	 */
274*c43cad87SWarner Losh 	return hpdata_nactive_get(ps) * PAGE
275*c43cad87SWarner Losh 	    >= shard->opts.hugification_threshold;
276*c43cad87SWarner Losh }
277*c43cad87SWarner Losh 
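/*
 * Dirty pages in the shard, excluding those that an in-flight purge pass
 * (which drops the mutex while calling the purge hook) will already clean.
 */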
278*c43cad87SWarner Losh static size_t
279*c43cad87SWarner Losh hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
280*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
281*c43cad87SWarner Losh 	return psset_ndirty(&shard->psset) - shard->npending_purge;
282*c43cad87SWarner Losh }
283*c43cad87SWarner Losh 
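/*
 * Maximum number of dirty pages tolerated before purging; a dirty_mult of
 * (fxp_t)-1 disables the limit entirely.
 */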
284*c43cad87SWarner Losh static size_t
285*c43cad87SWarner Losh hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) {
286*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
287*c43cad87SWarner Losh 	if (shard->opts.dirty_mult == (fxp_t)-1) {
288*c43cad87SWarner Losh 		return (size_t)-1;
289*c43cad87SWarner Losh 	}
290*c43cad87SWarner Losh 	return fxp_mul_frac(psset_nactive(&shard->psset),
291*c43cad87SWarner Losh 	    shard->opts.dirty_mult);
292*c43cad87SWarner Losh }
293*c43cad87SWarner Losh 
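/*
 * Hugifying the next candidate would bring its retained (previously purged)
 * pages back into use, so count them as dirty when deciding whether
 * hugification should wait until more purging has happened.
 */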
294*c43cad87SWarner Losh static bool
295*c43cad87SWarner Losh hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) {
296*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
297*c43cad87SWarner Losh 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
298*c43cad87SWarner Losh 	if (to_hugify == NULL) {
299*c43cad87SWarner Losh 		return false;
300*c43cad87SWarner Losh 	}
301*c43cad87SWarner Losh 	return hpa_adjusted_ndirty(tsdn, shard)
302*c43cad87SWarner Losh 	    + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard);
303*c43cad87SWarner Losh }
304*c43cad87SWarner Losh 
305*c43cad87SWarner Losh static bool
306*c43cad87SWarner Losh hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
307*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
308*c43cad87SWarner Losh 	if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) {
309*c43cad87SWarner Losh 		return true;
310*c43cad87SWarner Losh 	}
311*c43cad87SWarner Losh 	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
312*c43cad87SWarner Losh 		return true;
313*c43cad87SWarner Losh 	}
314*c43cad87SWarner Losh 	return false;
315*c43cad87SWarner Losh }
316*c43cad87SWarner Losh 
317*c43cad87SWarner Losh static void
318*c43cad87SWarner Losh hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard,
319*c43cad87SWarner Losh     hpdata_t *ps) {
320*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
321*c43cad87SWarner Losh 	if (hpdata_changing_state_get(ps)) {
322*c43cad87SWarner Losh 		hpdata_purge_allowed_set(ps, false);
323*c43cad87SWarner Losh 		hpdata_disallow_hugify(ps);
324*c43cad87SWarner Losh 		return;
325*c43cad87SWarner Losh 	}
326*c43cad87SWarner Losh 	/*
327*c43cad87SWarner Losh 	 * Hugepages are distinctly costly to purge, so try to avoid it unless
328*c43cad87SWarner Losh 	 * they're *particularly* full of dirty pages.  Eventually, we should
329*c43cad87SWarner Losh 	 * use a smarter / more dynamic heuristic for situations where we have
330*c43cad87SWarner Losh 	 * to manually hugify.
331*c43cad87SWarner Losh 	 *
332*c43cad87SWarner Losh 	 * In situations where we don't manually hugify, this problem is
333*c43cad87SWarner Losh 	 * reduced.  The "bad" situation we're trying to avoid is one that's
334*c43cad87SWarner Losh 	 * common in some Linux configurations (where both enabled and defrag
335*c43cad87SWarner Losh 	 * are set to madvise) that can lead to long latency spikes on the first
336*c43cad87SWarner Losh 	 * access after a hugification.  The ideal policy in such configurations
337*c43cad87SWarner Losh 	 * is probably time-based for both purging and hugifying; only hugify a
338*c43cad87SWarner Losh 	 * hugepage if it's met the criteria for some extended period of time,
339*c43cad87SWarner Losh 	 * and only dehugify it if it's failed to meet the criteria for an
340*c43cad87SWarner Losh 	 * extended period of time.  When background threads are on, we should
341*c43cad87SWarner Losh 	 * try to take this hit on one of them, as well.
342*c43cad87SWarner Losh 	 *
343*c43cad87SWarner Losh 	 * I think the ideal setting is THP always enabled, and defrag set to
344*c43cad87SWarner Losh 	 * deferred; in that case we don't need any explicit calls on the
345*c43cad87SWarner Losh 	 * allocator's end at all; we just try to pack allocations in a
346*c43cad87SWarner Losh 	 * hugepage-friendly manner and let the OS hugify in the background.
347*c43cad87SWarner Losh 	 */
348*c43cad87SWarner Losh 	hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0);
349*c43cad87SWarner Losh 	if (hpa_good_hugification_candidate(shard, ps)
350*c43cad87SWarner Losh 	    && !hpdata_huge_get(ps)) {
351*c43cad87SWarner Losh 		nstime_t now;
352*c43cad87SWarner Losh 		shard->central->hooks.curtime(&now, /* first_reading */ true);
353*c43cad87SWarner Losh 		hpdata_allow_hugify(ps, now);
354*c43cad87SWarner Losh 	}
355*c43cad87SWarner Losh 	/*
356*c43cad87SWarner Losh 	 * Once a hugepage has become eligible for hugification, we don't mark
357*c43cad87SWarner Losh 	 * it as ineligible just because it stops meeting the criteria (this
358*c43cad87SWarner Losh 	 * could lead to situations where a hugepage that spends most of its
359*c43cad87SWarner Losh 	 * time meeting the criteria never quite gets hugified if there are
360*c43cad87SWarner Losh 	 * intervening deallocations).  The idea is that the hugification delay
361*c43cad87SWarner Losh 	 * will allow them to get purged, resetting their "hugify-allowed" bit.
362*c43cad87SWarner Losh 	 * If they don't get purged, then the hugification isn't hurting and
363*c43cad87SWarner Losh 	 * might help.  As an exception, we don't hugify hugepages that are now
364*c43cad87SWarner Losh 	 * empty; it definitely doesn't help there until the hugepage gets
365*c43cad87SWarner Losh 	 * reused, which is likely not for a while.
366*c43cad87SWarner Losh 	 */
367*c43cad87SWarner Losh 	if (hpdata_nactive_get(ps) == 0) {
368*c43cad87SWarner Losh 		hpdata_disallow_hugify(ps);
369*c43cad87SWarner Losh 	}
370*c43cad87SWarner Losh }
371*c43cad87SWarner Losh 
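/* True if there is purging or hugification work waiting to be done. */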
372*c43cad87SWarner Losh static bool
373*c43cad87SWarner Losh hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
374*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
375*c43cad87SWarner Losh 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
376*c43cad87SWarner Losh 	return to_hugify != NULL || hpa_should_purge(tsdn, shard);
377*c43cad87SWarner Losh }
378*c43cad87SWarner Losh 
379*c43cad87SWarner Losh /* Returns whether or not we purged anything. */
380*c43cad87SWarner Losh static bool
381*c43cad87SWarner Losh hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) {
382*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
383*c43cad87SWarner Losh 
384*c43cad87SWarner Losh 	hpdata_t *to_purge = psset_pick_purge(&shard->psset);
385*c43cad87SWarner Losh 	if (to_purge == NULL) {
386*c43cad87SWarner Losh 		return false;
387*c43cad87SWarner Losh 	}
388*c43cad87SWarner Losh 	assert(hpdata_purge_allowed_get(to_purge));
389*c43cad87SWarner Losh 	assert(!hpdata_changing_state_get(to_purge));
390*c43cad87SWarner Losh 
391*c43cad87SWarner Losh 	/*
392*c43cad87SWarner Losh 	 * Don't let anyone else purge or hugify this page while
393*c43cad87SWarner Losh 	 * we're purging it (allocations and deallocations are
394*c43cad87SWarner Losh 	 * OK).
395*c43cad87SWarner Losh 	 */
396*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, to_purge);
397*c43cad87SWarner Losh 	assert(hpdata_alloc_allowed_get(to_purge));
398*c43cad87SWarner Losh 	hpdata_mid_purge_set(to_purge, true);
399*c43cad87SWarner Losh 	hpdata_purge_allowed_set(to_purge, false);
400*c43cad87SWarner Losh 	hpdata_disallow_hugify(to_purge);
401*c43cad87SWarner Losh 	/*
402*c43cad87SWarner Losh 	 * Unlike with hugification (where concurrent
403*c43cad87SWarner Losh 	 * allocations are allowed), concurrent allocation out
404*c43cad87SWarner Losh 	 * of a hugepage being purged is unsafe; we might hand
405*c43cad87SWarner Losh 	 * out an extent for an allocation and then purge it
406*c43cad87SWarner Losh 	 * (clearing out user data).
407*c43cad87SWarner Losh 	 */
408*c43cad87SWarner Losh 	hpdata_alloc_allowed_set(to_purge, false);
409*c43cad87SWarner Losh 	psset_update_end(&shard->psset, to_purge);
410*c43cad87SWarner Losh 
411*c43cad87SWarner Losh 	/* Gather all the metadata we'll need during the purge. */
412*c43cad87SWarner Losh 	bool dehugify = hpdata_huge_get(to_purge);
413*c43cad87SWarner Losh 	hpdata_purge_state_t purge_state;
414*c43cad87SWarner Losh 	size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state);
415*c43cad87SWarner Losh 
416*c43cad87SWarner Losh 	shard->npending_purge += num_to_purge;
417*c43cad87SWarner Losh 
418*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
419*c43cad87SWarner Losh 
420*c43cad87SWarner Losh 	/* Actually do the purging, now that the lock is dropped. */
421*c43cad87SWarner Losh 	if (dehugify) {
422*c43cad87SWarner Losh 		shard->central->hooks.dehugify(hpdata_addr_get(to_purge),
423*c43cad87SWarner Losh 		    HUGEPAGE);
424*c43cad87SWarner Losh 	}
425*c43cad87SWarner Losh 	size_t total_purged = 0;
426*c43cad87SWarner Losh 	uint64_t purges_this_pass = 0;
427*c43cad87SWarner Losh 	void *purge_addr;
428*c43cad87SWarner Losh 	size_t purge_size;
429*c43cad87SWarner Losh 	while (hpdata_purge_next(to_purge, &purge_state, &purge_addr,
430*c43cad87SWarner Losh 	    &purge_size)) {
431*c43cad87SWarner Losh 		total_purged += purge_size;
432*c43cad87SWarner Losh 		assert(total_purged <= HUGEPAGE);
433*c43cad87SWarner Losh 		purges_this_pass++;
434*c43cad87SWarner Losh 		shard->central->hooks.purge(purge_addr, purge_size);
435*c43cad87SWarner Losh 	}
436*c43cad87SWarner Losh 
437*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
438*c43cad87SWarner Losh 	/* The shard updates. */
439*c43cad87SWarner Losh 	shard->npending_purge -= num_to_purge;
440*c43cad87SWarner Losh 	shard->stats.npurge_passes++;
441*c43cad87SWarner Losh 	shard->stats.npurges += purges_this_pass;
442*c43cad87SWarner Losh 	shard->central->hooks.curtime(&shard->last_purge,
443*c43cad87SWarner Losh 	    /* first_reading */ false);
444*c43cad87SWarner Losh 	if (dehugify) {
445*c43cad87SWarner Losh 		shard->stats.ndehugifies++;
446*c43cad87SWarner Losh 	}
447*c43cad87SWarner Losh 
448*c43cad87SWarner Losh 	/* The hpdata updates. */
449*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, to_purge);
450*c43cad87SWarner Losh 	if (dehugify) {
451*c43cad87SWarner Losh 		hpdata_dehugify(to_purge);
452*c43cad87SWarner Losh 	}
453*c43cad87SWarner Losh 	hpdata_purge_end(to_purge, &purge_state);
454*c43cad87SWarner Losh 	hpdata_mid_purge_set(to_purge, false);
455*c43cad87SWarner Losh 
456*c43cad87SWarner Losh 	hpdata_alloc_allowed_set(to_purge, true);
457*c43cad87SWarner Losh 	hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge);
458*c43cad87SWarner Losh 
459*c43cad87SWarner Losh 	psset_update_end(&shard->psset, to_purge);
460*c43cad87SWarner Losh 
461*c43cad87SWarner Losh 	return true;
462*c43cad87SWarner Losh }
463*c43cad87SWarner Losh 
464*c43cad87SWarner Losh /* Returns whether or not we hugified anything. */
465*c43cad87SWarner Losh static bool
466*c43cad87SWarner Losh hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) {
467*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
468*c43cad87SWarner Losh 
469*c43cad87SWarner Losh 	if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) {
470*c43cad87SWarner Losh 		return false;
471*c43cad87SWarner Losh 	}
472*c43cad87SWarner Losh 
473*c43cad87SWarner Losh 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
474*c43cad87SWarner Losh 	if (to_hugify == NULL) {
475*c43cad87SWarner Losh 		return false;
476*c43cad87SWarner Losh 	}
477*c43cad87SWarner Losh 	assert(hpdata_hugify_allowed_get(to_hugify));
478*c43cad87SWarner Losh 	assert(!hpdata_changing_state_get(to_hugify));
479*c43cad87SWarner Losh 
480*c43cad87SWarner Losh 	/* Make sure that it's been hugifiable for long enough. */
481*c43cad87SWarner Losh 	nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify);
482*c43cad87SWarner Losh 	uint64_t millis = shard->central->hooks.ms_since(&time_hugify_allowed);
483*c43cad87SWarner Losh 	if (millis < shard->opts.hugify_delay_ms) {
484*c43cad87SWarner Losh 		return false;
485*c43cad87SWarner Losh 	}
486*c43cad87SWarner Losh 
487*c43cad87SWarner Losh 	/*
488*c43cad87SWarner Losh 	 * Don't let anyone else purge or hugify this page while
489*c43cad87SWarner Losh 	 * we're hugifying it (allocations and deallocations are
490*c43cad87SWarner Losh 	 * OK).
491*c43cad87SWarner Losh 	 */
492*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, to_hugify);
493*c43cad87SWarner Losh 	hpdata_mid_hugify_set(to_hugify, true);
494*c43cad87SWarner Losh 	hpdata_purge_allowed_set(to_hugify, false);
495*c43cad87SWarner Losh 	hpdata_disallow_hugify(to_hugify);
496*c43cad87SWarner Losh 	assert(hpdata_alloc_allowed_get(to_hugify));
497*c43cad87SWarner Losh 	psset_update_end(&shard->psset, to_hugify);
498*c43cad87SWarner Losh 
499*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
500*c43cad87SWarner Losh 
501*c43cad87SWarner Losh 	shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE);
502*c43cad87SWarner Losh 
503*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
504*c43cad87SWarner Losh 	shard->stats.nhugifies++;
505*c43cad87SWarner Losh 
506*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, to_hugify);
507*c43cad87SWarner Losh 	hpdata_hugify(to_hugify);
508*c43cad87SWarner Losh 	hpdata_mid_hugify_set(to_hugify, false);
509*c43cad87SWarner Losh 	hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify);
510*c43cad87SWarner Losh 	psset_update_end(&shard->psset, to_hugify);
511*c43cad87SWarner Losh 
512*c43cad87SWarner Losh 	return true;
513*c43cad87SWarner Losh }
514*c43cad87SWarner Losh 
515*c43cad87SWarner Losh /*
516*c43cad87SWarner Losh  * Execution of deferred work is forced if it's triggered by an explicit
517*c43cad87SWarner Losh  * hpa_shard_do_deferred_work() call.
518*c43cad87SWarner Losh  */
519*c43cad87SWarner Losh static void
520*c43cad87SWarner Losh hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard,
521*c43cad87SWarner Losh     bool forced) {
522*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
523*c43cad87SWarner Losh 	if (!forced && shard->opts.deferral_allowed) {
524*c43cad87SWarner Losh 		return;
525*c43cad87SWarner Losh 	}
526*c43cad87SWarner Losh 	/*
527*c43cad87SWarner Losh 	 * If we're on a background thread, do work so long as there's work to
528*c43cad87SWarner Losh 	 * be done.  Otherwise, bound latency to not be *too* bad by doing at
529*c43cad87SWarner Losh 	 * most a small fixed number of operations.
530*c43cad87SWarner Losh 	 */
531*c43cad87SWarner Losh 	bool hugified = false;
532*c43cad87SWarner Losh 	bool purged = false;
533*c43cad87SWarner Losh 	size_t max_ops = (forced ? (size_t)-1 : 16);
534*c43cad87SWarner Losh 	size_t nops = 0;
535*c43cad87SWarner Losh 	do {
536*c43cad87SWarner Losh 		/*
537*c43cad87SWarner Losh 		 * Always purge before hugifying, to make sure we get some
538*c43cad87SWarner Losh 		 * ability to hit our quiescence targets.
539*c43cad87SWarner Losh 		 */
540*c43cad87SWarner Losh 		purged = false;
541*c43cad87SWarner Losh 		while (hpa_should_purge(tsdn, shard) && nops < max_ops) {
542*c43cad87SWarner Losh 			purged = hpa_try_purge(tsdn, shard);
543*c43cad87SWarner Losh 			if (purged) {
544*c43cad87SWarner Losh 				nops++;
545*c43cad87SWarner Losh 			}
546*c43cad87SWarner Losh 		}
547*c43cad87SWarner Losh 		hugified = hpa_try_hugify(tsdn, shard);
548*c43cad87SWarner Losh 		if (hugified) {
549*c43cad87SWarner Losh 			nops++;
550*c43cad87SWarner Losh 		}
551*c43cad87SWarner Losh 		malloc_mutex_assert_owner(tsdn, &shard->mtx);
553*c43cad87SWarner Losh 	} while ((hugified || purged) && nops < max_ops);
554*c43cad87SWarner Losh }
555*c43cad87SWarner Losh 
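/*
 * Try to serve one allocation from an existing pageslab, without growing the
 * shard.  Returns NULL on failure; *oom is set only when the failure was due
 * to metadata (edata cache / emap) exhaustion rather than lack of space.
 */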
556*c43cad87SWarner Losh static edata_t *
557*c43cad87SWarner Losh hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
558*c43cad87SWarner Losh     bool *oom) {
559*c43cad87SWarner Losh 	bool err;
560*c43cad87SWarner Losh 	edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf);
561*c43cad87SWarner Losh 	if (edata == NULL) {
562*c43cad87SWarner Losh 		*oom = true;
563*c43cad87SWarner Losh 		return NULL;
564*c43cad87SWarner Losh 	}
565*c43cad87SWarner Losh 
566*c43cad87SWarner Losh 	hpdata_t *ps = psset_pick_alloc(&shard->psset, size);
567*c43cad87SWarner Losh 	if (ps == NULL) {
568*c43cad87SWarner Losh 		edata_cache_fast_put(tsdn, &shard->ecf, edata);
569*c43cad87SWarner Losh 		return NULL;
570*c43cad87SWarner Losh 	}
571*c43cad87SWarner Losh 
572*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, ps);
573*c43cad87SWarner Losh 
574*c43cad87SWarner Losh 	if (hpdata_empty(ps)) {
575*c43cad87SWarner Losh 		/*
576*c43cad87SWarner Losh 		 * If the pageslab used to be empty, treat it as though it's
577*c43cad87SWarner Losh 		 * brand new for fragmentation-avoidance purposes; what we're
578*c43cad87SWarner Losh 		 * trying to approximate is the age of the allocations *in* that
579*c43cad87SWarner Losh 		 * pageslab, and the allocations in the new pageslab are
580*c43cad87SWarner Losh 		 * definitionally the youngest in this hpa shard.
581*c43cad87SWarner Losh 		 */
582*c43cad87SWarner Losh 		hpdata_age_set(ps, shard->age_counter++);
583*c43cad87SWarner Losh 	}
584*c43cad87SWarner Losh 
585*c43cad87SWarner Losh 	void *addr = hpdata_reserve_alloc(ps, size);
586*c43cad87SWarner Losh 	edata_init(edata, shard->ind, addr, size, /* slab */ false,
587*c43cad87SWarner Losh 	    SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active,
588*c43cad87SWarner Losh 	    /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA,
589*c43cad87SWarner Losh 	    EXTENT_NOT_HEAD);
590*c43cad87SWarner Losh 	edata_ps_set(edata, ps);
591*c43cad87SWarner Losh 
592*c43cad87SWarner Losh 	/*
593*c43cad87SWarner Losh 	 * This could theoretically be moved outside of the critical section,
594*c43cad87SWarner Losh 	 * but that introduces the potential for a race.  Without the lock, the
595*c43cad87SWarner Losh 	 * (initially nonempty, since this is the reuse pathway) pageslab we
596*c43cad87SWarner Losh 	 * allocated out of could become otherwise empty while the lock is
597*c43cad87SWarner Losh 	 * dropped.  This would force us to deal with a pageslab eviction down
598*c43cad87SWarner Losh 	 * the error pathway, which is a pain.
599*c43cad87SWarner Losh 	 */
600*c43cad87SWarner Losh 	err = emap_register_boundary(tsdn, shard->emap, edata,
601*c43cad87SWarner Losh 	    SC_NSIZES, /* slab */ false);
602*c43cad87SWarner Losh 	if (err) {
603*c43cad87SWarner Losh 		hpdata_unreserve(ps, edata_addr_get(edata),
604*c43cad87SWarner Losh 		    edata_size_get(edata));
605*c43cad87SWarner Losh 		/*
606*c43cad87SWarner Losh 		 * We should arguably reset dirty state here, but this would
607*c43cad87SWarner Losh 		 * require some sort of prepare + commit functionality that's a
608*c43cad87SWarner Losh 		 * little much to deal with for now.
609*c43cad87SWarner Losh 		 *
610*c43cad87SWarner Losh 		 * We don't have a do_deferred_work down this pathway, on the
611*c43cad87SWarner Losh 		 * principle that we didn't *really* affect shard state (we
612*c43cad87SWarner Losh 		 * tweaked the stats, but our tweaks weren't really accurate).
613*c43cad87SWarner Losh 		 */
614*c43cad87SWarner Losh 		psset_update_end(&shard->psset, ps);
615*c43cad87SWarner Losh 		edata_cache_fast_put(tsdn, &shard->ecf, edata);
616*c43cad87SWarner Losh 		*oom = true;
617*c43cad87SWarner Losh 		return NULL;
618*c43cad87SWarner Losh 	}
619*c43cad87SWarner Losh 
620*c43cad87SWarner Losh 	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
621*c43cad87SWarner Losh 	psset_update_end(&shard->psset, ps);
622*c43cad87SWarner Losh 	return edata;
623*c43cad87SWarner Losh }
624*c43cad87SWarner Losh 
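/*
 * Batched form of the above: take the shard mutex once, allocate up to
 * nallocs extents, and run non-forced deferred work before dropping the lock.
 * Returns the number of successful allocations.
 */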
625*c43cad87SWarner Losh static size_t
626*c43cad87SWarner Losh hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
627*c43cad87SWarner Losh     bool *oom, size_t nallocs, edata_list_active_t *results,
628*c43cad87SWarner Losh     bool *deferred_work_generated) {
629*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
630*c43cad87SWarner Losh 	size_t nsuccess = 0;
631*c43cad87SWarner Losh 	for (; nsuccess < nallocs; nsuccess++) {
632*c43cad87SWarner Losh 		edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size,
633*c43cad87SWarner Losh 		    oom);
634*c43cad87SWarner Losh 		if (edata == NULL) {
635*c43cad87SWarner Losh 			break;
636*c43cad87SWarner Losh 		}
637*c43cad87SWarner Losh 		edata_list_active_append(results, edata);
638*c43cad87SWarner Losh 	}
639*c43cad87SWarner Losh 
640*c43cad87SWarner Losh 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
641*c43cad87SWarner Losh 	*deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard);
642*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
643*c43cad87SWarner Losh 	return nsuccess;
644*c43cad87SWarner Losh }
645*c43cad87SWarner Losh 
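/*
 * Serve as much of the batch as possible from the existing psset; if that
 * falls short without OOMing, take the grow mutex, retry (another thread may
 * have grown the psset in the meantime), and only then pull a fresh pageslab
 * from the central allocator.
 */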
646*c43cad87SWarner Losh static size_t
647*c43cad87SWarner Losh hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size,
648*c43cad87SWarner Losh     size_t nallocs, edata_list_active_t *results,
649*c43cad87SWarner Losh     bool *deferred_work_generated) {
650*c43cad87SWarner Losh 	assert(size <= shard->opts.slab_max_alloc);
651*c43cad87SWarner Losh 	bool oom = false;
652*c43cad87SWarner Losh 
653*c43cad87SWarner Losh 	size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
654*c43cad87SWarner Losh 	    nallocs, results, deferred_work_generated);
655*c43cad87SWarner Losh 
656*c43cad87SWarner Losh 	if (nsuccess == nallocs || oom) {
657*c43cad87SWarner Losh 		return nsuccess;
658*c43cad87SWarner Losh 	}
659*c43cad87SWarner Losh 
660*c43cad87SWarner Losh 	/*
661*c43cad87SWarner Losh 	 * We didn't OOM, but weren't able to fill everything requested of us;
662*c43cad87SWarner Losh 	 * try to grow.
663*c43cad87SWarner Losh 	 */
664*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->grow_mtx);
665*c43cad87SWarner Losh 	/*
666*c43cad87SWarner Losh 	 * Check for grow races; maybe some earlier thread expanded the psset
667*c43cad87SWarner Losh 	 * in between when we dropped the main mutex and grabbed the grow mutex.
668*c43cad87SWarner Losh 	 */
669*c43cad87SWarner Losh 	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
670*c43cad87SWarner Losh 	    nallocs - nsuccess, results, deferred_work_generated);
671*c43cad87SWarner Losh 	if (nsuccess == nallocs || oom) {
672*c43cad87SWarner Losh 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
673*c43cad87SWarner Losh 		return nsuccess;
674*c43cad87SWarner Losh 	}
675*c43cad87SWarner Losh 
676*c43cad87SWarner Losh 	/*
677*c43cad87SWarner Losh 	 * Note that we don't hold shard->mtx here (while growing);
678*c43cad87SWarner Losh 	 * deallocations (and allocations of smaller sizes) may still succeed
679*c43cad87SWarner Losh 	 * while we're doing this potentially expensive system call.
680*c43cad87SWarner Losh 	 */
681*c43cad87SWarner Losh 	hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom);
682*c43cad87SWarner Losh 	if (ps == NULL) {
683*c43cad87SWarner Losh 		malloc_mutex_unlock(tsdn, &shard->grow_mtx);
684*c43cad87SWarner Losh 		return nsuccess;
685*c43cad87SWarner Losh 	}
686*c43cad87SWarner Losh 
687*c43cad87SWarner Losh 	/*
688*c43cad87SWarner Losh 	 * We got the pageslab; allocate from it.  This does an unlock followed
689*c43cad87SWarner Losh 	 * by a lock on the same mutex, and holds the grow mutex while doing
690*c43cad87SWarner Losh 	 * deferred work, but this is an uncommon path; the simplicity is worth
691*c43cad87SWarner Losh 	 * it.
692*c43cad87SWarner Losh 	 */
693*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
694*c43cad87SWarner Losh 	psset_insert(&shard->psset, ps);
695*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
696*c43cad87SWarner Losh 
697*c43cad87SWarner Losh 	nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom,
698*c43cad87SWarner Losh 	    nallocs - nsuccess, results, deferred_work_generated);
699*c43cad87SWarner Losh 	/*
700*c43cad87SWarner Losh 	 * Drop grow_mtx before doing deferred work; other threads blocked on it
701*c43cad87SWarner Losh 	 * should be allowed to proceed while we're working.
702*c43cad87SWarner Losh 	 */
703*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->grow_mtx);
704*c43cad87SWarner Losh 
705*c43cad87SWarner Losh 	return nsuccess;
706*c43cad87SWarner Losh }
707*c43cad87SWarner Losh 
708*c43cad87SWarner Losh static hpa_shard_t *
709*c43cad87SWarner Losh hpa_from_pai(pai_t *self) {
710*c43cad87SWarner Losh 	assert(self->alloc == &hpa_alloc);
711*c43cad87SWarner Losh 	assert(self->expand == &hpa_expand);
712*c43cad87SWarner Losh 	assert(self->shrink == &hpa_shrink);
713*c43cad87SWarner Losh 	assert(self->dalloc == &hpa_dalloc);
714*c43cad87SWarner Losh 	return (hpa_shard_t *)self;
715*c43cad87SWarner Losh }
716*c43cad87SWarner Losh 
717*c43cad87SWarner Losh static size_t
718*c43cad87SWarner Losh hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
719*c43cad87SWarner Losh     edata_list_active_t *results, bool *deferred_work_generated) {
720*c43cad87SWarner Losh 	assert(nallocs > 0);
721*c43cad87SWarner Losh 	assert((size & PAGE_MASK) == 0);
722*c43cad87SWarner Losh 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
723*c43cad87SWarner Losh 	    WITNESS_RANK_CORE, 0);
724*c43cad87SWarner Losh 	hpa_shard_t *shard = hpa_from_pai(self);
725*c43cad87SWarner Losh 
726*c43cad87SWarner Losh 	if (size > shard->opts.slab_max_alloc) {
727*c43cad87SWarner Losh 		return 0;
728*c43cad87SWarner Losh 	}
729*c43cad87SWarner Losh 
730*c43cad87SWarner Losh 	size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs,
731*c43cad87SWarner Losh 	    results, deferred_work_generated);
732*c43cad87SWarner Losh 
733*c43cad87SWarner Losh 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
734*c43cad87SWarner Losh 	    WITNESS_RANK_CORE, 0);
735*c43cad87SWarner Losh 
736*c43cad87SWarner Losh 	/*
737*c43cad87SWarner Losh 	 * Guard the sanity checks with config_debug because the loop cannot be
738*c43cad87SWarner Losh 	 * proven non-circular by the compiler, even if everything within the
739*c43cad87SWarner Losh 	 * loop is optimized away.
740*c43cad87SWarner Losh 	 */
741*c43cad87SWarner Losh 	if (config_debug) {
742*c43cad87SWarner Losh 		edata_t *edata;
743*c43cad87SWarner Losh 		ql_foreach(edata, &results->head, ql_link_active) {
744*c43cad87SWarner Losh 			emap_assert_mapped(tsdn, shard->emap, edata);
745*c43cad87SWarner Losh 			assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
746*c43cad87SWarner Losh 			assert(edata_state_get(edata) == extent_state_active);
747*c43cad87SWarner Losh 			assert(edata_arena_ind_get(edata) == shard->ind);
748*c43cad87SWarner Losh 			assert(edata_szind_get_maybe_invalid(edata) ==
749*c43cad87SWarner Losh 			    SC_NSIZES);
750*c43cad87SWarner Losh 			assert(!edata_slab_get(edata));
751*c43cad87SWarner Losh 			assert(edata_committed_get(edata));
752*c43cad87SWarner Losh 			assert(edata_base_get(edata) == edata_addr_get(edata));
753*c43cad87SWarner Losh 			assert(edata_base_get(edata) != NULL);
754*c43cad87SWarner Losh 		}
755*c43cad87SWarner Losh 	}
756*c43cad87SWarner Losh 	return nsuccess;
757*c43cad87SWarner Losh }
758*c43cad87SWarner Losh 
759*c43cad87SWarner Losh static edata_t *
760*c43cad87SWarner Losh hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
761*c43cad87SWarner Losh     bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
762*c43cad87SWarner Losh 	assert((size & PAGE_MASK) == 0);
763*c43cad87SWarner Losh 	assert(!guarded);
764*c43cad87SWarner Losh 	witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
765*c43cad87SWarner Losh 	    WITNESS_RANK_CORE, 0);
766*c43cad87SWarner Losh 
767*c43cad87SWarner Losh 	/* We don't handle alignment or zeroing for now. */
768*c43cad87SWarner Losh 	if (alignment > PAGE || zero) {
769*c43cad87SWarner Losh 		return NULL;
770*c43cad87SWarner Losh 	}
771*c43cad87SWarner Losh 	/*
772*c43cad87SWarner Losh 	 * An alloc with alignment == PAGE and zero == false is equivalent to a
773*c43cad87SWarner Losh 	 * batch alloc of 1.  Just do that, so we can share code.
774*c43cad87SWarner Losh 	 */
775*c43cad87SWarner Losh 	edata_list_active_t results;
776*c43cad87SWarner Losh 	edata_list_active_init(&results);
777*c43cad87SWarner Losh 	size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1,
778*c43cad87SWarner Losh 	    &results, deferred_work_generated);
779*c43cad87SWarner Losh 	assert(nallocs == 0 || nallocs == 1);
780*c43cad87SWarner Losh 	edata_t *edata = edata_list_active_first(&results);
781*c43cad87SWarner Losh 	return edata;
782*c43cad87SWarner Losh }
783*c43cad87SWarner Losh 
784*c43cad87SWarner Losh static bool
785*c43cad87SWarner Losh hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
786*c43cad87SWarner Losh     size_t new_size, bool zero, bool *deferred_work_generated) {
787*c43cad87SWarner Losh 	/* Expand not yet supported. */
788*c43cad87SWarner Losh 	return true;
789*c43cad87SWarner Losh }
790*c43cad87SWarner Losh 
791*c43cad87SWarner Losh static bool
792*c43cad87SWarner Losh hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
793*c43cad87SWarner Losh     size_t old_size, size_t new_size, bool *deferred_work_generated) {
794*c43cad87SWarner Losh 	/* Shrink not yet supported. */
795*c43cad87SWarner Losh 	return true;
796*c43cad87SWarner Losh }
797*c43cad87SWarner Losh 
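/*
 * First phase of deallocation, done without the shard mutex: sanity-check the
 * extent and deregister its boundary from the emap so no new lookups find it.
 */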
798*c43cad87SWarner Losh static void
799*c43cad87SWarner Losh hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
800*c43cad87SWarner Losh 	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
801*c43cad87SWarner Losh 
802*c43cad87SWarner Losh 	assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
803*c43cad87SWarner Losh 	assert(edata_state_get(edata) == extent_state_active);
804*c43cad87SWarner Losh 	assert(edata_arena_ind_get(edata) == shard->ind);
805*c43cad87SWarner Losh 	assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES);
806*c43cad87SWarner Losh 	assert(edata_committed_get(edata));
807*c43cad87SWarner Losh 	assert(edata_base_get(edata) != NULL);
808*c43cad87SWarner Losh 
809*c43cad87SWarner Losh 	/*
810*c43cad87SWarner Losh 	 * Another thread shouldn't be trying to touch the metadata of an
811*c43cad87SWarner Losh 	 * allocation being freed.  The one exception is a merge attempt from a
812*c43cad87SWarner Losh 	 * lower-addressed PAC extent; in this case we have a nominal race on
813*c43cad87SWarner Losh 	 * the edata metadata bits, but in practice the fact that the PAI bits
814*c43cad87SWarner Losh 	 * are different will prevent any further access.  The race is bad, but
815*c43cad87SWarner Losh 	 * benign in practice, and the long term plan is to track enough state
816*c43cad87SWarner Losh 	 * in the rtree to prevent these merge attempts in the first place.
817*c43cad87SWarner Losh 	 */
818*c43cad87SWarner Losh 	edata_addr_set(edata, edata_base_get(edata));
819*c43cad87SWarner Losh 	edata_zeroed_set(edata, false);
820*c43cad87SWarner Losh 	emap_deregister_boundary(tsdn, shard->emap, edata);
821*c43cad87SWarner Losh }
822*c43cad87SWarner Losh 
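/*
 * Second phase, with the shard mutex held: return the edata to the cache and
 * hand its pages back to the owning pageslab, updating purge/hugify
 * eligibility along the way.
 */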
823*c43cad87SWarner Losh static void
824*c43cad87SWarner Losh hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) {
825*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
826*c43cad87SWarner Losh 
827*c43cad87SWarner Losh 	/*
828*c43cad87SWarner Losh 	 * Release the metadata early, to avoid having to remember to do it
829*c43cad87SWarner Losh 	 * while we're also doing tricky purging logic.  First, we need to grab
830*c43cad87SWarner Losh 	 * a few bits of metadata from it.
831*c43cad87SWarner Losh 	 *
832*c43cad87SWarner Losh 	 * Note that the shard mutex protects ps's metadata too; it wouldn't be
833*c43cad87SWarner Losh 	 * correct to try to read most information out of it without the lock.
834*c43cad87SWarner Losh 	 */
835*c43cad87SWarner Losh 	hpdata_t *ps = edata_ps_get(edata);
836*c43cad87SWarner Losh 	/* Currently, all edatas come from pageslabs. */
837*c43cad87SWarner Losh 	assert(ps != NULL);
838*c43cad87SWarner Losh 	void *unreserve_addr = edata_addr_get(edata);
839*c43cad87SWarner Losh 	size_t unreserve_size = edata_size_get(edata);
840*c43cad87SWarner Losh 	edata_cache_fast_put(tsdn, &shard->ecf, edata);
841*c43cad87SWarner Losh 
842*c43cad87SWarner Losh 	psset_update_begin(&shard->psset, ps);
843*c43cad87SWarner Losh 	hpdata_unreserve(ps, unreserve_addr, unreserve_size);
844*c43cad87SWarner Losh 	hpa_update_purge_hugify_eligibility(tsdn, shard, ps);
845*c43cad87SWarner Losh 	psset_update_end(&shard->psset, ps);
846*c43cad87SWarner Losh }
847*c43cad87SWarner Losh 
848*c43cad87SWarner Losh static void
849*c43cad87SWarner Losh hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
850*c43cad87SWarner Losh     bool *deferred_work_generated) {
851*c43cad87SWarner Losh 	hpa_shard_t *shard = hpa_from_pai(self);
852*c43cad87SWarner Losh 
853*c43cad87SWarner Losh 	edata_t *edata;
854*c43cad87SWarner Losh 	ql_foreach(edata, &list->head, ql_link_active) {
855*c43cad87SWarner Losh 		hpa_dalloc_prepare_unlocked(tsdn, shard, edata);
856*c43cad87SWarner Losh 	}
857*c43cad87SWarner Losh 
858*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
859*c43cad87SWarner Losh 	/* Now, remove from the list. */
860*c43cad87SWarner Losh 	while ((edata = edata_list_active_first(list)) != NULL) {
861*c43cad87SWarner Losh 		edata_list_active_remove(list, edata);
862*c43cad87SWarner Losh 		hpa_dalloc_locked(tsdn, shard, edata);
863*c43cad87SWarner Losh 	}
864*c43cad87SWarner Losh 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false);
865*c43cad87SWarner Losh 	*deferred_work_generated =
866*c43cad87SWarner Losh 	    hpa_shard_has_deferred_work(tsdn, shard);
867*c43cad87SWarner Losh 
868*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
869*c43cad87SWarner Losh }
870*c43cad87SWarner Losh 
871*c43cad87SWarner Losh static void
872*c43cad87SWarner Losh hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
873*c43cad87SWarner Losh     bool *deferred_work_generated) {
874*c43cad87SWarner Losh 	assert(!edata_guarded_get(edata));
875*c43cad87SWarner Losh 	/* Just a dalloc_batch of size 1; this lets us share logic. */
876*c43cad87SWarner Losh 	edata_list_active_t dalloc_list;
877*c43cad87SWarner Losh 	edata_list_active_init(&dalloc_list);
878*c43cad87SWarner Losh 	edata_list_active_append(&dalloc_list, edata);
879*c43cad87SWarner Losh 	hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated);
880*c43cad87SWarner Losh }
881*c43cad87SWarner Losh 
882*c43cad87SWarner Losh /*
883*c43cad87SWarner Losh  * Calculate time until either purging or hugification ought to happen.
884*c43cad87SWarner Losh  * Called by background threads.
885*c43cad87SWarner Losh  */
886*c43cad87SWarner Losh static uint64_t
887*c43cad87SWarner Losh hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
888*c43cad87SWarner Losh 	hpa_shard_t *shard = hpa_from_pai(self);
889*c43cad87SWarner Losh 	uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX;
890*c43cad87SWarner Losh 
891*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
892*c43cad87SWarner Losh 
893*c43cad87SWarner Losh 	hpdata_t *to_hugify = psset_pick_hugify(&shard->psset);
894*c43cad87SWarner Losh 	if (to_hugify != NULL) {
895*c43cad87SWarner Losh 		nstime_t time_hugify_allowed =
896*c43cad87SWarner Losh 		    hpdata_time_hugify_allowed(to_hugify);
897*c43cad87SWarner Losh 		uint64_t since_hugify_allowed_ms =
898*c43cad87SWarner Losh 		    shard->central->hooks.ms_since(&time_hugify_allowed);
899*c43cad87SWarner Losh 		/*
900*c43cad87SWarner Losh 		 * If not enough time has passed since hugification was allowed,
901*c43cad87SWarner Losh 		 * sleep for the rest.
902*c43cad87SWarner Losh 		 */
903*c43cad87SWarner Losh 		if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) {
904*c43cad87SWarner Losh 			time_ns = shard->opts.hugify_delay_ms -
905*c43cad87SWarner Losh 			    since_hugify_allowed_ms;
906*c43cad87SWarner Losh 			time_ns *= 1000 * 1000;
907*c43cad87SWarner Losh 		} else {
908*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &shard->mtx);
909*c43cad87SWarner Losh 			return BACKGROUND_THREAD_DEFERRED_MIN;
910*c43cad87SWarner Losh 		}
911*c43cad87SWarner Losh 	}
912*c43cad87SWarner Losh 
913*c43cad87SWarner Losh 	if (hpa_should_purge(tsdn, shard)) {
914*c43cad87SWarner Losh 		/*
915*c43cad87SWarner Losh 		 * If we haven't purged before, no need to check interval
916*c43cad87SWarner Losh 		 * between purges. Simply purge as soon as possible.
917*c43cad87SWarner Losh 		 */
918*c43cad87SWarner Losh 		if (shard->stats.npurge_passes == 0) {
919*c43cad87SWarner Losh 			malloc_mutex_unlock(tsdn, &shard->mtx);
920*c43cad87SWarner Losh 			return BACKGROUND_THREAD_DEFERRED_MIN;
921*c43cad87SWarner Losh 		}
922*c43cad87SWarner Losh 		uint64_t since_last_purge_ms = shard->central->hooks.ms_since(
923*c43cad87SWarner Losh 		    &shard->last_purge);
924*c43cad87SWarner Losh 
925*c43cad87SWarner Losh 		if (since_last_purge_ms < shard->opts.min_purge_interval_ms) {
926*c43cad87SWarner Losh 			uint64_t until_purge_ns;
927*c43cad87SWarner Losh 			until_purge_ns = shard->opts.min_purge_interval_ms -
928*c43cad87SWarner Losh 			    since_last_purge_ms;
929*c43cad87SWarner Losh 			until_purge_ns *= 1000 * 1000;
930*c43cad87SWarner Losh 
931*c43cad87SWarner Losh 			if (until_purge_ns < time_ns) {
932*c43cad87SWarner Losh 				time_ns = until_purge_ns;
933*c43cad87SWarner Losh 			}
934*c43cad87SWarner Losh 		} else {
935*c43cad87SWarner Losh 			time_ns = BACKGROUND_THREAD_DEFERRED_MIN;
936*c43cad87SWarner Losh 		}
937*c43cad87SWarner Losh 	}
938*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
939*c43cad87SWarner Losh 	return time_ns;
940*c43cad87SWarner Losh }
941*c43cad87SWarner Losh 
942*c43cad87SWarner Losh void
943*c43cad87SWarner Losh hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) {
944*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
945*c43cad87SWarner Losh 
946*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
947*c43cad87SWarner Losh 	edata_cache_fast_disable(tsdn, &shard->ecf);
948*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
949*c43cad87SWarner Losh }
950*c43cad87SWarner Losh 
951*c43cad87SWarner Losh static void
952*c43cad87SWarner Losh hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) {
953*c43cad87SWarner Losh 	assert(bin_stats->npageslabs == 0);
954*c43cad87SWarner Losh 	assert(bin_stats->nactive == 0);
955*c43cad87SWarner Losh }
956*c43cad87SWarner Losh 
957*c43cad87SWarner Losh static void
958*c43cad87SWarner Losh hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) {
959*c43cad87SWarner Losh 	malloc_mutex_assert_owner(tsdn, &shard->mtx);
960*c43cad87SWarner Losh 	for (int huge = 0; huge <= 1; huge++) {
961*c43cad87SWarner Losh 		hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]);
962*c43cad87SWarner Losh 		for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
963*c43cad87SWarner Losh 			hpa_shard_assert_stats_empty(
964*c43cad87SWarner Losh 			    &psset->stats.nonfull_slabs[i][huge]);
965*c43cad87SWarner Losh 		}
966*c43cad87SWarner Losh 	}
967*c43cad87SWarner Losh }
968*c43cad87SWarner Losh 
969*c43cad87SWarner Losh void
970*c43cad87SWarner Losh hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) {
971*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
972*c43cad87SWarner Losh 	/*
973*c43cad87SWarner Losh 	 * By the time we're here, the arena code should have dalloc'd all the
974*c43cad87SWarner Losh 	 * active extents, which means we should have eventually evicted
975*c43cad87SWarner Losh 	 * everything from the psset, so it shouldn't be able to serve even a
976*c43cad87SWarner Losh 	 * 1-page allocation.
977*c43cad87SWarner Losh 	 */
978*c43cad87SWarner Losh 	if (config_debug) {
979*c43cad87SWarner Losh 		malloc_mutex_lock(tsdn, &shard->mtx);
980*c43cad87SWarner Losh 		hpa_assert_empty(tsdn, shard, &shard->psset);
981*c43cad87SWarner Losh 		malloc_mutex_unlock(tsdn, &shard->mtx);
982*c43cad87SWarner Losh 	}
983*c43cad87SWarner Losh 	hpdata_t *ps;
984*c43cad87SWarner Losh 	while ((ps = psset_pick_alloc(&shard->psset, PAGE)) != NULL) {
985*c43cad87SWarner Losh 		/* There should be no allocations anywhere. */
986*c43cad87SWarner Losh 		assert(hpdata_empty(ps));
987*c43cad87SWarner Losh 		psset_remove(&shard->psset, ps);
988*c43cad87SWarner Losh 		shard->central->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE);
989*c43cad87SWarner Losh 	}
990*c43cad87SWarner Losh }
991*c43cad87SWarner Losh 
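/*
 * Turning deferral off flushes any work deferred while it was on; hence the
 * forced pass when the setting flips from true to false.
 */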
992*c43cad87SWarner Losh void
993*c43cad87SWarner Losh hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
994*c43cad87SWarner Losh     bool deferral_allowed) {
995*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
996*c43cad87SWarner Losh 
997*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
998*c43cad87SWarner Losh 	bool deferral_previously_allowed = shard->opts.deferral_allowed;
999*c43cad87SWarner Losh 	shard->opts.deferral_allowed = deferral_allowed;
1000*c43cad87SWarner Losh 	if (deferral_previously_allowed && !deferral_allowed) {
1001*c43cad87SWarner Losh 		hpa_shard_maybe_do_deferred_work(tsdn, shard,
1002*c43cad87SWarner Losh 		    /* forced */ true);
1003*c43cad87SWarner Losh 	}
1004*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
1005*c43cad87SWarner Losh }
1006*c43cad87SWarner Losh 
1007*c43cad87SWarner Losh void
1008*c43cad87SWarner Losh hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) {
1009*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
1010*c43cad87SWarner Losh 
1011*c43cad87SWarner Losh 	malloc_mutex_lock(tsdn, &shard->mtx);
1012*c43cad87SWarner Losh 	hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true);
1013*c43cad87SWarner Losh 	malloc_mutex_unlock(tsdn, &shard->mtx);
1014*c43cad87SWarner Losh }
1015*c43cad87SWarner Losh 
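/*
 * Fork handling: grow_mtx is acquired before mtx, mirroring the lock order
 * used elsewhere in the shard, so both are held in a known state across fork.
 */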
1016*c43cad87SWarner Losh void
1017*c43cad87SWarner Losh hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) {
1018*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
1019*c43cad87SWarner Losh 
1020*c43cad87SWarner Losh 	malloc_mutex_prefork(tsdn, &shard->grow_mtx);
1021*c43cad87SWarner Losh }
1022*c43cad87SWarner Losh 
1023*c43cad87SWarner Losh void
1024*c43cad87SWarner Losh hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) {
1025*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
1026*c43cad87SWarner Losh 
1027*c43cad87SWarner Losh 	malloc_mutex_prefork(tsdn, &shard->mtx);
1028*c43cad87SWarner Losh }
1029*c43cad87SWarner Losh 
1030*c43cad87SWarner Losh void
1031*c43cad87SWarner Losh hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) {
1032*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
1033*c43cad87SWarner Losh 
1034*c43cad87SWarner Losh 	malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx);
1035*c43cad87SWarner Losh 	malloc_mutex_postfork_parent(tsdn, &shard->mtx);
1036*c43cad87SWarner Losh }
1037*c43cad87SWarner Losh 
1038*c43cad87SWarner Losh void
1039*c43cad87SWarner Losh hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) {
1040*c43cad87SWarner Losh 	hpa_do_consistency_checks(shard);
1041*c43cad87SWarner Losh 
1042*c43cad87SWarner Losh 	malloc_mutex_postfork_child(tsdn, &shard->grow_mtx);
1043*c43cad87SWarner Losh 	malloc_mutex_postfork_child(tsdn, &shard->mtx);
1044*c43cad87SWarner Losh }
1045