xref: /freebsd/contrib/jemalloc/src/hpdata.c (revision c43cad87172039ccf38172129c79755ea79e6102)
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/hpdata.h"

static int
hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) {
	uint64_t a_age = hpdata_age_get(a);
	uint64_t b_age = hpdata_age_get(b);
	/*
	 * hpdata ages are operation counts in the psset; no two should be the
	 * same.
	 */
	assert(a_age != b_age);
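	/*
	 * Three-way compare without subtraction; subtracting the two uint64_t
	 * ages and truncating the difference to int could misorder them.
	 */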
	return (a_age > b_age) - (a_age < b_age);
}

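/*
 * Instantiate the hpdata_age_heap_* pairing-heap operations, ordered by age
 * via hpdata_age_comp and linked through each hpdata's age_link field.
 */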
ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp)

void
hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
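	/*
	 * A freshly initialized hpdata is empty: nothing active or touched,
	 * the entire hugepage is a single free range, and it is not yet
	 * tracked by (or being operated on within) the psset.
	 */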
	hpdata_addr_set(hpdata, addr);
	hpdata_age_set(hpdata, age);
	hpdata->h_huge = false;
	hpdata->h_alloc_allowed = true;
	hpdata->h_in_psset_alloc_container = false;
	hpdata->h_purge_allowed = false;
	hpdata->h_hugify_allowed = false;
	hpdata->h_in_psset_hugify_container = false;
	hpdata->h_mid_purge = false;
	hpdata->h_mid_hugify = false;
	hpdata->h_updating = false;
	hpdata->h_in_psset = false;
	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
	hpdata->h_nactive = 0;
	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
	hpdata->h_ntouched = 0;
	fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);

	hpdata_assert_consistent(hpdata);
}

void *
hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/*
	 * This is a metadata change; the hpdata should therefore either not be
	 * in the psset, or should have explicitly marked itself as being
	 * mid-update.
	 */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(hpdata->h_alloc_allowed);
	assert((sz & PAGE_MASK) == 0);
	size_t npages = sz >> LG_PAGE;
	assert(npages <= hpdata_longest_free_range_get(hpdata));

	size_t result;

	size_t start = 0;
	/*
	 * These initializations are dead stores, but without them the compiler
	 * would issue maybe-uninitialized warnings, since it can't tell
	 * statically that found is always true below.
	 */
	size_t begin = 0;
	size_t len = 0;

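	/*
	 * Track the longest free range we pass over without choosing; if our
	 * allocation consumes part of the current longest range, this is what
	 * the longest-free-range metadata gets updated to.
	 */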
	size_t largest_unchosen_range = 0;
	while (true) {
		bool found = fb_urange_iter(hpdata->active_pages,
		    HUGEPAGE_PAGES, start, &begin, &len);
		/*
		 * A precondition to this function is that hpdata must be able
		 * to serve the allocation.
		 */
		assert(found);
		assert(len <= hpdata_longest_free_range_get(hpdata));
		if (len >= npages) {
			/*
			 * We use first-fit within the page slabs; this gives
			 * bounded worst-case fragmentation within a slab.  It's
			 * not necessarily right; we could experiment with
			 * various other options.
			 */
			break;
		}
		if (len > largest_unchosen_range) {
			largest_unchosen_range = len;
		}
		start = begin + len;
	}
	/* We found a range; remember it. */
	result = begin;
	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	hpdata->h_nactive += npages;

	/*
	 * We might be about to dirty some memory for the first time; update our
	 * count if so.
	 */
	size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES,
	    result, npages);
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
	hpdata->h_ntouched += new_dirty;

	/*
	 * If we allocated out of a range that was the longest in the hpdata, it
	 * might be the only one of that size and we'll have to adjust the
	 * metadata.
	 */
	if (len == hpdata_longest_free_range_get(hpdata)) {
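		/*
		 * Only the ranges at or after the end of our allocation need
		 * rescanning; everything before it was already folded into
		 * largest_unchosen_range by the first-fit loop above.  We can
		 * also stop early once we find a range as long as the old
		 * longest, since nothing can beat it.
		 */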
		start = begin + npages;
		while (start < HUGEPAGE_PAGES) {
			bool found = fb_urange_iter(hpdata->active_pages,
			    HUGEPAGE_PAGES, start, &begin, &len);
			if (!found) {
				break;
			}
			assert(len <= hpdata_longest_free_range_get(hpdata));
			if (len == hpdata_longest_free_range_get(hpdata)) {
				largest_unchosen_range = len;
				break;
			}
			if (len > largest_unchosen_range) {
				largest_unchosen_range = len;
			}
			start = begin + len;
		}
		hpdata_longest_free_range_set(hpdata, largest_unchosen_range);
	}

	hpdata_assert_consistent(hpdata);
	return (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE));
}

void
hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(((uintptr_t)addr & PAGE_MASK) == 0);
	assert((sz & PAGE_MASK) == 0);
	size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata))
	    >> LG_PAGE;
	assert(begin < HUGEPAGE_PAGES);
	size_t npages = sz >> LG_PAGE;
	size_t old_longest_range = hpdata_longest_free_range_get(hpdata);

	fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	/* We might have just created a new, larger range. */
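	/*
	 * The freed pages coalesce with any adjacent free pages; the nearest
	 * active page below the freed run and the nearest active page past its
	 * end bound the newly merged free range.
	 */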
	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin) + 1);
	size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin + npages - 1);
	size_t new_range_len = new_end - new_begin;

	if (new_range_len > old_longest_range) {
		hpdata_longest_free_range_set(hpdata, new_range_len);
	}

	hpdata->h_nactive -= npages;

	hpdata_assert_consistent(hpdata);
}

size_t
hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	hpdata_assert_consistent(hpdata);
	/*
	 * See the comment below; we might purge any inactive extent, so it's
	 * unsafe for any other thread to turn any inactive extent active while
	 * we're operating on it.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	purge_state->npurged = 0;
	purge_state->next_purge_search_begin = 0;

	/*
	 * Initialize to_purge.
	 *
	 * It's possible to end up in situations where two dirty extents are
	 * separated by a retained extent:
	 * - 1 page allocated.
	 * - 1 page allocated.
	 * - 1 page allocated.
	 *
	 * If the middle page is freed and purged, and then the first and third
	 * pages are freed, and then another purge pass happens, the hpdata
	 * looks like this:
	 * - 1 page dirty.
	 * - 1 page retained.
	 * - 1 page dirty.
	 *
	 * But it's safe to do a single 3-page purge.
	 *
	 * We do this by first computing the dirty pages, and then filling in
	 * any gaps by extending each range in the dirty bitmap until the next
	 * active page.  This purges more pages, but the expensive part
	 * of purging is the TLB shootdowns, rather than the kernel state
	 * tracking; doing a little bit more of the latter is fine if it saves
	 * us from doing some of the former.
	 */

	/*
	 * The dirty pages are those that are touched but not active.  Note that
	 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the
	 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8
	 * fb_group_ts.
	 */
	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
	fb_init(dirty_pages, HUGEPAGE_PAGES);
	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
	    HUGEPAGE_PAGES);

	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
	size_t next_bit = 0;
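	/*
	 * Walk the dirty bitmap: for each maximal run of inactive pages that
	 * contains dirty pages, mark the span from its first dirty page to its
	 * last dirty page for purging, then resume the search past the next
	 * active page.
	 */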
	while (next_bit < HUGEPAGE_PAGES) {
		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
		    next_bit);
		/* Recall that fb_ffs returns nbits if no set bit is found. */
		if (next_dirty == HUGEPAGE_PAGES) {
			break;
		}
		size_t next_active = fb_ffs(hpdata->active_pages,
		    HUGEPAGE_PAGES, next_dirty);
		/*
		 * Don't purge past the end of the dirty extent, into retained
		 * pages.  This helps the kernel a tiny bit, but honestly it's
		 * mostly helpful for testing (where we tend to write test cases
		 * that think in terms of the dirty ranges).
		 */
		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
		    next_active - 1);
		assert(last_dirty >= 0);
		assert((size_t)last_dirty >= next_dirty);
		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);

		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
		    last_dirty - next_dirty + 1);
		next_bit = next_active + 1;
	}

	/* We should purge, at least, everything dirty. */
	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
	purge_state->ndirty_to_purge = ndirty;
	assert(ndirty <= fb_scount(
	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
	    HUGEPAGE_PAGES));

	hpdata_assert_consistent(hpdata);

	return ndirty;
}

bool
hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size) {
	/*
	 * Note that we don't have a consistency check here; we're accessing
	 * hpdata without synchronization, and therefore have no right to expect
	 * a consistent state.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
		return false;
	}
	size_t purge_begin;
	size_t purge_len;
	bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES,
	    purge_state->next_purge_search_begin, &purge_begin, &purge_len);
	if (!found_range) {
		return false;
	}

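	/*
	 * Hand the range back as an (address, size) pair relative to the
	 * hugepage's base address.
	 */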
	*r_purge_addr = (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE);
	*r_purge_size = purge_len * PAGE;

	purge_state->next_purge_search_begin = purge_begin + purge_len;
	purge_state->npurged += purge_len;
	assert(purge_state->npurged <= HUGEPAGE_PAGES);

	return true;
}

void
hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	assert(!hpdata_alloc_allowed_get(hpdata));
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);

	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(purge_state->npurged >= purge_state->ndirty_to_purge);

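	/*
	 * Clear the purged pages from the touched map.  Some of the purged
	 * pages may have been retained gap-fillers that were never touched, so
	 * the touched count drops by the number of dirty pages we purged, not
	 * by the total number purged.
	 */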
	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
	    HUGEPAGE_PAGES);
	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
	    purge_state->to_purge, HUGEPAGE_PAGES);
	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
	hpdata->h_ntouched -= purge_state->ndirty_to_purge;

	hpdata_assert_consistent(hpdata);
}

void
hpdata_hugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = true;
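	/*
	 * After hugification the page-level distinction between touched and
	 * untouched memory no longer applies within this range, so
	 * conservatively account for the entire hugepage as touched.
	 */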
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
	hpdata->h_ntouched = HUGEPAGE_PAGES;
	hpdata_assert_consistent(hpdata);
}

void
hpdata_dehugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = false;
	hpdata_assert_consistent(hpdata);
}