#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/hpdata.h"

static int
hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) {
	uint64_t a_age = hpdata_age_get(a);
	uint64_t b_age = hpdata_age_get(b);
	/*
	 * hpdata ages are operation counts in the psset; no two should be the
	 * same.
	 */
	assert(a_age != b_age);
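	/*
	 * A branchless three-way comparison: this evaluates to -1 or 1 as
	 * a_age is less than or greater than b_age (equality is excluded by
	 * the assert above).
	 */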
	return (a_age > b_age) - (a_age < b_age);
}

ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp)

void
hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) {
	hpdata_addr_set(hpdata, addr);
	hpdata_age_set(hpdata, age);
	hpdata->h_huge = false;
	hpdata->h_alloc_allowed = true;
	hpdata->h_in_psset_alloc_container = false;
	hpdata->h_purge_allowed = false;
	hpdata->h_hugify_allowed = false;
	hpdata->h_in_psset_hugify_container = false;
	hpdata->h_mid_purge = false;
	hpdata->h_mid_hugify = false;
	hpdata->h_updating = false;
	hpdata->h_in_psset = false;
	hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES);
	hpdata->h_nactive = 0;
	fb_init(hpdata->active_pages, HUGEPAGE_PAGES);
	hpdata->h_ntouched = 0;
	fb_init(hpdata->touched_pages, HUGEPAGE_PAGES);

	hpdata_assert_consistent(hpdata);
}
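
/*
 * Illustrative initialization (hypothetical caller: the variable names here
 * are made up, but in jemalloc's HPA the age argument comes from a
 * monotonically increasing per-shard counter, which is what makes ages
 * unique and the age-heap ordering FIFO-like):
 *
 *	hpdata_t *ps = ...;	// metadata for one hugepage-sized region
 *	void *addr = ...;	// the HUGEPAGE-aligned address it tracks
 *	hpdata_init(ps, addr, age_counter++);
 */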

void *
hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/*
	 * This is a metadata change; the hpdata should therefore either not be
	 * in the psset, or should have explicitly marked itself as being
	 * mid-update.
	 */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(hpdata->h_alloc_allowed);
	assert((sz & PAGE_MASK) == 0);
	size_t npages = sz >> LG_PAGE;
	assert(npages <= hpdata_longest_free_range_get(hpdata));

	size_t result;

	size_t start = 0;
	/*
	 * These initializations are dead stores: begin and len are always
	 * written by fb_urange_iter before they're read.  The compiler can't
	 * tell statically that found is always true below, though, and would
	 * warn about uninitialized uses without them.
	 */
	size_t begin = 0;
	size_t len = 0;

	size_t largest_unchosen_range = 0;
	while (true) {
		bool found = fb_urange_iter(hpdata->active_pages,
		    HUGEPAGE_PAGES, start, &begin, &len);
		/*
		 * A precondition to this function is that hpdata must be able
		 * to serve the allocation.
		 */
		assert(found);
		assert(len <= hpdata_longest_free_range_get(hpdata));
		if (len >= npages) {
			/*
			 * We use first-fit within the page slabs; this gives
			 * bounded worst-case fragmentation within a slab.  It's
			 * not necessarily right; we could experiment with
			 * various other options.
			 */
			break;
		}
		if (len > largest_unchosen_range) {
			largest_unchosen_range = len;
		}
		start = begin + len;
	}
	/* We found a range; remember it. */
	result = begin;
	fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	hpdata->h_nactive += npages;

	/*
	 * We might be about to dirty some memory for the first time; update our
	 * count if so.
	 */
	size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES,
	    result, npages);
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages);
	hpdata->h_ntouched += new_dirty;

	/*
	 * If we allocated out of a range that was the longest in the hpdata, it
	 * might be the only one of that size and we'll have to adjust the
	 * metadata.
	 */
	if (len == hpdata_longest_free_range_get(hpdata)) {
		start = begin + npages;
		while (start < HUGEPAGE_PAGES) {
			bool found = fb_urange_iter(hpdata->active_pages,
			    HUGEPAGE_PAGES, start, &begin, &len);
			if (!found) {
				break;
			}
			assert(len <= hpdata_longest_free_range_get(hpdata));
			if (len == hpdata_longest_free_range_get(hpdata)) {
				largest_unchosen_range = len;
				break;
			}
			if (len > largest_unchosen_range) {
				largest_unchosen_range = len;
			}
			start = begin + len;
		}
		hpdata_longest_free_range_set(hpdata, largest_unchosen_range);
	}

	hpdata_assert_consistent(hpdata);
	return (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE));
}
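
/*
 * A worked example of the first-fit policy above (illustrative, not taken
 * from real data): suppose the free runs are, in address order, 2, 8, and 3
 * pages long, and a 3-page reservation arrives.  The 2-page run is skipped,
 * the request is carved from the front of the 8-page run (even though the
 * 3-page run would fit exactly), and the remaining runs are 2, 5, and 3
 * pages.  If the 8-page run was the unique longest, the re-scan above then
 * lowers the recorded longest free range from 8 to 5.
 */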

void
hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) {
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);
	assert(((uintptr_t)addr & PAGE_MASK) == 0);
	assert((sz & PAGE_MASK) == 0);
	size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata))
	    >> LG_PAGE;
	assert(begin < HUGEPAGE_PAGES);
	size_t npages = sz >> LG_PAGE;
	size_t old_longest_range = hpdata_longest_free_range_get(hpdata);

	fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages);
	/* We might have just created a new, larger range. */
	size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin) + 1);
	size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES,
	    begin + npages - 1);
	size_t new_range_len = new_end - new_begin;
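	/*
	 * Worked example (illustrative): if pages 0-2 are active, pages 3-5
	 * were just unreserved, page 6 was already free, and page 7 is
	 * active, then new_begin == 3 (one past the last active page before
	 * the hole) and new_end == 7 (the first active page after it), so the
	 * coalesced free run is new_range_len == 4 pages.
	 */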

	if (new_range_len > old_longest_range) {
		hpdata_longest_free_range_set(hpdata, new_range_len);
	}

	hpdata->h_nactive -= npages;

	hpdata_assert_consistent(hpdata);
}

size_t
hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	hpdata_assert_consistent(hpdata);
	/*
	 * See the comment below; we might purge any inactive extent, so it's
	 * unsafe for any other thread to turn any inactive extent active while
	 * we're operating on it.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	purge_state->npurged = 0;
	purge_state->next_purge_search_begin = 0;

	/*
	 * Initialize to_purge.
	 *
	 * It's possible to end up in situations where two dirty extents are
	 * separated by a retained extent:
	 * - 1 page allocated.
	 * - 1 page allocated.
	 * - 1 page allocated.
	 *
	 * If the middle page is freed and purged, and then the first and third
	 * pages are freed, and then another purge pass happens, the hpdata
	 * looks like this:
	 * - 1 page dirty.
	 * - 1 page retained.
	 * - 1 page dirty.
	 *
	 * But it's safe to do a single 3-page purge.
	 *
	 * We do this by first computing the dirty pages, and then filling in
	 * any gaps by extending each range in the dirty bitmap until the next
	 * active page.  This purges more pages, but the expensive part of
	 * purging is the TLB shootdowns, rather than the kernel state
	 * tracking; doing a little bit more of the latter is fine if it saves
	 * us from doing some of the former.
	 */

	/*
	 * The dirty pages are those that are touched but not active.  Note that
	 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the
	 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8
	 * fb_group_ts.
	 */
	fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
	fb_init(dirty_pages, HUGEPAGE_PAGES);
	fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES);
	fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages,
	    HUGEPAGE_PAGES);

	fb_init(purge_state->to_purge, HUGEPAGE_PAGES);
	size_t next_bit = 0;
	while (next_bit < HUGEPAGE_PAGES) {
		size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES,
		    next_bit);
		/* Recall that fb_ffs returns nbits if no set bit is found. */
		if (next_dirty == HUGEPAGE_PAGES) {
			break;
		}
		size_t next_active = fb_ffs(hpdata->active_pages,
		    HUGEPAGE_PAGES, next_dirty);
		/*
		 * Don't purge past the end of the dirty extent, into retained
		 * pages.  This helps the kernel a tiny bit, but honestly it's
		 * mostly helpful for testing (where we tend to write test cases
		 * that think in terms of the dirty ranges).
		 */
		ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES,
		    next_active - 1);
		assert(last_dirty >= 0);
		assert((size_t)last_dirty >= next_dirty);
		assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES);

		fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty,
		    last_dirty - next_dirty + 1);
		next_bit = next_active + 1;
	}
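
	/*
	 * Worked example of the loop above (illustrative): suppose pages 0
	 * and 2 are dirty, page 1 is retained, and page 3 is active.  Then
	 * dirty_pages has bits 0 and 2 set, and the loop sets bits 0 through
	 * 2 of to_purge: the retained gap at page 1 is folded into a single
	 * 3-page purge range that stops short of the active page 3.
	 */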

	/* We should purge, at least, everything dirty. */
	size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive;
	purge_state->ndirty_to_purge = ndirty;
	assert(ndirty <= fb_scount(
	    purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0,
	    HUGEPAGE_PAGES));

	hpdata_assert_consistent(hpdata);

	return ndirty;
}

bool
hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
    void **r_purge_addr, size_t *r_purge_size) {
	/*
	 * Note that we don't have a consistency check here; we're accessing
	 * hpdata without synchronization, and therefore have no right to expect
	 * a consistent state.
	 */
	assert(!hpdata_alloc_allowed_get(hpdata));

	if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) {
		return false;
	}
	size_t purge_begin;
	size_t purge_len;
	bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES,
	    purge_state->next_purge_search_begin, &purge_begin, &purge_len);
	if (!found_range) {
		return false;
	}

	*r_purge_addr = (void *)(
	    (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE);
	*r_purge_size = purge_len * PAGE;

	purge_state->next_purge_search_begin = purge_begin + purge_len;
	purge_state->npurged += purge_len;
	assert(purge_state->npurged <= HUGEPAGE_PAGES);

	return true;
}

void
hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) {
	assert(!hpdata_alloc_allowed_get(hpdata));
	hpdata_assert_consistent(hpdata);
	/* See the comment in reserve. */
	assert(!hpdata->h_in_psset || hpdata->h_updating);

	assert(purge_state->npurged == fb_scount(purge_state->to_purge,
	    HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES));
	assert(purge_state->npurged >= purge_state->ndirty_to_purge);

	fb_bit_not(purge_state->to_purge, purge_state->to_purge,
	    HUGEPAGE_PAGES);
	fb_bit_and(hpdata->touched_pages, hpdata->touched_pages,
	    purge_state->to_purge, HUGEPAGE_PAGES);
	assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge);
	hpdata->h_ntouched -= purge_state->ndirty_to_purge;

	hpdata_assert_consistent(hpdata);
}
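
/*
 * A sketch of the intended begin/next/end purge protocol (hypothetical
 * caller; the real driver lives in the HPA and also handles locking, stats,
 * and psset bookkeeping, all omitted here).  pages_purge_forced stands in
 * for whichever purging primitive the caller uses:
 *
 *	hpdata_purge_state_t purge_state;
 *	hpdata_alloc_allowed_set(hpdata, false);
 *	hpdata_purge_begin(hpdata, &purge_state);
 *	void *purge_addr;
 *	size_t purge_size;
 *	while (hpdata_purge_next(hpdata, &purge_state, &purge_addr,
 *	    &purge_size)) {
 *		pages_purge_forced(purge_addr, purge_size);
 *	}
 *	hpdata_purge_end(hpdata, &purge_state);
 */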

void
hpdata_hugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
	hpdata->h_huge = true;
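	/*
	 * Hugifying implicitly touches the entire range: once the kernel
	 * backs it with a hugepage, every small page within it is physically
	 * resident.
	 */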
	fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES);
	hpdata->h_ntouched = HUGEPAGE_PAGES;
	hpdata_assert_consistent(hpdata);
}

void
hpdata_dehugify(hpdata_t *hpdata) {
	hpdata_assert_consistent(hpdata);
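	/*
	 * Note that this only drops the flag; the touched/active bitmaps are
	 * left as they are, and any purging happens separately through the
	 * purge protocol above.
	 */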
	hpdata->h_huge = false;
	hpdata_assert_consistent(hpdata);
}
326