#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/psset.h"

#include "jemalloc/internal/fb.h"

void
psset_init(psset_t *psset) {
	for (unsigned i = 0; i < PSSET_NPSIZES; i++) {
		hpdata_age_heap_new(&psset->pageslabs[i]);
	}
	fb_init(psset->pageslab_bitmap, PSSET_NPSIZES);
	memset(&psset->merged_stats, 0, sizeof(psset->merged_stats));
	memset(&psset->stats, 0, sizeof(psset->stats));
	hpdata_empty_list_init(&psset->empty);
	for (int i = 0; i < PSSET_NPURGE_LISTS; i++) {
		hpdata_purge_list_init(&psset->to_purge[i]);
	}
	fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS);
	hpdata_hugify_list_init(&psset->to_hugify);
}

static void
psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) {
	dst->npageslabs += src->npageslabs;
	dst->nactive += src->nactive;
	dst->ndirty += src->ndirty;
}

void
psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) {
	psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]);
	psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]);
	psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]);
	psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]);
	for (pszind_t i = 0; i < PSSET_NPSIZES; i++) {
		psset_bin_stats_accum(&dst->nonfull_slabs[i][0],
		    &src->nonfull_slabs[i][0]);
		psset_bin_stats_accum(&dst->nonfull_slabs[i][1],
		    &src->nonfull_slabs[i][1]);
	}
}

/*
 * The stats maintenance strategy is to remove a pageslab's contribution to the
 * stats when we call psset_update_begin, and re-add it (to a potentially new
 * bin) when we call psset_update_end.
 */
JEMALLOC_ALWAYS_INLINE void
psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps, bool insert) {
	size_t mul = insert ? (size_t)1 : (size_t)-1;
	size_t huge_idx = (size_t)hpdata_huge_get(ps);

	binstats[huge_idx].npageslabs += mul * 1;
	binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps);
	binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps);

	psset->merged_stats.npageslabs += mul * 1;
	psset->merged_stats.nactive += mul * hpdata_nactive_get(ps);
	psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps);

	if (config_debug) {
		psset_bin_stats_t check_stats = {0};
		for (size_t huge = 0; huge <= 1; huge++) {
			psset_bin_stats_accum(&check_stats,
			    &psset->stats.full_slabs[huge]);
			psset_bin_stats_accum(&check_stats,
			    &psset->stats.empty_slabs[huge]);
			for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) {
				psset_bin_stats_accum(&check_stats,
				    &psset->stats.nonfull_slabs[pind][huge]);
			}
		}
		assert(psset->merged_stats.npageslabs
		    == check_stats.npageslabs);
		assert(psset->merged_stats.nactive == check_stats.nactive);
		assert(psset->merged_stats.ndirty == check_stats.ndirty);
	}
}
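
/*
 * A worked note on the unsigned arithmetic above (illustrative; the numbers
 * are hypothetical): with insert == false, mul == (size_t)-1 == SIZE_MAX, and
 * for any size_t x, mul * x wraps to (2^N - x) mod 2^N, so
 * "counter += mul * x" behaves exactly like "counter -= x".  For example, on
 * a 64-bit target with hpdata_nactive_get(ps) == 3:
 *
 *	mul * 3 == 0xfffffffffffffffd
 *	nactive += mul * 3;	// nactive decreases by 3
 *
 * This lets one branch-free body implement both the insert and remove halves.
 */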

static void
psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps) {
	psset_bin_stats_insert_remove(psset, binstats, ps, true);
}

static void
psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats,
    hpdata_t *ps) {
	psset_bin_stats_insert_remove(psset, binstats, ps, false);
}

static void
psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) {
	hpdata_age_heap_remove(&psset->pageslabs[pind], ps);
	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
		fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
	}
}

static void
psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) {
	if (hpdata_age_heap_empty(&psset->pageslabs[pind])) {
		fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind);
	}
	hpdata_age_heap_insert(&psset->pageslabs[pind], ps);
}

static void
psset_stats_insert(psset_t *psset, hpdata_t *ps) {
	if (hpdata_empty(ps)) {
		psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps);
	} else if (hpdata_full(ps)) {
		psset_bin_stats_insert(psset, psset->stats.full_slabs, ps);
	} else {
		size_t longest_free_range = hpdata_longest_free_range_get(ps);

		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
		    longest_free_range << LG_PAGE));
		assert(pind < PSSET_NPSIZES);

		psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind],
		    ps);
	}
}

static void
psset_stats_remove(psset_t *psset, hpdata_t *ps) {
	if (hpdata_empty(ps)) {
		psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps);
	} else if (hpdata_full(ps)) {
		psset_bin_stats_remove(psset, psset->stats.full_slabs, ps);
	} else {
		size_t longest_free_range = hpdata_longest_free_range_get(ps);

		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
		    longest_free_range << LG_PAGE));
		assert(pind < PSSET_NPSIZES);

		psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind],
		    ps);
	}
}

/*
 * Put ps into some container so that it can be found during future allocation
 * requests.
 */
static void
psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) {
	assert(!hpdata_in_psset_alloc_container_get(ps));
	hpdata_in_psset_alloc_container_set(ps, true);
	if (hpdata_empty(ps)) {
		/*
		 * This prepend, paired with taking the list head in
		 * psset_pick_alloc, means we implement LIFO ordering for the
		 * empty slabs set, which seems reasonable.
		 */
		hpdata_empty_list_prepend(&psset->empty, ps);
	} else if (hpdata_full(ps)) {
		/*
		 * We don't need to keep track of the full slabs; we're never
		 * going to return them from a psset_pick_alloc call.
		 */
	} else {
		size_t longest_free_range = hpdata_longest_free_range_get(ps);

		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
		    longest_free_range << LG_PAGE));
		assert(pind < PSSET_NPSIZES);

		psset_hpdata_heap_insert(psset, pind, ps);
	}
}

/* Remove ps from those collections. */
static void
psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) {
	assert(hpdata_in_psset_alloc_container_get(ps));
	hpdata_in_psset_alloc_container_set(ps, false);

	if (hpdata_empty(ps)) {
		hpdata_empty_list_remove(&psset->empty, ps);
	} else if (hpdata_full(ps)) {
		/* Same as above -- do nothing in this case. */
	} else {
		size_t longest_free_range = hpdata_longest_free_range_get(ps);

		pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(
		    longest_free_range << LG_PAGE));
		assert(pind < PSSET_NPSIZES);

		psset_hpdata_heap_remove(psset, pind, ps);
	}
}

static size_t
psset_purge_list_ind(hpdata_t *ps) {
	size_t ndirty = hpdata_ndirty_get(ps);
	/* We should never see a purge-eligible slab with no dirty pages. */
	assert(ndirty > 0);
	/*
	 * Higher indices correspond to lists we'd like to purge earlier; make
	 * the two highest indices correspond to empty lists, which we attempt
	 * to purge before purging any non-empty list.  This has two advantages:
	 * - Empty page slabs are the least likely to get reused (we'll only
	 *   pick them for an allocation if we have no other choice).
	 * - Empty page slabs can purge every dirty page they contain in a
	 *   single call, which is not usually the case.
	 *
	 * We purge hugified empty slabs before non-hugified ones, on the basis
	 * that they are fully dirty, while non-hugified slabs might not be, so
	 * we free up more pages more easily.
	 */
	if (hpdata_nactive_get(ps) == 0) {
		if (hpdata_huge_get(ps)) {
			return PSSET_NPURGE_LISTS - 1;
		} else {
			return PSSET_NPURGE_LISTS - 2;
		}
	}

	pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE));
	/*
	 * We may reuse non-empty slabs again, so among slabs of similar
	 * dirtiness, prefer purging non-hugified ones before hugified ones;
	 * that way we still get some benefit from the hugification.
	 */
	return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1);
}
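
/*
 * A worked mapping for the function above (illustrative only; the concrete
 * index values depend on PSSET_NPURGE_LISTS and the psz quantization):
 *
 *	empty, hugified slab			-> PSSET_NPURGE_LISTS - 1
 *	empty, non-hugified slab		-> PSSET_NPURGE_LISTS - 2
 *	non-empty, ndirty quantizing to pind:
 *		hugified			-> 2 * pind
 *		non-hugified			-> 2 * pind + 1
 *
 * Since psset_pick_purge scans from the highest index downward, empty slabs
 * go first, and among non-empty slabs of similar dirtiness the non-hugified
 * one is purged before the hugified one.
 */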

static void
psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) {
	/*
	 * Remove the hpdata from its purge list (if it's in one).  Even if it's
	 * going to stay in the same one, by appending it during
	 * psset_update_end, we move it to the end of its queue, so that we
	 * purge LRU within a given dirtiness bucket.
	 */
	if (hpdata_purge_allowed_get(ps)) {
		size_t ind = psset_purge_list_ind(ps);
		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
		hpdata_purge_list_remove(purge_list, ps);
		if (hpdata_purge_list_empty(purge_list)) {
			fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
		}
	}
}

static void
psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) {
	if (hpdata_purge_allowed_get(ps)) {
		size_t ind = psset_purge_list_ind(ps);
		hpdata_purge_list_t *purge_list = &psset->to_purge[ind];
		if (hpdata_purge_list_empty(purge_list)) {
			fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind);
		}
		hpdata_purge_list_append(purge_list, ps);
	}
}

void
psset_update_begin(psset_t *psset, hpdata_t *ps) {
	hpdata_assert_consistent(ps);
	assert(hpdata_in_psset_get(ps));
	hpdata_updating_set(ps, true);
	psset_stats_remove(psset, ps);
	if (hpdata_in_psset_alloc_container_get(ps)) {
		/*
		 * Some metadata updates can break alloc container invariants
		 * (e.g. the longest free range determines the
		 * hpdata_age_heap_t the pageslab lives in).
		 */
		assert(hpdata_alloc_allowed_get(ps));
		psset_alloc_container_remove(psset, ps);
	}
	psset_maybe_remove_purge_list(psset, ps);
	/*
	 * We don't update presence in the hugify list; we try to keep it FIFO,
	 * even in the presence of other metadata updates.  We'll update
	 * presence at the end of the metadata update if necessary.
	 */
}

void
psset_update_end(psset_t *psset, hpdata_t *ps) {
	assert(hpdata_in_psset_get(ps));
	hpdata_updating_set(ps, false);
	psset_stats_insert(psset, ps);

	/*
	 * The update begin should have removed ps from whatever alloc container
	 * it was in.
	 */
	assert(!hpdata_in_psset_alloc_container_get(ps));
	if (hpdata_alloc_allowed_get(ps)) {
		psset_alloc_container_insert(psset, ps);
	}
	psset_maybe_insert_purge_list(psset, ps);

	if (hpdata_hugify_allowed_get(ps)
	    && !hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, true);
		hpdata_hugify_list_append(&psset->to_hugify, ps);
	} else if (!hpdata_hugify_allowed_get(ps)
	    && hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, false);
		hpdata_hugify_list_remove(&psset->to_hugify, ps);
	}
	hpdata_assert_consistent(ps);
}
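
/*
 * A minimal sketch of the begin/end protocol above, as a caller would use it
 * (psset_mutate_example is a hypothetical illustration, not an API in this
 * file; the real callers live in the HPA):
 *
 *	static void
 *	psset_mutate_example(psset_t *psset, hpdata_t *ps) {
 *		psset_update_begin(psset, ps);
 *		// ... mutate ps: reserve/unreserve ranges, dirty pages ...
 *		psset_update_end(psset, ps);
 *	}
 *
 * Between begin and end, ps is out of the stats, the alloc containers, and
 * the purge lists, so psset_pick_alloc and psset_pick_purge can't observe it
 * in a half-updated state; update_end then re-bins it per its new metadata.
 */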

hpdata_t *
psset_pick_alloc(psset_t *psset, size_t size) {
	assert((size & PAGE_MASK) == 0);
	assert(size <= HUGEPAGE);

	pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size));
	pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES,
	    (size_t)min_pind);
	if (pind == PSSET_NPSIZES) {
		return hpdata_empty_list_first(&psset->empty);
	}
	hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]);
	if (ps == NULL) {
		return NULL;
	}

	hpdata_assert_consistent(ps);

	return ps;
}
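
/*
 * An illustrative walk-through of the lookup above (the concrete numbers are
 * hypothetical): a request for, say, 5 pages quantizes up via
 * sz_psz_quantize_ceil to the smallest size class covering it, giving
 * min_pind.  fb_ffs then finds the first nonempty heap at index >= min_pind;
 * since each heap holds slabs whose longest free range quantizes *down* to
 * that index, any hit is guaranteed to fit the request (first fit).  Only if
 * every sized heap is empty do we fall back to a wholly empty slab.
 */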

hpdata_t *
psset_pick_purge(psset_t *psset) {
	ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS,
	    PSSET_NPURGE_LISTS - 1);
	if (ind_ssz < 0) {
		return NULL;
	}
	pszind_t ind = (pszind_t)ind_ssz;
	assert(ind < PSSET_NPURGE_LISTS);
	hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]);
	assert(ps != NULL);
	return ps;
}
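
/*
 * Note on the scan direction above: fb_fls searches downward from
 * PSSET_NPURGE_LISTS - 1, so we always take the head of the highest-indexed
 * nonempty purge list -- empty hugified slabs first, then empty non-hugified
 * ones, then dirtier partial slabs before cleaner ones, matching the ordering
 * established by psset_purge_list_ind.
 */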

hpdata_t *
psset_pick_hugify(psset_t *psset) {
	return hpdata_hugify_list_first(&psset->to_hugify);
}

void
psset_insert(psset_t *psset, hpdata_t *ps) {
	hpdata_in_psset_set(ps, true);

	psset_stats_insert(psset, ps);
	if (hpdata_alloc_allowed_get(ps)) {
		psset_alloc_container_insert(psset, ps);
	}
	psset_maybe_insert_purge_list(psset, ps);

	if (hpdata_hugify_allowed_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, true);
		hpdata_hugify_list_append(&psset->to_hugify, ps);
	}
}

void
psset_remove(psset_t *psset, hpdata_t *ps) {
	hpdata_in_psset_set(ps, false);

	psset_stats_remove(psset, ps);
	if (hpdata_in_psset_alloc_container_get(ps)) {
		psset_alloc_container_remove(psset, ps);
	}
	psset_maybe_remove_purge_list(psset, ps);
	if (hpdata_in_psset_hugify_container_get(ps)) {
		hpdata_in_psset_hugify_container_set(ps, false);
		hpdata_hugify_list_remove(&psset->to_hugify, ps);
	}
}