xref: /freebsd/contrib/jemalloc/src/sc.c (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1 #include "jemalloc/internal/jemalloc_preamble.h"
2 
3 #include "jemalloc/internal/assert.h"
4 #include "jemalloc/internal/bit_util.h"
5 #include "jemalloc/internal/bitmap.h"
6 #include "jemalloc/internal/pages.h"
7 #include "jemalloc/internal/sc.h"
8 
9 /*
10  * This module computes the size classes used to satisfy allocations.  The logic
11  * here was ported more or less line-by-line from a shell script, and because of
12  * that is not the most idiomatic C.  Eventually we should fix this, but for now
13  * at least the damage is compartmentalized to this file.
14  */
15 
16 sc_data_t sc_data_global;
17 
18 static size_t
19 reg_size_compute(int lg_base, int lg_delta, int ndelta) {
20 	return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta);
21 }
22 
23 /* Returns the number of pages in the slab. */
24 static int
25 slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) {
26 	size_t page = (ZU(1) << lg_page);
27 	size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta);
28 
29 	size_t try_slab_size = page;
30 	size_t try_nregs = try_slab_size / reg_size;
31 	size_t perfect_slab_size = 0;
32 	bool perfect = false;
33 	/*
34 	 * This loop continues until we find the least common multiple of the
35 	 * page size and size class size.  Size classes are all of the form
36 	 * base + ndelta * delta == (ndelta + base/ndelta) * delta, which is
37 	 * (ndelta + ngroup) * delta.  The way we choose slabbing strategies
38 	 * means that delta is at most the page size and ndelta < ngroup.  So
39 	 * the loop executes for at most 2 * ngroup - 1 iterations, which is
40 	 * also the bound on the number of pages in a slab chosen by default.
41 	 * With the current default settings, this is at most 7.
42 	 */
43 	while (!perfect) {
44 		perfect_slab_size = try_slab_size;
45 		size_t perfect_nregs = try_nregs;
46 		try_slab_size += page;
47 		try_nregs = try_slab_size / reg_size;
48 		if (perfect_slab_size == perfect_nregs * reg_size) {
49 			perfect = true;
50 		}
51 	}
52 	return (int)(perfect_slab_size / page);
53 }
54 
55 static void
56 size_class(
57     /* Output. */
58     sc_t *sc,
59     /* Configuration decisions. */
60     int lg_max_lookup, int lg_page, int lg_ngroup,
61     /* Inputs specific to the size class. */
62     int index, int lg_base, int lg_delta, int ndelta) {
63 	sc->index = index;
64 	sc->lg_base = lg_base;
65 	sc->lg_delta = lg_delta;
66 	sc->ndelta = ndelta;
67 	sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta)
68 	    % (ZU(1) << lg_page) == 0);
69 	size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta);
70 	if (index == 0) {
71 		assert(!sc->psz);
72 	}
73 	if (size < (ZU(1) << (lg_page + lg_ngroup))) {
74 		sc->bin = true;
75 		sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta);
76 	} else {
77 		sc->bin = false;
78 		sc->pgs = 0;
79 	}
80 	if (size <= (ZU(1) << lg_max_lookup)) {
81 		sc->lg_delta_lookup = lg_delta;
82 	} else {
83 		sc->lg_delta_lookup = 0;
84 	}
85 }
86 
87 static void
88 size_classes(
89     /* Output. */
90     sc_data_t *sc_data,
91     /* Determined by the system. */
92     size_t lg_ptr_size, int lg_quantum,
93     /* Configuration decisions. */
94     int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) {
95 	int ptr_bits = (1 << lg_ptr_size) * 8;
96 	int ngroup = (1 << lg_ngroup);
97 	int ntiny = 0;
98 	int nlbins = 0;
99 	int lg_tiny_maxclass = (unsigned)-1;
100 	int nbins = 0;
101 	int npsizes = 0;
102 
103 	int index = 0;
104 
105 	int ndelta = 0;
106 	int lg_base = lg_tiny_min;
107 	int lg_delta = lg_base;
108 
109 	/* Outputs that we update as we go. */
110 	size_t lookup_maxclass = 0;
111 	size_t small_maxclass = 0;
112 	int lg_large_minclass = 0;
113 	size_t large_maxclass = 0;
114 
115 	/* Tiny size classes. */
116 	while (lg_base < lg_quantum) {
117 		sc_t *sc = &sc_data->sc[index];
118 		size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
119 		    lg_base, lg_delta, ndelta);
120 		if (sc->lg_delta_lookup != 0) {
121 			nlbins = index + 1;
122 		}
123 		if (sc->psz) {
124 			npsizes++;
125 		}
126 		if (sc->bin) {
127 			nbins++;
128 		}
129 		ntiny++;
130 		/* Final written value is correct. */
131 		lg_tiny_maxclass = lg_base;
132 		index++;
133 		lg_delta = lg_base;
134 		lg_base++;
135 	}
136 
137 	/* First non-tiny (pseudo) group. */
138 	if (ntiny != 0) {
139 		sc_t *sc = &sc_data->sc[index];
140 		/*
141 		 * See the note in sc.h; the first non-tiny size class has an
142 		 * unusual encoding.
143 		 */
144 		lg_base--;
145 		ndelta = 1;
146 		size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
147 		    lg_base, lg_delta, ndelta);
148 		index++;
149 		lg_base++;
150 		lg_delta++;
151 		if (sc->psz) {
152 			npsizes++;
153 		}
154 		if (sc->bin) {
155 			nbins++;
156 		}
157 	}
158 	while (ndelta < ngroup) {
159 		sc_t *sc = &sc_data->sc[index];
160 		size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
161 		    lg_base, lg_delta, ndelta);
162 		index++;
163 		ndelta++;
164 		if (sc->psz) {
165 			npsizes++;
166 		}
167 		if (sc->bin) {
168 			nbins++;
169 		}
170 	}
171 
172 	/* All remaining groups. */
173 	lg_base = lg_base + lg_ngroup;
174 	while (lg_base < ptr_bits - 1) {
175 		ndelta = 1;
176 		int ndelta_limit;
177 		if (lg_base == ptr_bits - 2) {
178 			ndelta_limit = ngroup - 1;
179 		} else {
180 			ndelta_limit = ngroup;
181 		}
182 		while (ndelta <= ndelta_limit) {
183 			sc_t *sc = &sc_data->sc[index];
184 			size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
185 			    lg_base, lg_delta, ndelta);
186 			if (sc->lg_delta_lookup != 0) {
187 				nlbins = index + 1;
188 				/* Final written value is correct. */
189 				lookup_maxclass = (ZU(1) << lg_base)
190 				    + (ZU(ndelta) << lg_delta);
191 			}
192 			if (sc->psz) {
193 				npsizes++;
194 			}
195 			if (sc->bin) {
196 				nbins++;
197 				/* Final written value is correct. */
198 				small_maxclass = (ZU(1) << lg_base)
199 				    + (ZU(ndelta) << lg_delta);
200 				if (lg_ngroup > 0) {
201 					lg_large_minclass = lg_base + 1;
202 				} else {
203 					lg_large_minclass = lg_base + 2;
204 				}
205 			}
206 			large_maxclass = (ZU(1) << lg_base)
207 			    + (ZU(ndelta) << lg_delta);
208 			index++;
209 			ndelta++;
210 		}
211 		lg_base++;
212 		lg_delta++;
213 	}
214 	/* Additional outputs. */
215 	int nsizes = index;
216 	unsigned lg_ceil_nsizes = lg_ceil(nsizes);
217 
218 	/* Fill in the output data. */
219 	sc_data->ntiny = ntiny;
220 	sc_data->nlbins = nlbins;
221 	sc_data->nbins = nbins;
222 	sc_data->nsizes = nsizes;
223 	sc_data->lg_ceil_nsizes = lg_ceil_nsizes;
224 	sc_data->npsizes = npsizes;
225 	sc_data->lg_tiny_maxclass = lg_tiny_maxclass;
226 	sc_data->lookup_maxclass = lookup_maxclass;
227 	sc_data->small_maxclass = small_maxclass;
228 	sc_data->lg_large_minclass = lg_large_minclass;
229 	sc_data->large_minclass = (ZU(1) << lg_large_minclass);
230 	sc_data->large_maxclass = large_maxclass;
231 
232 	/*
233 	 * We compute these values in two ways:
234 	 *   - Incrementally, as above.
235 	 *   - In macros, in sc.h.
236 	 * The computation is easier when done incrementally, but putting it in
237 	 * a constant makes it available to the fast paths without having to
238 	 * touch the extra global cacheline.  We assert, however, that the two
239 	 * computations are equivalent.
240 	 */
241 	assert(sc_data->npsizes == SC_NPSIZES);
242 	assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS);
243 	assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS);
244 	assert(sc_data->large_minclass == SC_LARGE_MINCLASS);
245 	assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS);
246 	assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS);
247 
248 	/*
249 	 * In the allocation fastpath, we want to assume that we can
250 	 * unconditionally subtract the requested allocation size from
251 	 * a ssize_t, and detect passing through 0 correctly.  This
252 	 * results in optimal generated code.  For this to work, the
253 	 * maximum allocation size must be less than SSIZE_MAX.
254 	 */
255 	assert(SC_LARGE_MAXCLASS < SSIZE_MAX);
256 }
257 
258 void
259 sc_data_init(sc_data_t *sc_data) {
260 	assert(!sc_data->initialized);
261 
262 	int lg_max_lookup = 12;
263 
264 	size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN,
265 	    lg_max_lookup, LG_PAGE, 2);
266 
267 	sc_data->initialized = true;
268 }
269 
270 static void
271 sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) {
272 	size_t min_pgs = reg_size / PAGE;
273 	if (reg_size % PAGE != 0) {
274 		min_pgs++;
275 	}
276 	/*
277 	 * BITMAP_MAXBITS is actually determined by putting the smallest
278 	 * possible size-class on one page, so this can never be 0.
279 	 */
280 	size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE;
281 
282 	assert(min_pgs <= max_pgs);
283 	assert(min_pgs > 0);
284 	assert(max_pgs >= 1);
285 	if (pgs_guess < min_pgs) {
286 		sc->pgs = (int)min_pgs;
287 	} else if (pgs_guess > max_pgs) {
288 		sc->pgs = (int)max_pgs;
289 	} else {
290 		sc->pgs = (int)pgs_guess;
291 	}
292 }
293 
294 void
295 sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) {
296 	assert(data->initialized);
297 	for (int i = 0; i < data->nsizes; i++) {
298 		sc_t *sc = &data->sc[i];
299 		if (!sc->bin) {
300 			break;
301 		}
302 		size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta,
303 		    sc->ndelta);
304 		if (begin <= reg_size && reg_size <= end) {
305 			sc_data_update_sc_slab_size(sc, reg_size, pgs);
306 		}
307 	}
308 }
309 
310 void
311 sc_boot(sc_data_t *data) {
312 	sc_data_init(data);
313 }
314