1*c5ad8142SEric van Gyzen #include "jemalloc/internal/jemalloc_preamble.h"
2*c5ad8142SEric van Gyzen
3*c5ad8142SEric van Gyzen #include "jemalloc/internal/assert.h"
4*c5ad8142SEric van Gyzen #include "jemalloc/internal/bit_util.h"
5*c5ad8142SEric van Gyzen #include "jemalloc/internal/bitmap.h"
6*c5ad8142SEric van Gyzen #include "jemalloc/internal/pages.h"
7*c5ad8142SEric van Gyzen #include "jemalloc/internal/sc.h"
8*c5ad8142SEric van Gyzen
9*c5ad8142SEric van Gyzen /*
10*c5ad8142SEric van Gyzen * This module computes the size classes used to satisfy allocations. The logic
11*c5ad8142SEric van Gyzen * here was ported more or less line-by-line from a shell script, and because of
12*c5ad8142SEric van Gyzen * that is not the most idiomatic C. Eventually we should fix this, but for now
13*c5ad8142SEric van Gyzen * at least the damage is compartmentalized to this file.
14*c5ad8142SEric van Gyzen */
15*c5ad8142SEric van Gyzen
16*c5ad8142SEric van Gyzen sc_data_t sc_data_global;
17*c5ad8142SEric van Gyzen
18*c5ad8142SEric van Gyzen static size_t
reg_size_compute(int lg_base,int lg_delta,int ndelta)19*c5ad8142SEric van Gyzen reg_size_compute(int lg_base, int lg_delta, int ndelta) {
20*c5ad8142SEric van Gyzen return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta);
21*c5ad8142SEric van Gyzen }
22*c5ad8142SEric van Gyzen
23*c5ad8142SEric van Gyzen /* Returns the number of pages in the slab. */
24*c5ad8142SEric van Gyzen static int
slab_size(int lg_page,int lg_base,int lg_delta,int ndelta)25*c5ad8142SEric van Gyzen slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) {
26*c5ad8142SEric van Gyzen size_t page = (ZU(1) << lg_page);
27*c5ad8142SEric van Gyzen size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta);
28*c5ad8142SEric van Gyzen
29*c5ad8142SEric van Gyzen size_t try_slab_size = page;
30*c5ad8142SEric van Gyzen size_t try_nregs = try_slab_size / reg_size;
31*c5ad8142SEric van Gyzen size_t perfect_slab_size = 0;
32*c5ad8142SEric van Gyzen bool perfect = false;
33*c5ad8142SEric van Gyzen /*
34*c5ad8142SEric van Gyzen * This loop continues until we find the least common multiple of the
35*c5ad8142SEric van Gyzen * page size and size class size. Size classes are all of the form
36*c5ad8142SEric van Gyzen * base + ndelta * delta == (ndelta + base/ndelta) * delta, which is
37*c5ad8142SEric van Gyzen * (ndelta + ngroup) * delta. The way we choose slabbing strategies
38*c5ad8142SEric van Gyzen * means that delta is at most the page size and ndelta < ngroup. So
39*c5ad8142SEric van Gyzen * the loop executes for at most 2 * ngroup - 1 iterations, which is
40*c5ad8142SEric van Gyzen * also the bound on the number of pages in a slab chosen by default.
41*c5ad8142SEric van Gyzen * With the current default settings, this is at most 7.
42*c5ad8142SEric van Gyzen */
43*c5ad8142SEric van Gyzen while (!perfect) {
44*c5ad8142SEric van Gyzen perfect_slab_size = try_slab_size;
45*c5ad8142SEric van Gyzen size_t perfect_nregs = try_nregs;
46*c5ad8142SEric van Gyzen try_slab_size += page;
47*c5ad8142SEric van Gyzen try_nregs = try_slab_size / reg_size;
48*c5ad8142SEric van Gyzen if (perfect_slab_size == perfect_nregs * reg_size) {
49*c5ad8142SEric van Gyzen perfect = true;
50*c5ad8142SEric van Gyzen }
51*c5ad8142SEric van Gyzen }
52*c5ad8142SEric van Gyzen return (int)(perfect_slab_size / page);
53*c5ad8142SEric van Gyzen }
54*c5ad8142SEric van Gyzen
55*c5ad8142SEric van Gyzen static void
size_class(sc_t * sc,int lg_max_lookup,int lg_page,int lg_ngroup,int index,int lg_base,int lg_delta,int ndelta)56*c5ad8142SEric van Gyzen size_class(
57*c5ad8142SEric van Gyzen /* Output. */
58*c5ad8142SEric van Gyzen sc_t *sc,
59*c5ad8142SEric van Gyzen /* Configuration decisions. */
60*c5ad8142SEric van Gyzen int lg_max_lookup, int lg_page, int lg_ngroup,
61*c5ad8142SEric van Gyzen /* Inputs specific to the size class. */
62*c5ad8142SEric van Gyzen int index, int lg_base, int lg_delta, int ndelta) {
63*c5ad8142SEric van Gyzen sc->index = index;
64*c5ad8142SEric van Gyzen sc->lg_base = lg_base;
65*c5ad8142SEric van Gyzen sc->lg_delta = lg_delta;
66*c5ad8142SEric van Gyzen sc->ndelta = ndelta;
67*c5ad8142SEric van Gyzen sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta)
68*c5ad8142SEric van Gyzen % (ZU(1) << lg_page) == 0);
69*c5ad8142SEric van Gyzen size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta);
70*c5ad8142SEric van Gyzen if (index == 0) {
71*c5ad8142SEric van Gyzen assert(!sc->psz);
72*c5ad8142SEric van Gyzen }
73*c5ad8142SEric van Gyzen if (size < (ZU(1) << (lg_page + lg_ngroup))) {
74*c5ad8142SEric van Gyzen sc->bin = true;
75*c5ad8142SEric van Gyzen sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta);
76*c5ad8142SEric van Gyzen } else {
77*c5ad8142SEric van Gyzen sc->bin = false;
78*c5ad8142SEric van Gyzen sc->pgs = 0;
79*c5ad8142SEric van Gyzen }
80*c5ad8142SEric van Gyzen if (size <= (ZU(1) << lg_max_lookup)) {
81*c5ad8142SEric van Gyzen sc->lg_delta_lookup = lg_delta;
82*c5ad8142SEric van Gyzen } else {
83*c5ad8142SEric van Gyzen sc->lg_delta_lookup = 0;
84*c5ad8142SEric van Gyzen }
85*c5ad8142SEric van Gyzen }
86*c5ad8142SEric van Gyzen
87*c5ad8142SEric van Gyzen static void
size_classes(sc_data_t * sc_data,size_t lg_ptr_size,int lg_quantum,int lg_tiny_min,int lg_max_lookup,int lg_page,int lg_ngroup)88*c5ad8142SEric van Gyzen size_classes(
89*c5ad8142SEric van Gyzen /* Output. */
90*c5ad8142SEric van Gyzen sc_data_t *sc_data,
91*c5ad8142SEric van Gyzen /* Determined by the system. */
92*c5ad8142SEric van Gyzen size_t lg_ptr_size, int lg_quantum,
93*c5ad8142SEric van Gyzen /* Configuration decisions. */
94*c5ad8142SEric van Gyzen int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) {
95*c5ad8142SEric van Gyzen int ptr_bits = (1 << lg_ptr_size) * 8;
96*c5ad8142SEric van Gyzen int ngroup = (1 << lg_ngroup);
97*c5ad8142SEric van Gyzen int ntiny = 0;
98*c5ad8142SEric van Gyzen int nlbins = 0;
99*c5ad8142SEric van Gyzen int lg_tiny_maxclass = (unsigned)-1;
100*c5ad8142SEric van Gyzen int nbins = 0;
101*c5ad8142SEric van Gyzen int npsizes = 0;
102*c5ad8142SEric van Gyzen
103*c5ad8142SEric van Gyzen int index = 0;
104*c5ad8142SEric van Gyzen
105*c5ad8142SEric van Gyzen int ndelta = 0;
106*c5ad8142SEric van Gyzen int lg_base = lg_tiny_min;
107*c5ad8142SEric van Gyzen int lg_delta = lg_base;
108*c5ad8142SEric van Gyzen
109*c5ad8142SEric van Gyzen /* Outputs that we update as we go. */
110*c5ad8142SEric van Gyzen size_t lookup_maxclass = 0;
111*c5ad8142SEric van Gyzen size_t small_maxclass = 0;
112*c5ad8142SEric van Gyzen int lg_large_minclass = 0;
113*c5ad8142SEric van Gyzen size_t large_maxclass = 0;
114*c5ad8142SEric van Gyzen
115*c5ad8142SEric van Gyzen /* Tiny size classes. */
116*c5ad8142SEric van Gyzen while (lg_base < lg_quantum) {
117*c5ad8142SEric van Gyzen sc_t *sc = &sc_data->sc[index];
118*c5ad8142SEric van Gyzen size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
119*c5ad8142SEric van Gyzen lg_base, lg_delta, ndelta);
120*c5ad8142SEric van Gyzen if (sc->lg_delta_lookup != 0) {
121*c5ad8142SEric van Gyzen nlbins = index + 1;
122*c5ad8142SEric van Gyzen }
123*c5ad8142SEric van Gyzen if (sc->psz) {
124*c5ad8142SEric van Gyzen npsizes++;
125*c5ad8142SEric van Gyzen }
126*c5ad8142SEric van Gyzen if (sc->bin) {
127*c5ad8142SEric van Gyzen nbins++;
128*c5ad8142SEric van Gyzen }
129*c5ad8142SEric van Gyzen ntiny++;
130*c5ad8142SEric van Gyzen /* Final written value is correct. */
131*c5ad8142SEric van Gyzen lg_tiny_maxclass = lg_base;
132*c5ad8142SEric van Gyzen index++;
133*c5ad8142SEric van Gyzen lg_delta = lg_base;
134*c5ad8142SEric van Gyzen lg_base++;
135*c5ad8142SEric van Gyzen }
136*c5ad8142SEric van Gyzen
137*c5ad8142SEric van Gyzen /* First non-tiny (pseudo) group. */
138*c5ad8142SEric van Gyzen if (ntiny != 0) {
139*c5ad8142SEric van Gyzen sc_t *sc = &sc_data->sc[index];
140*c5ad8142SEric van Gyzen /*
141*c5ad8142SEric van Gyzen * See the note in sc.h; the first non-tiny size class has an
142*c5ad8142SEric van Gyzen * unusual encoding.
143*c5ad8142SEric van Gyzen */
144*c5ad8142SEric van Gyzen lg_base--;
145*c5ad8142SEric van Gyzen ndelta = 1;
146*c5ad8142SEric van Gyzen size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
147*c5ad8142SEric van Gyzen lg_base, lg_delta, ndelta);
148*c5ad8142SEric van Gyzen index++;
149*c5ad8142SEric van Gyzen lg_base++;
150*c5ad8142SEric van Gyzen lg_delta++;
151*c5ad8142SEric van Gyzen if (sc->psz) {
152*c5ad8142SEric van Gyzen npsizes++;
153*c5ad8142SEric van Gyzen }
154*c5ad8142SEric van Gyzen if (sc->bin) {
155*c5ad8142SEric van Gyzen nbins++;
156*c5ad8142SEric van Gyzen }
157*c5ad8142SEric van Gyzen }
158*c5ad8142SEric van Gyzen while (ndelta < ngroup) {
159*c5ad8142SEric van Gyzen sc_t *sc = &sc_data->sc[index];
160*c5ad8142SEric van Gyzen size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
161*c5ad8142SEric van Gyzen lg_base, lg_delta, ndelta);
162*c5ad8142SEric van Gyzen index++;
163*c5ad8142SEric van Gyzen ndelta++;
164*c5ad8142SEric van Gyzen if (sc->psz) {
165*c5ad8142SEric van Gyzen npsizes++;
166*c5ad8142SEric van Gyzen }
167*c5ad8142SEric van Gyzen if (sc->bin) {
168*c5ad8142SEric van Gyzen nbins++;
169*c5ad8142SEric van Gyzen }
170*c5ad8142SEric van Gyzen }
171*c5ad8142SEric van Gyzen
172*c5ad8142SEric van Gyzen /* All remaining groups. */
173*c5ad8142SEric van Gyzen lg_base = lg_base + lg_ngroup;
174*c5ad8142SEric van Gyzen while (lg_base < ptr_bits - 1) {
175*c5ad8142SEric van Gyzen ndelta = 1;
176*c5ad8142SEric van Gyzen int ndelta_limit;
177*c5ad8142SEric van Gyzen if (lg_base == ptr_bits - 2) {
178*c5ad8142SEric van Gyzen ndelta_limit = ngroup - 1;
179*c5ad8142SEric van Gyzen } else {
180*c5ad8142SEric van Gyzen ndelta_limit = ngroup;
181*c5ad8142SEric van Gyzen }
182*c5ad8142SEric van Gyzen while (ndelta <= ndelta_limit) {
183*c5ad8142SEric van Gyzen sc_t *sc = &sc_data->sc[index];
184*c5ad8142SEric van Gyzen size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index,
185*c5ad8142SEric van Gyzen lg_base, lg_delta, ndelta);
186*c5ad8142SEric van Gyzen if (sc->lg_delta_lookup != 0) {
187*c5ad8142SEric van Gyzen nlbins = index + 1;
188*c5ad8142SEric van Gyzen /* Final written value is correct. */
189*c5ad8142SEric van Gyzen lookup_maxclass = (ZU(1) << lg_base)
190*c5ad8142SEric van Gyzen + (ZU(ndelta) << lg_delta);
191*c5ad8142SEric van Gyzen }
192*c5ad8142SEric van Gyzen if (sc->psz) {
193*c5ad8142SEric van Gyzen npsizes++;
194*c5ad8142SEric van Gyzen }
195*c5ad8142SEric van Gyzen if (sc->bin) {
196*c5ad8142SEric van Gyzen nbins++;
197*c5ad8142SEric van Gyzen /* Final written value is correct. */
198*c5ad8142SEric van Gyzen small_maxclass = (ZU(1) << lg_base)
199*c5ad8142SEric van Gyzen + (ZU(ndelta) << lg_delta);
200*c5ad8142SEric van Gyzen if (lg_ngroup > 0) {
201*c5ad8142SEric van Gyzen lg_large_minclass = lg_base + 1;
202*c5ad8142SEric van Gyzen } else {
203*c5ad8142SEric van Gyzen lg_large_minclass = lg_base + 2;
204*c5ad8142SEric van Gyzen }
205*c5ad8142SEric van Gyzen }
206*c5ad8142SEric van Gyzen large_maxclass = (ZU(1) << lg_base)
207*c5ad8142SEric van Gyzen + (ZU(ndelta) << lg_delta);
208*c5ad8142SEric van Gyzen index++;
209*c5ad8142SEric van Gyzen ndelta++;
210*c5ad8142SEric van Gyzen }
211*c5ad8142SEric van Gyzen lg_base++;
212*c5ad8142SEric van Gyzen lg_delta++;
213*c5ad8142SEric van Gyzen }
214*c5ad8142SEric van Gyzen /* Additional outputs. */
215*c5ad8142SEric van Gyzen int nsizes = index;
216*c5ad8142SEric van Gyzen unsigned lg_ceil_nsizes = lg_ceil(nsizes);
217*c5ad8142SEric van Gyzen
218*c5ad8142SEric van Gyzen /* Fill in the output data. */
219*c5ad8142SEric van Gyzen sc_data->ntiny = ntiny;
220*c5ad8142SEric van Gyzen sc_data->nlbins = nlbins;
221*c5ad8142SEric van Gyzen sc_data->nbins = nbins;
222*c5ad8142SEric van Gyzen sc_data->nsizes = nsizes;
223*c5ad8142SEric van Gyzen sc_data->lg_ceil_nsizes = lg_ceil_nsizes;
224*c5ad8142SEric van Gyzen sc_data->npsizes = npsizes;
225*c5ad8142SEric van Gyzen sc_data->lg_tiny_maxclass = lg_tiny_maxclass;
226*c5ad8142SEric van Gyzen sc_data->lookup_maxclass = lookup_maxclass;
227*c5ad8142SEric van Gyzen sc_data->small_maxclass = small_maxclass;
228*c5ad8142SEric van Gyzen sc_data->lg_large_minclass = lg_large_minclass;
229*c5ad8142SEric van Gyzen sc_data->large_minclass = (ZU(1) << lg_large_minclass);
230*c5ad8142SEric van Gyzen sc_data->large_maxclass = large_maxclass;
231*c5ad8142SEric van Gyzen
232*c5ad8142SEric van Gyzen /*
233*c5ad8142SEric van Gyzen * We compute these values in two ways:
234*c5ad8142SEric van Gyzen * - Incrementally, as above.
235*c5ad8142SEric van Gyzen * - In macros, in sc.h.
236*c5ad8142SEric van Gyzen * The computation is easier when done incrementally, but putting it in
237*c5ad8142SEric van Gyzen * a constant makes it available to the fast paths without having to
238*c5ad8142SEric van Gyzen * touch the extra global cacheline. We assert, however, that the two
239*c5ad8142SEric van Gyzen * computations are equivalent.
240*c5ad8142SEric van Gyzen */
241*c5ad8142SEric van Gyzen assert(sc_data->npsizes == SC_NPSIZES);
242*c5ad8142SEric van Gyzen assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS);
243*c5ad8142SEric van Gyzen assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS);
244*c5ad8142SEric van Gyzen assert(sc_data->large_minclass == SC_LARGE_MINCLASS);
245*c5ad8142SEric van Gyzen assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS);
246*c5ad8142SEric van Gyzen assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS);
247*c5ad8142SEric van Gyzen
248*c5ad8142SEric van Gyzen /*
249*c5ad8142SEric van Gyzen * In the allocation fastpath, we want to assume that we can
250*c5ad8142SEric van Gyzen * unconditionally subtract the requested allocation size from
251*c5ad8142SEric van Gyzen * a ssize_t, and detect passing through 0 correctly. This
252*c5ad8142SEric van Gyzen * results in optimal generated code. For this to work, the
253*c5ad8142SEric van Gyzen * maximum allocation size must be less than SSIZE_MAX.
254*c5ad8142SEric van Gyzen */
255*c5ad8142SEric van Gyzen assert(SC_LARGE_MAXCLASS < SSIZE_MAX);
256*c5ad8142SEric van Gyzen }
257*c5ad8142SEric van Gyzen
258*c5ad8142SEric van Gyzen void
sc_data_init(sc_data_t * sc_data)259*c5ad8142SEric van Gyzen sc_data_init(sc_data_t *sc_data) {
260*c5ad8142SEric van Gyzen assert(!sc_data->initialized);
261*c5ad8142SEric van Gyzen
262*c5ad8142SEric van Gyzen int lg_max_lookup = 12;
263*c5ad8142SEric van Gyzen
264*c5ad8142SEric van Gyzen size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN,
265*c5ad8142SEric van Gyzen lg_max_lookup, LG_PAGE, 2);
266*c5ad8142SEric van Gyzen
267*c5ad8142SEric van Gyzen sc_data->initialized = true;
268*c5ad8142SEric van Gyzen }
269*c5ad8142SEric van Gyzen
270*c5ad8142SEric van Gyzen static void
sc_data_update_sc_slab_size(sc_t * sc,size_t reg_size,size_t pgs_guess)271*c5ad8142SEric van Gyzen sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) {
272*c5ad8142SEric van Gyzen size_t min_pgs = reg_size / PAGE;
273*c5ad8142SEric van Gyzen if (reg_size % PAGE != 0) {
274*c5ad8142SEric van Gyzen min_pgs++;
275*c5ad8142SEric van Gyzen }
276*c5ad8142SEric van Gyzen /*
277*c5ad8142SEric van Gyzen * BITMAP_MAXBITS is actually determined by putting the smallest
278*c5ad8142SEric van Gyzen * possible size-class on one page, so this can never be 0.
279*c5ad8142SEric van Gyzen */
280*c5ad8142SEric van Gyzen size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE;
281*c5ad8142SEric van Gyzen
282*c5ad8142SEric van Gyzen assert(min_pgs <= max_pgs);
283*c5ad8142SEric van Gyzen assert(min_pgs > 0);
284*c5ad8142SEric van Gyzen assert(max_pgs >= 1);
285*c5ad8142SEric van Gyzen if (pgs_guess < min_pgs) {
286*c5ad8142SEric van Gyzen sc->pgs = (int)min_pgs;
287*c5ad8142SEric van Gyzen } else if (pgs_guess > max_pgs) {
288*c5ad8142SEric van Gyzen sc->pgs = (int)max_pgs;
289*c5ad8142SEric van Gyzen } else {
290*c5ad8142SEric van Gyzen sc->pgs = (int)pgs_guess;
291*c5ad8142SEric van Gyzen }
292*c5ad8142SEric van Gyzen }
293*c5ad8142SEric van Gyzen
294*c5ad8142SEric van Gyzen void
sc_data_update_slab_size(sc_data_t * data,size_t begin,size_t end,int pgs)295*c5ad8142SEric van Gyzen sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) {
296*c5ad8142SEric van Gyzen assert(data->initialized);
297*c5ad8142SEric van Gyzen for (int i = 0; i < data->nsizes; i++) {
298*c5ad8142SEric van Gyzen sc_t *sc = &data->sc[i];
299*c5ad8142SEric van Gyzen if (!sc->bin) {
300*c5ad8142SEric van Gyzen break;
301*c5ad8142SEric van Gyzen }
302*c5ad8142SEric van Gyzen size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta,
303*c5ad8142SEric van Gyzen sc->ndelta);
304*c5ad8142SEric van Gyzen if (begin <= reg_size && reg_size <= end) {
305*c5ad8142SEric van Gyzen sc_data_update_sc_slab_size(sc, reg_size, pgs);
306*c5ad8142SEric van Gyzen }
307*c5ad8142SEric van Gyzen }
308*c5ad8142SEric van Gyzen }
309*c5ad8142SEric van Gyzen
310*c5ad8142SEric van Gyzen void
sc_boot(sc_data_t * data)311*c5ad8142SEric van Gyzen sc_boot(sc_data_t *data) {
312*c5ad8142SEric van Gyzen sc_data_init(data);
313*c5ad8142SEric van Gyzen }
314