#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/eset.h"

#define ESET_NPSIZES (SC_NPSIZES + 1)

static void
eset_bin_init(eset_bin_t *bin) {
	edata_heap_new(&bin->heap);
	/*
	 * heap_min doesn't need initialization; it gets filled in when the bin
	 * goes from empty to non-empty.
	 */
}

static void
eset_bin_stats_init(eset_bin_stats_t *bin_stats) {
	atomic_store_zu(&bin_stats->nextents, 0, ATOMIC_RELAXED);
	atomic_store_zu(&bin_stats->nbytes, 0, ATOMIC_RELAXED);
}

void
eset_init(eset_t *eset, extent_state_t state) {
	for (unsigned i = 0; i < ESET_NPSIZES; i++) {
		eset_bin_init(&eset->bins[i]);
		eset_bin_stats_init(&eset->bin_stats[i]);
	}
	fb_init(eset->bitmap, ESET_NPSIZES);
	edata_list_inactive_init(&eset->lru);
	eset->state = state;
}

size_t
eset_npages_get(eset_t *eset) {
	return atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
}

size_t
eset_nextents_get(eset_t *eset, pszind_t pind) {
	return atomic_load_zu(&eset->bin_stats[pind].nextents, ATOMIC_RELAXED);
}

size_t
eset_nbytes_get(eset_t *eset, pszind_t pind) {
	return atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED);
}
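
/*
 * The per-bin stats below are maintained with relaxed loads and stores rather
 * than atomic fetch-add/sub.  This presumably mirrors the npages handling in
 * eset_insert() and eset_remove(): updates are assumed to be serialized by the
 * mutex of the eset's owner, so the atomics only need to keep concurrent
 * readers of the counters well-defined, not to provide read-modify-write
 * atomicity.
 */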
static void
eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) {
	size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents,
	    ATOMIC_RELAXED);
	atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1,
	    ATOMIC_RELAXED);
	cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED);
	atomic_store_zu(&eset->bin_stats[pind].nbytes, cur + sz,
	    ATOMIC_RELAXED);
}

static void
eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) {
	size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents,
	    ATOMIC_RELAXED);
	atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1,
	    ATOMIC_RELAXED);
	cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED);
	atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz,
	    ATOMIC_RELAXED);
}

void
eset_insert(eset_t *eset, edata_t *edata) {
	assert(edata_state_get(edata) == eset->state);

	size_t size = edata_size_get(edata);
	size_t psz = sz_psz_quantize_floor(size);
	pszind_t pind = sz_psz2ind(psz);

	edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
	if (edata_heap_empty(&eset->bins[pind].heap)) {
		fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind);
		/* The only element is automatically the min element. */
		eset->bins[pind].heap_min = edata_cmp_summary;
	} else {
		/*
		 * There's already a min element; update the summary if we're
		 * about to insert a lower one.
		 */
		if (edata_cmp_summary_comp(edata_cmp_summary,
		    eset->bins[pind].heap_min) < 0) {
			eset->bins[pind].heap_min = edata_cmp_summary;
		}
	}
	edata_heap_insert(&eset->bins[pind].heap, edata);

	if (config_stats) {
		eset_stats_add(eset, pind, size);
	}

	edata_list_inactive_append(&eset->lru, edata);
	size_t npages = size >> LG_PAGE;
	/*
	 * All modifications to npages are serialized by the eset owner's
	 * mutex, so we don't need an atomic fetch-add; we can get by with a
	 * load followed by a store.
	 */
	size_t cur_eset_npages =
	    atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
	atomic_store_zu(&eset->npages, cur_eset_npages + npages,
	    ATOMIC_RELAXED);
}

void
eset_remove(eset_t *eset, edata_t *edata) {
	assert(edata_state_get(edata) == eset->state ||
	    edata_state_in_transition(edata_state_get(edata)));

	size_t size = edata_size_get(edata);
	size_t psz = sz_psz_quantize_floor(size);
	pszind_t pind = sz_psz2ind(psz);
	if (config_stats) {
		eset_stats_sub(eset, pind, size);
	}

	edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata);
	edata_heap_remove(&eset->bins[pind].heap, edata);
	if (edata_heap_empty(&eset->bins[pind].heap)) {
		fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind);
	} else {
		/*
		 * This is a little weird; we check whether the summaries are
		 * equal, rather than whether the edata we removed was the heap
		 * minimum.  The reason is that getting the heap minimum can
		 * cause a pairing heap merge operation.  We can avoid that by
		 * only updating the min when it may have changed, in which
		 * case the summaries of the removed element and the min
		 * element compare equal.
		 */
		if (edata_cmp_summary_comp(edata_cmp_summary,
		    eset->bins[pind].heap_min) == 0) {
			eset->bins[pind].heap_min = edata_cmp_summary_get(
			    edata_heap_first(&eset->bins[pind].heap));
		}
	}
	edata_list_inactive_remove(&eset->lru, edata);
	size_t npages = size >> LG_PAGE;
	/*
	 * As in eset_insert, updates are serialized by the eset owner's mutex,
	 * so we don't need atomic operations for updating eset->npages.
	 */
	size_t cur_extents_npages =
	    atomic_load_zu(&eset->npages, ATOMIC_RELAXED);
	assert(cur_extents_npages >= npages);
	atomic_store_zu(&eset->npages, cur_extents_npages - npages,
	    ATOMIC_RELAXED);
}

/*
 * Find an extent with size in [min_size, max_size) that satisfies the
 * alignment requirement.  For each size, try only the first extent in the
 * heap.
 */
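/*
 * An illustrative example (arbitrary numbers, 4 KiB pages): for
 * min_size = 8 KiB and alignment = 16 KiB, a 16 KiB candidate at base 0x25000
 * would have next_align = 0x28000 and leadsize = 12 KiB, leaving only 4 KiB
 * past the aligned address, so it would be skipped; a 16 KiB candidate at
 * base 0x27000 would have leadsize = 4 KiB, leaving 12 KiB >= min_size, so it
 * would be returned.
 */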
static edata_t *
eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size,
    size_t alignment) {
	pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size));
	pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size));

	for (pszind_t i =
	    (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
	    i < pind_max;
	    i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
		assert(i < SC_NPSIZES);
		assert(!edata_heap_empty(&eset->bins[i].heap));
		edata_t *edata = edata_heap_first(&eset->bins[i].heap);
		uintptr_t base = (uintptr_t)edata_base_get(edata);
		size_t candidate_size = edata_size_get(edata);
		assert(candidate_size >= min_size);

		uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base,
		    PAGE_CEILING(alignment));
		if (base > next_align || base + candidate_size <= next_align) {
			/* Overflow or not crossing the next alignment. */
			continue;
		}

		size_t leadsize = next_align - base;
		if (candidate_size - leadsize >= min_size) {
			return edata;
		}
	}

	return NULL;
}

/*
 * Do first-fit extent selection, i.e. select the oldest/lowest extent that is
 * large enough.
 *
 * lg_max_fit is the (log of the) maximum ratio between the returned size and
 * the requested size that we'll allow.  This can reduce fragmentation by
 * avoiding reusing and splitting large extents to serve smaller sizes.  In
 * practice, it's set to opt_lg_extent_max_active_fit for the dirty eset and
 * to SC_PTR_BITS for the others.
 */
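/*
 * An illustrative example (arbitrary numbers): with lg_max_fit == 2, a 16 KiB
 * request is never served from a bin whose size class exceeds 64 KiB, because
 * the loop below stops as soon as (sz_pind2sz(i) >> lg_max_fit) > size.
 */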
static edata_t *
eset_first_fit(eset_t *eset, size_t size, bool exact_only,
    unsigned lg_max_fit) {
	edata_t *ret = NULL;
	edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0});

	pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size));

	if (exact_only) {
		return edata_heap_empty(&eset->bins[pind].heap) ? NULL :
		    edata_heap_first(&eset->bins[pind].heap);
	}

	for (pszind_t i =
	    (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind);
	    i < ESET_NPSIZES;
	    i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) {
		assert(!edata_heap_empty(&eset->bins[i].heap));
		if (lg_max_fit == SC_PTR_BITS) {
			/*
			 * We'll shift by this below, and shifting out all the
			 * bits is undefined.  Decreasing is safe, since the
			 * page size is larger than 1 byte.
			 */
			lg_max_fit = SC_PTR_BITS - 1;
		}
		if ((sz_pind2sz(i) >> lg_max_fit) > size) {
			break;
		}
		if (ret == NULL || edata_cmp_summary_comp(
		    eset->bins[i].heap_min, ret_summ) < 0) {
			/*
			 * We grab the edata as early as possible, even though
			 * we might change it later.  Practically, a large
			 * portion of eset_fit calls succeed at the first valid
			 * index, so this doesn't cost much, and we get the
			 * effect of prefetching the edata as early as
			 * possible.
			 */
			edata_t *edata = edata_heap_first(&eset->bins[i].heap);
			assert(edata_size_get(edata) >= size);
			assert(ret == NULL || edata_snad_comp(edata, ret) < 0);
			assert(ret == NULL || edata_cmp_summary_comp(
			    eset->bins[i].heap_min,
			    edata_cmp_summary_get(edata)) == 0);
			ret = edata;
			ret_summ = eset->bins[i].heap_min;
		}
		if (i == SC_NPSIZES) {
			break;
		}
		assert(i < SC_NPSIZES);
	}

	return ret;
}
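
/*
 * A sketch of the padding argument used below (assuming page-aligned extent
 * bases): an extent of at least esize + PAGE_CEILING(alignment) - PAGE bytes
 * always contains an alignment-aligned run of esize bytes, since at most
 * PAGE_CEILING(alignment) - PAGE bytes can precede the first aligned address
 * within it.  For example (illustrative numbers, 4 KiB pages), esize = 8 KiB
 * and alignment = 16 KiB give max_size = 20 KiB.
 */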
edata_t *
eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only,
    unsigned lg_max_fit) {
	size_t max_size = esize + PAGE_CEILING(alignment) - PAGE;
	/* Beware size_t wrap-around. */
	if (max_size < esize) {
		return NULL;
	}

	edata_t *edata = eset_first_fit(eset, max_size, exact_only, lg_max_fit);

	if (alignment > PAGE && edata == NULL) {
		/*
		 * max_size guarantees the alignment requirement but is rather
		 * pessimistic.  Next we try to satisfy the aligned allocation
		 * with sizes in [esize, max_size).
		 */
		edata = eset_fit_alignment(eset, esize, max_size, alignment);
	}

	return edata;
}