1*c43cad87SWarner Losh #include "jemalloc/internal/jemalloc_preamble.h" 2*c43cad87SWarner Losh #include "jemalloc/internal/jemalloc_internal_includes.h" 3*c43cad87SWarner Losh 4*c43cad87SWarner Losh #include "jemalloc/internal/decay.h" 5*c43cad87SWarner Losh 6*c43cad87SWarner Losh static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { 7*c43cad87SWarner Losh #define STEP(step, h, x, y) \ 8*c43cad87SWarner Losh h, 9*c43cad87SWarner Losh SMOOTHSTEP 10*c43cad87SWarner Losh #undef STEP 11*c43cad87SWarner Losh }; 12*c43cad87SWarner Losh 13*c43cad87SWarner Losh /* 14*c43cad87SWarner Losh * Generate a new deadline that is uniformly random within the next epoch after 15*c43cad87SWarner Losh * the current one. 16*c43cad87SWarner Losh */ 17*c43cad87SWarner Losh void 18*c43cad87SWarner Losh decay_deadline_init(decay_t *decay) { 19*c43cad87SWarner Losh nstime_copy(&decay->deadline, &decay->epoch); 20*c43cad87SWarner Losh nstime_add(&decay->deadline, &decay->interval); 21*c43cad87SWarner Losh if (decay_ms_read(decay) > 0) { 22*c43cad87SWarner Losh nstime_t jitter; 23*c43cad87SWarner Losh 24*c43cad87SWarner Losh nstime_init(&jitter, prng_range_u64(&decay->jitter_state, 25*c43cad87SWarner Losh nstime_ns(&decay->interval))); 26*c43cad87SWarner Losh nstime_add(&decay->deadline, &jitter); 27*c43cad87SWarner Losh } 28*c43cad87SWarner Losh } 29*c43cad87SWarner Losh 30*c43cad87SWarner Losh void 31*c43cad87SWarner Losh decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { 32*c43cad87SWarner Losh atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); 33*c43cad87SWarner Losh if (decay_ms > 0) { 34*c43cad87SWarner Losh nstime_init(&decay->interval, (uint64_t)decay_ms * 35*c43cad87SWarner Losh KQU(1000000)); 36*c43cad87SWarner Losh nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); 37*c43cad87SWarner Losh } 38*c43cad87SWarner Losh 39*c43cad87SWarner Losh nstime_copy(&decay->epoch, cur_time); 40*c43cad87SWarner Losh decay->jitter_state = (uint64_t)(uintptr_t)decay; 41*c43cad87SWarner Losh decay_deadline_init(decay); 42*c43cad87SWarner Losh decay->nunpurged = 0; 43*c43cad87SWarner Losh memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); 44*c43cad87SWarner Losh } 45*c43cad87SWarner Losh 46*c43cad87SWarner Losh bool 47*c43cad87SWarner Losh decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { 48*c43cad87SWarner Losh if (config_debug) { 49*c43cad87SWarner Losh for (size_t i = 0; i < sizeof(decay_t); i++) { 50*c43cad87SWarner Losh assert(((char *)decay)[i] == 0); 51*c43cad87SWarner Losh } 52*c43cad87SWarner Losh decay->ceil_npages = 0; 53*c43cad87SWarner Losh } 54*c43cad87SWarner Losh if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, 55*c43cad87SWarner Losh malloc_mutex_rank_exclusive)) { 56*c43cad87SWarner Losh return true; 57*c43cad87SWarner Losh } 58*c43cad87SWarner Losh decay->purging = false; 59*c43cad87SWarner Losh decay_reinit(decay, cur_time, decay_ms); 60*c43cad87SWarner Losh return false; 61*c43cad87SWarner Losh } 62*c43cad87SWarner Losh 63*c43cad87SWarner Losh bool 64*c43cad87SWarner Losh decay_ms_valid(ssize_t decay_ms) { 65*c43cad87SWarner Losh if (decay_ms < -1) { 66*c43cad87SWarner Losh return false; 67*c43cad87SWarner Losh } 68*c43cad87SWarner Losh if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * 69*c43cad87SWarner Losh KQU(1000)) { 70*c43cad87SWarner Losh return true; 71*c43cad87SWarner Losh } 72*c43cad87SWarner Losh return false; 73*c43cad87SWarner Losh } 74*c43cad87SWarner Losh 75*c43cad87SWarner Losh static void 76*c43cad87SWarner Losh decay_maybe_update_time(decay_t *decay, nstime_t *new_time) { 77*c43cad87SWarner Losh if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, 78*c43cad87SWarner Losh new_time) > 0)) { 79*c43cad87SWarner Losh /* 80*c43cad87SWarner Losh * Time went backwards. Move the epoch back in time and 81*c43cad87SWarner Losh * generate a new deadline, with the expectation that time 82*c43cad87SWarner Losh * typically flows forward for long enough periods of time that 83*c43cad87SWarner Losh * epochs complete. Unfortunately, this strategy is susceptible 84*c43cad87SWarner Losh * to clock jitter triggering premature epoch advances, but 85*c43cad87SWarner Losh * clock jitter estimation and compensation isn't feasible here 86*c43cad87SWarner Losh * because calls into this code are event-driven. 87*c43cad87SWarner Losh */ 88*c43cad87SWarner Losh nstime_copy(&decay->epoch, new_time); 89*c43cad87SWarner Losh decay_deadline_init(decay); 90*c43cad87SWarner Losh } else { 91*c43cad87SWarner Losh /* Verify that time does not go backwards. */ 92*c43cad87SWarner Losh assert(nstime_compare(&decay->epoch, new_time) <= 0); 93*c43cad87SWarner Losh } 94*c43cad87SWarner Losh } 95*c43cad87SWarner Losh 96*c43cad87SWarner Losh static size_t 97*c43cad87SWarner Losh decay_backlog_npages_limit(const decay_t *decay) { 98*c43cad87SWarner Losh /* 99*c43cad87SWarner Losh * For each element of decay_backlog, multiply by the corresponding 100*c43cad87SWarner Losh * fixed-point smoothstep decay factor. Sum the products, then divide 101*c43cad87SWarner Losh * to round down to the nearest whole number of pages. 102*c43cad87SWarner Losh */ 103*c43cad87SWarner Losh uint64_t sum = 0; 104*c43cad87SWarner Losh for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) { 105*c43cad87SWarner Losh sum += decay->backlog[i] * h_steps[i]; 106*c43cad87SWarner Losh } 107*c43cad87SWarner Losh size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); 108*c43cad87SWarner Losh 109*c43cad87SWarner Losh return npages_limit_backlog; 110*c43cad87SWarner Losh } 111*c43cad87SWarner Losh 112*c43cad87SWarner Losh /* 113*c43cad87SWarner Losh * Update backlog, assuming that 'nadvance_u64' time intervals have passed. 114*c43cad87SWarner Losh * Trailing 'nadvance_u64' records should be erased and 'current_npages' is 115*c43cad87SWarner Losh * placed as the newest record. 116*c43cad87SWarner Losh */ 117*c43cad87SWarner Losh static void 118*c43cad87SWarner Losh decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, 119*c43cad87SWarner Losh size_t current_npages) { 120*c43cad87SWarner Losh if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { 121*c43cad87SWarner Losh memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * 122*c43cad87SWarner Losh sizeof(size_t)); 123*c43cad87SWarner Losh } else { 124*c43cad87SWarner Losh size_t nadvance_z = (size_t)nadvance_u64; 125*c43cad87SWarner Losh 126*c43cad87SWarner Losh assert((uint64_t)nadvance_z == nadvance_u64); 127*c43cad87SWarner Losh 128*c43cad87SWarner Losh memmove(decay->backlog, &decay->backlog[nadvance_z], 129*c43cad87SWarner Losh (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); 130*c43cad87SWarner Losh if (nadvance_z > 1) { 131*c43cad87SWarner Losh memset(&decay->backlog[SMOOTHSTEP_NSTEPS - 132*c43cad87SWarner Losh nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); 133*c43cad87SWarner Losh } 134*c43cad87SWarner Losh } 135*c43cad87SWarner Losh 136*c43cad87SWarner Losh size_t npages_delta = (current_npages > decay->nunpurged) ? 137*c43cad87SWarner Losh current_npages - decay->nunpurged : 0; 138*c43cad87SWarner Losh decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; 139*c43cad87SWarner Losh 140*c43cad87SWarner Losh if (config_debug) { 141*c43cad87SWarner Losh if (current_npages > decay->ceil_npages) { 142*c43cad87SWarner Losh decay->ceil_npages = current_npages; 143*c43cad87SWarner Losh } 144*c43cad87SWarner Losh size_t npages_limit = decay_backlog_npages_limit(decay); 145*c43cad87SWarner Losh assert(decay->ceil_npages >= npages_limit); 146*c43cad87SWarner Losh if (decay->ceil_npages > npages_limit) { 147*c43cad87SWarner Losh decay->ceil_npages = npages_limit; 148*c43cad87SWarner Losh } 149*c43cad87SWarner Losh } 150*c43cad87SWarner Losh } 151*c43cad87SWarner Losh 152*c43cad87SWarner Losh static inline bool 153*c43cad87SWarner Losh decay_deadline_reached(const decay_t *decay, const nstime_t *time) { 154*c43cad87SWarner Losh return (nstime_compare(&decay->deadline, time) <= 0); 155*c43cad87SWarner Losh } 156*c43cad87SWarner Losh 157*c43cad87SWarner Losh uint64_t 158*c43cad87SWarner Losh decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { 159*c43cad87SWarner Losh uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); 160*c43cad87SWarner Losh size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns); 161*c43cad87SWarner Losh 162*c43cad87SWarner Losh uint64_t npages_purge; 163*c43cad87SWarner Losh if (n_epoch >= SMOOTHSTEP_NSTEPS) { 164*c43cad87SWarner Losh npages_purge = npages_new; 165*c43cad87SWarner Losh } else { 166*c43cad87SWarner Losh uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; 167*c43cad87SWarner Losh assert(h_steps_max >= 168*c43cad87SWarner Losh h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); 169*c43cad87SWarner Losh npages_purge = npages_new * (h_steps_max - 170*c43cad87SWarner Losh h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); 171*c43cad87SWarner Losh npages_purge >>= SMOOTHSTEP_BFP; 172*c43cad87SWarner Losh } 173*c43cad87SWarner Losh return npages_purge; 174*c43cad87SWarner Losh } 175*c43cad87SWarner Losh 176*c43cad87SWarner Losh bool 177*c43cad87SWarner Losh decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, 178*c43cad87SWarner Losh size_t npages_current) { 179*c43cad87SWarner Losh /* Handle possible non-monotonicity of time. */ 180*c43cad87SWarner Losh decay_maybe_update_time(decay, new_time); 181*c43cad87SWarner Losh 182*c43cad87SWarner Losh if (!decay_deadline_reached(decay, new_time)) { 183*c43cad87SWarner Losh return false; 184*c43cad87SWarner Losh } 185*c43cad87SWarner Losh nstime_t delta; 186*c43cad87SWarner Losh nstime_copy(&delta, new_time); 187*c43cad87SWarner Losh nstime_subtract(&delta, &decay->epoch); 188*c43cad87SWarner Losh 189*c43cad87SWarner Losh uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); 190*c43cad87SWarner Losh assert(nadvance_u64 > 0); 191*c43cad87SWarner Losh 192*c43cad87SWarner Losh /* Add nadvance_u64 decay intervals to epoch. */ 193*c43cad87SWarner Losh nstime_copy(&delta, &decay->interval); 194*c43cad87SWarner Losh nstime_imultiply(&delta, nadvance_u64); 195*c43cad87SWarner Losh nstime_add(&decay->epoch, &delta); 196*c43cad87SWarner Losh 197*c43cad87SWarner Losh /* Set a new deadline. */ 198*c43cad87SWarner Losh decay_deadline_init(decay); 199*c43cad87SWarner Losh 200*c43cad87SWarner Losh /* Update the backlog. */ 201*c43cad87SWarner Losh decay_backlog_update(decay, nadvance_u64, npages_current); 202*c43cad87SWarner Losh 203*c43cad87SWarner Losh decay->npages_limit = decay_backlog_npages_limit(decay); 204*c43cad87SWarner Losh decay->nunpurged = (decay->npages_limit > npages_current) ? 205*c43cad87SWarner Losh decay->npages_limit : npages_current; 206*c43cad87SWarner Losh 207*c43cad87SWarner Losh return true; 208*c43cad87SWarner Losh } 209*c43cad87SWarner Losh 210*c43cad87SWarner Losh /* 211*c43cad87SWarner Losh * Calculate how many pages should be purged after 'interval'. 212*c43cad87SWarner Losh * 213*c43cad87SWarner Losh * First, calculate how many pages should remain at the moment, then subtract 214*c43cad87SWarner Losh * the number of pages that should remain after 'interval'. The difference is 215*c43cad87SWarner Losh * how many pages should be purged until then. 216*c43cad87SWarner Losh * 217*c43cad87SWarner Losh * The number of pages that should remain at a specific moment is calculated 218*c43cad87SWarner Losh * like this: pages(now) = sum(backlog[i] * h_steps[i]). After 'interval' 219*c43cad87SWarner Losh * passes, backlog would shift 'interval' positions to the left and sigmoid 220*c43cad87SWarner Losh * curve would be applied starting with backlog[interval]. 221*c43cad87SWarner Losh * 222*c43cad87SWarner Losh * The implementation doesn't directly map to the description, but it's 223*c43cad87SWarner Losh * essentially the same calculation, optimized to avoid iterating over 224*c43cad87SWarner Losh * [interval..SMOOTHSTEP_NSTEPS) twice. 225*c43cad87SWarner Losh */ 226*c43cad87SWarner Losh static inline size_t 227*c43cad87SWarner Losh decay_npurge_after_interval(decay_t *decay, size_t interval) { 228*c43cad87SWarner Losh size_t i; 229*c43cad87SWarner Losh uint64_t sum = 0; 230*c43cad87SWarner Losh for (i = 0; i < interval; i++) { 231*c43cad87SWarner Losh sum += decay->backlog[i] * h_steps[i]; 232*c43cad87SWarner Losh } 233*c43cad87SWarner Losh for (; i < SMOOTHSTEP_NSTEPS; i++) { 234*c43cad87SWarner Losh sum += decay->backlog[i] * 235*c43cad87SWarner Losh (h_steps[i] - h_steps[i - interval]); 236*c43cad87SWarner Losh } 237*c43cad87SWarner Losh 238*c43cad87SWarner Losh return (size_t)(sum >> SMOOTHSTEP_BFP); 239*c43cad87SWarner Losh } 240*c43cad87SWarner Losh 241*c43cad87SWarner Losh uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, 242*c43cad87SWarner Losh uint64_t npages_threshold) { 243*c43cad87SWarner Losh if (!decay_gradually(decay)) { 244*c43cad87SWarner Losh return DECAY_UNBOUNDED_TIME_TO_PURGE; 245*c43cad87SWarner Losh } 246*c43cad87SWarner Losh uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); 247*c43cad87SWarner Losh assert(decay_interval_ns > 0); 248*c43cad87SWarner Losh if (npages_current == 0) { 249*c43cad87SWarner Losh unsigned i; 250*c43cad87SWarner Losh for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { 251*c43cad87SWarner Losh if (decay->backlog[i] > 0) { 252*c43cad87SWarner Losh break; 253*c43cad87SWarner Losh } 254*c43cad87SWarner Losh } 255*c43cad87SWarner Losh if (i == SMOOTHSTEP_NSTEPS) { 256*c43cad87SWarner Losh /* No dirty pages recorded. Sleep indefinitely. */ 257*c43cad87SWarner Losh return DECAY_UNBOUNDED_TIME_TO_PURGE; 258*c43cad87SWarner Losh } 259*c43cad87SWarner Losh } 260*c43cad87SWarner Losh if (npages_current <= npages_threshold) { 261*c43cad87SWarner Losh /* Use max interval. */ 262*c43cad87SWarner Losh return decay_interval_ns * SMOOTHSTEP_NSTEPS; 263*c43cad87SWarner Losh } 264*c43cad87SWarner Losh 265*c43cad87SWarner Losh /* Minimal 2 intervals to ensure reaching next epoch deadline. */ 266*c43cad87SWarner Losh size_t lb = 2; 267*c43cad87SWarner Losh size_t ub = SMOOTHSTEP_NSTEPS; 268*c43cad87SWarner Losh 269*c43cad87SWarner Losh size_t npurge_lb, npurge_ub; 270*c43cad87SWarner Losh npurge_lb = decay_npurge_after_interval(decay, lb); 271*c43cad87SWarner Losh if (npurge_lb > npages_threshold) { 272*c43cad87SWarner Losh return decay_interval_ns * lb; 273*c43cad87SWarner Losh } 274*c43cad87SWarner Losh npurge_ub = decay_npurge_after_interval(decay, ub); 275*c43cad87SWarner Losh if (npurge_ub < npages_threshold) { 276*c43cad87SWarner Losh return decay_interval_ns * ub; 277*c43cad87SWarner Losh } 278*c43cad87SWarner Losh 279*c43cad87SWarner Losh unsigned n_search = 0; 280*c43cad87SWarner Losh size_t target, npurge; 281*c43cad87SWarner Losh while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) { 282*c43cad87SWarner Losh target = (lb + ub) / 2; 283*c43cad87SWarner Losh npurge = decay_npurge_after_interval(decay, target); 284*c43cad87SWarner Losh if (npurge > npages_threshold) { 285*c43cad87SWarner Losh ub = target; 286*c43cad87SWarner Losh npurge_ub = npurge; 287*c43cad87SWarner Losh } else { 288*c43cad87SWarner Losh lb = target; 289*c43cad87SWarner Losh npurge_lb = npurge; 290*c43cad87SWarner Losh } 291*c43cad87SWarner Losh assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); 292*c43cad87SWarner Losh ++n_search; 293*c43cad87SWarner Losh } 294*c43cad87SWarner Losh return decay_interval_ns * (ub + lb) / 2; 295*c43cad87SWarner Losh } 296