#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/decay.h"

static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y)			\
	h,
	SMOOTHSTEP
#undef STEP
};

/*
 * Generate a new deadline that is uniformly random within the next epoch after
 * the current one.
 */
void
decay_deadline_init(decay_t *decay) {
	nstime_copy(&decay->deadline, &decay->epoch);
	nstime_add(&decay->deadline, &decay->interval);
	if (decay_ms_read(decay) > 0) {
		nstime_t jitter;

		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
		    nstime_ns(&decay->interval)));
		nstime_add(&decay->deadline, &jitter);
	}
}

void
decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
	if (decay_ms > 0) {
		nstime_init(&decay->interval, (uint64_t)decay_ms *
		    KQU(1000000));
		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
	}

	nstime_copy(&decay->epoch, cur_time);
	decay->jitter_state = (uint64_t)(uintptr_t)decay;
	decay_deadline_init(decay);
	decay->nunpurged = 0;
	memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
}

bool
decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	if (config_debug) {
		for (size_t i = 0; i < sizeof(decay_t); i++) {
			assert(((char *)decay)[i] == 0);
		}
		decay->ceil_npages = 0;
	}
	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}
	decay->purging = false;
	decay_reinit(decay, cur_time, decay_ms);
	return false;
}

bool
decay_ms_valid(ssize_t decay_ms) {
	if (decay_ms < -1) {
		return false;
	}
	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
	    KQU(1000)) {
		return true;
	}
	return false;
}

static void
decay_maybe_update_time(decay_t *decay, nstime_t *new_time) {
	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch,
	    new_time) > 0)) {
		/*
		 * Time went backwards.  Move the epoch back in time and
		 * generate a new deadline, with the expectation that time
		 * typically flows forward for long enough periods of time
		 * that epochs complete.  Unfortunately, this strategy is
		 * susceptible to clock jitter triggering premature epoch
		 * advances, but clock jitter estimation and compensation
		 * isn't feasible here because calls into this code are
		 * event-driven.
		 */
		nstime_copy(&decay->epoch, new_time);
		decay_deadline_init(decay);
	} else {
		/* Verify that time does not go backwards. */
		assert(nstime_compare(&decay->epoch, new_time) <= 0);
	}
}

static size_t
decay_backlog_npages_limit(const decay_t *decay) {
	/*
	 * For each element of decay_backlog, multiply by the corresponding
	 * fixed-point smoothstep decay factor.  Sum the products, then divide
	 * to round down to the nearest whole number of pages.
	 */
	uint64_t sum = 0;
	for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);

	return npages_limit_backlog;
}
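
/*
 * Illustrative sketch (an editorial addition, not upstream code): how the
 * fixed-point weighting above plays out numerically.  h_steps[] holds
 * smoothstep values scaled by 2^SMOOTHSTEP_BFP, rising from near 0 at the
 * oldest slot (index 0) to the full scale (1 << SMOOTHSTEP_BFP) at the
 * newest slot, so a record's contribution shrinks as it ages:
 *
 *	size_t backlog[SMOOTHSTEP_NSTEPS] = {0};
 *	backlog[SMOOTHSTEP_NSTEPS - 1] = 1000;	// freshly recorded pages
 *	// limit == (1000 * h_steps[SMOOTHSTEP_NSTEPS - 1]) >> SMOOTHSTEP_BFP
 *	// == 1000: fresh pages are fully retained.  Once the same record has
 *	// shifted halfway through the window, its weight is smoothstep(0.5)
 *	// == 0.5, so only ~500 of those pages would still count toward the
 *	// limit; the rest become eligible for purging.
 */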

/*
 * Update the backlog, assuming that 'nadvance_u64' time intervals have
 * passed.  The oldest 'nadvance_u64' records are shifted out, the vacated
 * trailing slots are zeroed, and 'current_npages' is placed as the newest
 * record.
 */
static void
decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
    size_t current_npages) {
	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
		    sizeof(size_t));
	} else {
		size_t nadvance_z = (size_t)nadvance_u64;

		assert((uint64_t)nadvance_z == nadvance_u64);

		memmove(decay->backlog, &decay->backlog[nadvance_z],
		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
		if (nadvance_z > 1) {
			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
		}
	}

	size_t npages_delta = (current_npages > decay->nunpurged) ?
	    current_npages - decay->nunpurged : 0;
	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;

	if (config_debug) {
		if (current_npages > decay->ceil_npages) {
			decay->ceil_npages = current_npages;
		}
		size_t npages_limit = decay_backlog_npages_limit(decay);
		assert(decay->ceil_npages >= npages_limit);
		if (decay->ceil_npages > npages_limit) {
			decay->ceil_npages = npages_limit;
		}
	}
}

static inline bool
decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
	return (nstime_compare(&decay->deadline, time) <= 0);
}

uint64_t
decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns);

	uint64_t npages_purge;
	if (n_epoch >= SMOOTHSTEP_NSTEPS) {
		npages_purge = npages_new;
	} else {
		uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
		assert(h_steps_max >=
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge = npages_new * (h_steps_max -
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge >>= SMOOTHSTEP_BFP;
	}
	return npages_purge;
}

bool
decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
    size_t npages_current) {
	/* Handle possible non-monotonicity of time. */
	decay_maybe_update_time(decay, new_time);

	if (!decay_deadline_reached(decay, new_time)) {
		return false;
	}
	nstime_t delta;
	nstime_copy(&delta, new_time);
	nstime_subtract(&delta, &decay->epoch);

	uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
	assert(nadvance_u64 > 0);

	/* Add nadvance_u64 decay intervals to epoch. */
	nstime_copy(&delta, &decay->interval);
	nstime_imultiply(&delta, nadvance_u64);
	nstime_add(&decay->epoch, &delta);

	/* Set a new deadline. */
	decay_deadline_init(decay);

	/* Update the backlog. */
	decay_backlog_update(decay, nadvance_u64, npages_current);

	decay->npages_limit = decay_backlog_npages_limit(decay);
	decay->nunpurged = (decay->npages_limit > npages_current) ?
	    decay->npages_limit : npages_current;

	return true;
}
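
/*
 * Illustrative caller-side sketch (hypothetical, not part of this file):
 * decay_maybe_advance_epoch() is event-driven, so a caller holding
 * decay->mtx samples the clock and the current page count, and purges down
 * to the refreshed limit whenever one or more epoch boundaries were crossed:
 *
 *	nstime_t now;
 *	nstime_init_update(&now);
 *	if (decay_maybe_advance_epoch(decay, &now, npages_current)) {
 *		// Purge any pages in excess of decay->npages_limit.
 *	}
 */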

/*
 * Calculate how many pages should be purged after 'interval'.
 *
 * First, calculate how many pages should remain at the moment, then subtract
 * the number of pages that should remain after 'interval'.  The difference is
 * how many pages should be purged until then.
 *
 * The number of pages that should remain at a specific moment is calculated
 * like this: pages(now) = sum(backlog[i] * h_steps[i]).  After 'interval'
 * passes, the backlog would shift 'interval' positions to the left and the
 * sigmoid curve would be applied starting with backlog[interval].
 *
 * The implementation doesn't directly map to the description, but it's
 * essentially the same calculation, optimized to avoid iterating over
 * [interval..SMOOTHSTEP_NSTEPS) twice.
 */
static inline size_t
decay_npurge_after_interval(decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] *
		    (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}

uint64_t
decay_ns_until_purge(decay_t *decay, size_t npages_current,
    uint64_t npages_threshold) {
	if (!decay_gradually(decay)) {
		return DECAY_UNBOUNDED_TIME_TO_PURGE;
	}
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	assert(decay_interval_ns > 0);
	if (npages_current == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			return DECAY_UNBOUNDED_TIME_TO_PURGE;
		}
	}
	if (npages_current <= npages_threshold) {
		/* Use max interval. */
		return decay_interval_ns * SMOOTHSTEP_NSTEPS;
	}

	/* A minimum of 2 intervals ensures the next epoch deadline is
	 * reached. */
	size_t lb = 2;
	size_t ub = SMOOTHSTEP_NSTEPS;

	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > npages_threshold) {
		return decay_interval_ns * lb;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < npages_threshold) {
		return decay_interval_ns * ub;
	}

	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > npages_threshold) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
		++n_search;
	}
	return decay_interval_ns * (ub + lb) / 2;
}
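
/*
 * Illustrative sketch (hypothetical, not part of this file): a background
 * purging thread could use decay_ns_until_purge() to bound how long it
 * sleeps before the next purge pass is worthwhile:
 *
 *	uint64_t ns = decay_ns_until_purge(decay, npages_current,
 *	    npages_threshold);
 *	if (ns == DECAY_UNBOUNDED_TIME_TO_PURGE) {
 *		// Nothing pending; sleep until an allocation event wakes us.
 *	} else {
 *		// Sleep for roughly ns nanoseconds, then re-check and purge.
 *	}
 */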