#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/decay.h"
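
/*
 * h_steps[] is filled via the SMOOTHSTEP X-macro from smoothstep.h: each
 * STEP() entry supplies h, a fixed-point sample (SMOOTHSTEP_BFP fractional
 * bits) of a sigmoid rising from ~0 at the first step to 1.0 at the last.
 * Multiplying a page count by h_steps[i] and shifting right by SMOOTHSTEP_BFP
 * therefore scales it by that fraction without floating-point math.
 */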
static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = {
#define STEP(step, h, x, y)			\
		h,
		SMOOTHSTEP
#undef STEP
};

/*
 * Generate a new deadline that is uniformly random within the next epoch after
 * the current one.
 */
void
decay_deadline_init(decay_t *decay) {
	nstime_copy(&decay->deadline, &decay->epoch);
	nstime_add(&decay->deadline, &decay->interval);
	if (decay_ms_read(decay) > 0) {
		nstime_t jitter;

		nstime_init(&jitter, prng_range_u64(&decay->jitter_state,
		    nstime_ns(&decay->interval)));
		nstime_add(&decay->deadline, &jitter);
	}
}
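
/*
 * (Re)set the decay rate and derived state.  For example (assuming the stock
 * smoothstep.h, where SMOOTHSTEP_NSTEPS is 200), decay_ms = 10000 gives an
 * epoch interval of 10^10 ns / 200 = 50 ms, so a full decay cycle spans 10 s.
 */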
void
decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED);
	if (decay_ms > 0) {
		nstime_init(&decay->interval, (uint64_t)decay_ms *
		    KQU(1000000));
		nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS);
	}

	nstime_copy(&decay->epoch, cur_time);
	decay->jitter_state = (uint64_t)(uintptr_t)decay;
	decay_deadline_init(decay);
	decay->nunpurged = 0;
	memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t));
}
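
/*
 * Returns true on error (i.e. if the decay mutex cannot be initialized),
 * following the usual jemalloc convention for boolean return values.
 */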
bool
decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) {
	if (config_debug) {
		for (size_t i = 0; i < sizeof(decay_t); i++) {
			assert(((char *)decay)[i] == 0);
		}
		decay->ceil_npages = 0;
	}
	if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}
	decay->purging = false;
	decay_reinit(decay, cur_time, decay_ms);
	return false;
}
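
/*
 * A decay_ms of -1 ("never purge") or 0 ("purge immediately") is valid, as is
 * any positive value no larger than NSTIME_SEC_MAX seconds expressed in
 * milliseconds.
 */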
bool
decay_ms_valid(ssize_t decay_ms) {
	if (decay_ms < -1) {
		return false;
	}
	if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX *
	    KQU(1000)) {
		return true;
	}
	return false;
}

static void
decay_maybe_update_time(decay_t *decay, nstime_t *new_time) {
	if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch,
	    new_time) > 0)) {
		/*
		 * Time went backwards.  Move the epoch back in time and
		 * generate a new deadline, with the expectation that time
		 * typically flows forward long enough for epochs to complete.
		 * Unfortunately, this strategy is susceptible to clock jitter
		 * triggering premature epoch advances, but clock jitter
		 * estimation and compensation isn't feasible here because
		 * calls into this code are event-driven.
		 */
		nstime_copy(&decay->epoch, new_time);
		decay_deadline_init(decay);
	} else {
		/* Verify that time does not go backwards. */
		assert(nstime_compare(&decay->epoch, new_time) <= 0);
	}
}

static size_t
decay_backlog_npages_limit(const decay_t *decay) {
	/*
	 * For each element of the backlog, multiply by the corresponding
	 * fixed-point smoothstep decay factor.  Sum the products, then shift
	 * right by SMOOTHSTEP_BFP to round down to the nearest whole number
	 * of pages.  The newest entry is weighted at (essentially) full
	 * value, while the oldest entries are weighted close to zero.
	 */
	uint64_t sum = 0;
	for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP);

	return npages_limit_backlog;
}

/*
 * Update backlog, assuming that 'nadvance_u64' time intervals have passed.
 * The oldest 'nadvance_u64' records are dropped, and a new record derived
 * from 'current_npages' is appended as the newest one.
 */
static void
decay_backlog_update(decay_t *decay, uint64_t nadvance_u64,
    size_t current_npages) {
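	/*
	 * E.g., with a hypothetical 4-step backlog, advancing {a, b, c, d}
	 * by 2 epochs yields {c, d, 0, delta}: the branches below shift and
	 * zero-fill, and delta is appended at the end of the function.
	 */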
	if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) {
		memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) *
		    sizeof(size_t));
	} else {
		size_t nadvance_z = (size_t)nadvance_u64;

		assert((uint64_t)nadvance_z == nadvance_u64);

		memmove(decay->backlog, &decay->backlog[nadvance_z],
		    (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t));
		if (nadvance_z > 1) {
			memset(&decay->backlog[SMOOTHSTEP_NSTEPS -
			    nadvance_z], 0, (nadvance_z-1) * sizeof(size_t));
		}
	}

	size_t npages_delta = (current_npages > decay->nunpurged) ?
	    current_npages - decay->nunpurged : 0;
	decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta;

	if (config_debug) {
		if (current_npages > decay->ceil_npages) {
			decay->ceil_npages = current_npages;
		}
		size_t npages_limit = decay_backlog_npages_limit(decay);
		assert(decay->ceil_npages >= npages_limit);
		if (decay->ceil_npages > npages_limit) {
			decay->ceil_npages = npages_limit;
		}
	}
}

static inline bool
decay_deadline_reached(const decay_t *decay, const nstime_t *time) {
	return (nstime_compare(&decay->deadline, time) <= 0);
}
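
/*
 * Compute how many of 'npages_new' pages the decay curve schedules for
 * purging within the duration 'time'.  Because the smoothstep polynomials
 * satisfy f(x) + f(1 - x) = 1, the weight difference used below is, up to
 * fixed-point rounding, the purged fraction f(n_epoch / SMOOTHSTEP_NSTEPS).
 */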
uint64_t
decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) {
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns);

	uint64_t npages_purge;
	if (n_epoch >= SMOOTHSTEP_NSTEPS) {
		npages_purge = npages_new;
	} else {
		uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
		assert(h_steps_max >=
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge = npages_new * (h_steps_max -
		    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
		npages_purge >>= SMOOTHSTEP_BFP;
	}
	return npages_purge;
}
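
/*
 * Advance the epoch if 'new_time' has reached the current deadline, folding
 * the elapsed epochs into the backlog and recomputing npages_limit.  Returns
 * true iff at least one epoch was advanced.
 */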
bool
decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
    size_t npages_current) {
	/* Handle possible non-monotonicity of time. */
	decay_maybe_update_time(decay, new_time);

	if (!decay_deadline_reached(decay, new_time)) {
		return false;
	}
	nstime_t delta;
	nstime_copy(&delta, new_time);
	nstime_subtract(&delta, &decay->epoch);

	uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval);
	assert(nadvance_u64 > 0);

	/* Add nadvance_u64 decay intervals to epoch. */
	nstime_copy(&delta, &decay->interval);
	nstime_imultiply(&delta, nadvance_u64);
	nstime_add(&decay->epoch, &delta);

	/* Set a new deadline. */
	decay_deadline_init(decay);

	/* Update the backlog. */
	decay_backlog_update(decay, nadvance_u64, npages_current);

	decay->npages_limit = decay_backlog_npages_limit(decay);
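	/*
	 * Record the baseline against which the next epoch's backlog delta
	 * will be measured; pages at or below it are already accounted for.
	 */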
	decay->nunpurged = (decay->npages_limit > npages_current) ?
	    decay->npages_limit : npages_current;

	return true;
}

/*
 * Calculate how many pages should be purged after 'interval'.
 *
 * First, calculate how many pages should remain at the moment, then subtract
 * the number of pages that should remain after 'interval'.  The difference is
 * how many pages should be purged until then.
 *
 * The number of pages that should remain at a specific moment is calculated
 * like this: pages(now) = sum(backlog[i] * h_steps[i]).  After 'interval'
 * passes, the backlog would shift 'interval' positions to the left and the
 * sigmoid curve would be applied starting with backlog['interval'].
 *
 * The implementation doesn't directly map to the description, but it's
 * essentially the same calculation, optimized to avoid iterating over
 * [interval..SMOOTHSTEP_NSTEPS) twice: the first loop accumulates the terms
 * that disappear entirely after the shift, and the second accumulates, for
 * each surviving term, the difference between its current weight and the
 * weight it will have after shifting.
 */
static inline size_t
decay_npurge_after_interval(decay_t *decay, size_t interval) {
	size_t i;
	uint64_t sum = 0;
	for (i = 0; i < interval; i++) {
		sum += decay->backlog[i] * h_steps[i];
	}
	for (; i < SMOOTHSTEP_NSTEPS; i++) {
		sum += decay->backlog[i] *
		    (h_steps[i] - h_steps[i - interval]);
	}

	return (size_t)(sum >> SMOOTHSTEP_BFP);
}
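
/*
 * Estimate how long the caller may sleep before the decay curve schedules
 * more than 'npages_threshold' pages for purging, via binary search.  Returns
 * nanoseconds, or DECAY_UNBOUNDED_TIME_TO_PURGE when there is nothing to wait
 * for (decay is not gradual, or no dirty pages are recorded).
 */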
uint64_t
decay_ns_until_purge(decay_t *decay, size_t npages_current,
    uint64_t npages_threshold) {
	if (!decay_gradually(decay)) {
		return DECAY_UNBOUNDED_TIME_TO_PURGE;
	}
	uint64_t decay_interval_ns = decay_epoch_duration_ns(decay);
	assert(decay_interval_ns > 0);
	if (npages_current == 0) {
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			return DECAY_UNBOUNDED_TIME_TO_PURGE;
		}
	}
	if (npages_current <= npages_threshold) {
		/* Use max interval. */
		return decay_interval_ns * SMOOTHSTEP_NSTEPS;
	}

	/* A minimum of 2 intervals ensures reaching the next epoch deadline. */
	size_t lb = 2;
	size_t ub = SMOOTHSTEP_NSTEPS;

	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > npages_threshold) {
		return decay_interval_ns * lb;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < npages_threshold) {
		return decay_interval_ns * ub;
	}

	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > npages_threshold) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
		++n_search;
	}
	return decay_interval_ns * (ub + lb) / 2;
}