#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/thread_event.h"

/*
 * Signatures for event-specific functions.  These functions should be defined
 * by the modules owning each event.  The signatures here verify that the
 * definitions follow the right format.
 *
 * The first two are functions computing new / postponed event wait time.  New
 * event wait time is the time till the next event if an event is currently
 * being triggered; postponed event wait time is the time till the next event
 * if an event should be triggered but needs to be postponed, e.g. when the TSD
 * is not nominal or during reentrancy.
 *
 * The third is the event handler function, which is called whenever an event
 * is triggered.  The parameter is the elapsed time since the last time an
 * event of the same type was triggered.
 */
#define E(event, condition_unused, is_alloc_event_unused)		\
uint64_t event##_new_event_wait(tsd_t *tsd);				\
uint64_t event##_postponed_event_wait(tsd_t *tsd);			\
void event##_event_handler(tsd_t *tsd, uint64_t elapsed);

ITERATE_OVER_ALL_EVENTS
#undef E
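
/*
 * For illustration only (a sketch, not part of the interface): assuming
 * tcache_gc is one of the events listed by ITERATE_OVER_ALL_EVENTS (as the
 * *_fetch_elapsed definitions below suggest), the block above expands to
 * per-event prototypes of the form
 *
 *   uint64_t tcache_gc_new_event_wait(tsd_t *tsd);
 *   uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd);
 *   void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed);
 *
 * with the actual definitions living in the module that owns the event.
 */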

/* Signatures for internal functions fetching elapsed time. */
#define E(event, condition_unused, is_alloc_event_unused)		\
static uint64_t event##_fetch_elapsed(tsd_t *tsd);

ITERATE_OVER_ALL_EVENTS
#undef E

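/*
 * Events that do not need elapsed-time information simply report
 * TE_INVALID_ELAPSED to their handlers.
 */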
static uint64_t
tcache_gc_fetch_elapsed(tsd_t *tsd) {
	return TE_INVALID_ELAPSED;
}

static uint64_t
tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) {
	return TE_INVALID_ELAPSED;
}

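/*
 * For the profiling sample and stats interval events, the elapsed value is
 * the number of bytes allocated since the previous event of the same kind:
 * the difference between the thread's last_event counter and the per-event
 * snapshot, which is refreshed here.
 */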
static uint64_t
prof_sample_fetch_elapsed(tsd_t *tsd) {
	uint64_t last_event = thread_allocated_last_event_get(tsd);
	uint64_t last_sample_event = prof_sample_last_event_get(tsd);
	prof_sample_last_event_set(tsd, last_event);
	return last_event - last_sample_event;
}

static uint64_t
stats_interval_fetch_elapsed(tsd_t *tsd) {
	uint64_t last_event = thread_allocated_last_event_get(tsd);
	uint64_t last_stats_event = stats_interval_last_event_get(tsd);
	stats_interval_last_event_set(tsd, last_event);
	return last_event - last_stats_event;
}

static uint64_t
peak_alloc_fetch_elapsed(tsd_t *tsd) {
	return TE_INVALID_ELAPSED;
}

static uint64_t
peak_dalloc_fetch_elapsed(tsd_t *tsd) {
	return TE_INVALID_ELAPSED;
}

/* Per-event facilities done. */

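/*
 * Debug-only helper: reports whether any event whose compile-time condition
 * holds is attached to this context's (alloc vs. dalloc) stream.
 */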
static bool
te_ctx_has_active_events(te_ctx_t *ctx) {
	assert(config_debug);
#define E(event, condition, alloc_event)			       \
	if (condition && alloc_event == ctx->is_alloc) {	       \
		return true;					       \
	}
	ITERATE_OVER_ALL_EVENTS
#undef E
	return false;
}

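/*
 * Computes the smallest positive wait among the active events on the given
 * (alloc vs. dalloc) stream; TE_MAX_START_WAIT if none is active.
 */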
static uint64_t
te_next_event_compute(tsd_t *tsd, bool is_alloc) {
	uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event)				\
	if (is_alloc == alloc_event && condition) {			\
		uint64_t event_wait =					\
		    event##_event_wait_get(tsd);			\
		assert(event_wait <= TE_MAX_START_WAIT);		\
		if (event_wait > 0U && event_wait < wait) {		\
			wait = event_wait;				\
		}							\
	}

	ITERATE_OVER_ALL_EVENTS
#undef E
	assert(wait <= TE_MAX_START_WAIT);
	return wait;
}

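/*
 * Invariants checked per stream: last_event <= current_bytes < next_event
 * (modulo intentional wraparound), next_event_fast mirrors next_event exactly
 * when the tsd is fast and the threshold fits, and the current interval
 * equals the smallest active event wait unless it was clipped to
 * TE_MAX_INTERVAL (or no event is active on a freshly initialized tsd).
 */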
static void
te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) {
	uint64_t current_bytes = te_ctx_current_bytes_get(ctx);
	uint64_t last_event = te_ctx_last_event_get(ctx);
	uint64_t next_event = te_ctx_next_event_get(ctx);
	uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx);

	assert(last_event != next_event);
	if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) {
		assert(next_event_fast == 0U);
	} else {
		assert(next_event_fast == next_event);
	}

	/* The subtraction is intentionally susceptible to underflow. */
	uint64_t interval = next_event - last_event;

	/* The subtraction is intentionally susceptible to underflow. */
	assert(current_bytes - last_event < interval);
	uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx));
	/*
	 * next_event should only ever have been pushed up, except when no
	 * event is active and the TSD has just been initialized.  The
	 * last_event == 0U guard below is stronger than needed, but an
	 * exactly accurate guard would be more complicated to implement.
	 */
	assert((!te_ctx_has_active_events(ctx) && last_event == 0U) ||
	    interval == min_wait ||
	    (interval < min_wait && interval == TE_MAX_INTERVAL));
}

void
te_assert_invariants_debug(tsd_t *tsd) {
	te_ctx_t ctx;
	te_ctx_get(tsd, &ctx, true);
	te_assert_invariants_impl(tsd, &ctx);

	te_ctx_get(tsd, &ctx, false);
	te_assert_invariants_impl(tsd, &ctx);
}

/*
 * Synchronization around the fast threshold in tsd --
 * There are two threads to consider in the synchronization here:
 * - The owner of the tsd being updated by a slow-path change
 * - The remote thread, doing that slow-path change.
 *
 * As a design constraint, we want to ensure that a slow-path transition cannot
 * be ignored for arbitrarily long, and that if the remote thread causes a
 * slow-path transition and then communicates with the owner thread that it has
 * occurred, then the owner will go down the slow path on the next allocator
 * operation (i.e. we don't want to just wait until the owner hits its
 * slow-path reset condition on its own).
 *
 * Here's our strategy to do that:
 *
 * The remote thread will do the slow-path stores to the TSD variables, issue a
 * SEQ_CST fence, and then update the TSD next_event_fast counter.  The owner
 * thread will update next_event_fast, issue a SEQ_CST fence, and then check
 * its TSD to see if it's on the slow path.
 *
 * This is fairly straightforward when 64-bit atomics are supported.  Assume
 * that the remote fence is sandwiched between two owner fences in the reset
 * pathway.  The case where there is no preceding or trailing owner fence (i.e.
 * because the owner thread is near the beginning or end of its life) can be
 * analyzed similarly.  The owner store to next_event_fast preceding the
 * earlier owner fence will be earlier in coherence order than the remote store
 * to it, so that the owner thread will go down the slow path once the store
 * becomes visible to it, which is no later than the time of the second fence.
 *
 * The case where we don't support 64-bit atomics is trickier, since word
 * tearing is possible.  We'll repeat the same analysis, and look at the two
 * owner fences sandwiching the remote fence.  The next_event_fast stores done
 * alongside the earlier owner fence cannot overwrite any of the remote stores
 * (since they precede the earlier owner fence in sb, which precedes the remote
 * fence in sc, which precedes the remote stores in sb).  After the second
 * owner fence there will be a re-check of the slow-path variables anyway, so
 * the "owner will notice that it's on the slow path eventually" guarantee is
 * satisfied.  To make sure that the out-of-band-messaging constraint is met as
 * well, note that either the message passing is sequenced before the second
 * owner fence (in which case the remote stores happen before the second set of
 * owner stores, so malloc sees a value of zero for next_event_fast and goes
 * down the slow path), or it is not (in which case the owner sees the tsd
 * slow-path writes on its previous update).  This leaves open the possibility
 * that the remote thread will (at some arbitrary point in the future) zero out
 * one half of the owner thread's next_event_fast, but that's always safe (it
 * just sends it down the slow path earlier).
 */
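
/*
 * A minimal sketch of the ordering described above (not itself part of the
 * protocol; owner on the left, remote on the right, time flowing downward):
 *
 *   owner thread                          remote thread
 *   ------------                          -------------
 *   store next_event_fast                 store slow-path tsd variables
 *   atomic_fence(ATOMIC_SEQ_CST)          atomic_fence(ATOMIC_SEQ_CST)
 *   re-check slow-path tsd state          store 0 to next_event_fast
 *
 * te_recompute_fast_threshold() below is the owner-side half: it updates both
 * next_event_fast values, issues the SEQ_CST fence, and then re-checks whether
 * the tsd has left the nominal state.
 */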
static void
te_ctx_next_event_fast_update(te_ctx_t *ctx) {
	uint64_t next_event = te_ctx_next_event_get(ctx);
	uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ?
	    next_event : 0U;
	te_ctx_next_event_fast_set(ctx, next_event_fast);
}

void
te_recompute_fast_threshold(tsd_t *tsd) {
	if (tsd_state_get(tsd) != tsd_state_nominal) {
		/* Check first because this is also called on purgatory. */
		te_next_event_fast_set_non_nominal(tsd);
		return;
	}

	te_ctx_t ctx;
	te_ctx_get(tsd, &ctx, true);
	te_ctx_next_event_fast_update(&ctx);
	te_ctx_get(tsd, &ctx, false);
	te_ctx_next_event_fast_update(&ctx);

	atomic_fence(ATOMIC_SEQ_CST);
	if (tsd_state_get(tsd) != tsd_state_nominal) {
		te_next_event_fast_set_non_nominal(tsd);
	}
}

static void
te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx,
    uint64_t wait) {
	/*
	 * The next threshold based on future events can only be adjusted after
	 * progressing the last_event counter (which is set to current).
	 */
	assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx));
	assert(wait <= TE_MAX_START_WAIT);

	uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <=
	    TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL);
	te_ctx_next_event_set(tsd, ctx, next_event);
}

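/*
 * Clamps an event wait into [TE_MIN_START_WAIT, TE_MAX_START_WAIT].  The
 * compile-time bound checks let the compiler drop a branch entirely when the
 * corresponding bound is trivial.
 */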
static uint64_t
te_clip_event_wait(uint64_t event_wait) {
	assert(event_wait > 0U);
	if (TE_MIN_START_WAIT > 1U &&
	    unlikely(event_wait < TE_MIN_START_WAIT)) {
		event_wait = TE_MIN_START_WAIT;
	}
	if (TE_MAX_START_WAIT < UINT64_MAX &&
	    unlikely(event_wait > TE_MAX_START_WAIT)) {
		event_wait = TE_MAX_START_WAIT;
	}
	return event_wait;
}

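/*
 * Rolls the events forward after the current bytes counter has crossed
 * next_event: charges the accumulated bytes against each active event's wait
 * (postponing events that cannot fire right now), re-establishes the
 * thresholds, and only then runs the handlers of the events that fired.
 */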
void
te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) {
	/* usize has already been added to thread_allocated. */
	uint64_t bytes_after = te_ctx_current_bytes_get(ctx);
	/* The subtraction is intentionally susceptible to underflow. */
	uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx);

	te_ctx_last_event_set(ctx, bytes_after);

	bool allow_event_trigger = tsd_nominal(tsd) &&
	    tsd_reentrancy_level_get(tsd) == 0;
	bool is_alloc = ctx->is_alloc;
	uint64_t wait = TE_MAX_START_WAIT;

#define E(event, condition, alloc_event)				\
	bool is_##event##_triggered = false;				\
	if (is_alloc == alloc_event && condition) {			\
		uint64_t event_wait = event##_event_wait_get(tsd);	\
		assert(event_wait <= TE_MAX_START_WAIT);		\
		if (event_wait > accumbytes) {				\
			event_wait -= accumbytes;			\
		} else if (!allow_event_trigger) {			\
			event_wait = event##_postponed_event_wait(tsd);	\
		} else {						\
			is_##event##_triggered = true;			\
			event_wait = event##_new_event_wait(tsd);	\
		}							\
		event_wait = te_clip_event_wait(event_wait);		\
		event##_event_wait_set(tsd, event_wait);		\
		if (event_wait < wait) {				\
			wait = event_wait;				\
		}							\
	}

	ITERATE_OVER_ALL_EVENTS
#undef E

	assert(wait <= TE_MAX_START_WAIT);
	te_adjust_thresholds_helper(tsd, ctx, wait);
	te_assert_invariants(tsd);

#define E(event, condition, alloc_event)				\
	if (is_alloc == alloc_event && condition &&			\
	    is_##event##_triggered) {					\
		assert(allow_event_trigger);				\
		uint64_t elapsed = event##_fetch_elapsed(tsd);		\
		event##_event_handler(tsd, elapsed);			\
	}

	ITERATE_OVER_ALL_EVENTS
#undef E

	te_assert_invariants(tsd);
}

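/*
 * (Re)initializes the event state for one stream: resets last_event to the
 * current bytes, asks every active event for a fresh wait, and derives the
 * corresponding next_event threshold.
 */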
static void
te_init(tsd_t *tsd, bool is_alloc) {
	te_ctx_t ctx;
	te_ctx_get(tsd, &ctx, is_alloc);
	/*
	 * Reset the last event to current, which starts the events from a
	 * clean state.  This is necessary when re-initializing the tsd event
	 * counters.
	 *
	 * The event counters maintain a relationship with the current bytes:
	 * last_event <= current < next_event.  When a reinit happens (e.g.
	 * reincarnated tsd), the last event needs progressing because all
	 * events start fresh from the current bytes.
	 */
	te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx));

	uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event)				\
	if (is_alloc == alloc_event && condition) {			\
		uint64_t event_wait = event##_new_event_wait(tsd);	\
		event_wait = te_clip_event_wait(event_wait);		\
		event##_event_wait_set(tsd, event_wait);		\
		if (event_wait < wait) {				\
			wait = event_wait;				\
		}							\
	}

	ITERATE_OVER_ALL_EVENTS
#undef E
	te_adjust_thresholds_helper(tsd, &ctx, wait);
}

void
tsd_te_init(tsd_t *tsd) {
	/* Make sure no overflow for the bytes accumulated on event_trigger. */
	assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1);
	te_init(tsd, true);
	te_init(tsd, false);
	te_assert_invariants(tsd);
}