/* xref: /freebsd/contrib/jemalloc/src/prof_recent.c (revision c43cad87172039ccf38172129c79755ea79e6102) */
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/buf_writer.h"
#include "jemalloc/internal/emitter.h"
#include "jemalloc/internal/prof_data.h"
#include "jemalloc/internal/prof_recent.h"

ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */
static atomic_zd_t prof_recent_alloc_max;
static ssize_t prof_recent_alloc_count = 0;
prof_recent_list_t prof_recent_alloc_list;

malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */
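
/*
 * Relationship between the globals above: opt_prof_recent_alloc_max carries
 * the boot-time setting (opt.prof_recent_alloc_max) and is copied into the
 * atomic prof_recent_alloc_max by prof_recent_alloc_max_init(); the atomic is
 * what the runtime paths and the mallctl read/write helpers below consult.
 * prof_recent_alloc_list and prof_recent_alloc_count are only accessed while
 * holding prof_recent_alloc_mtx, and prof_recent_dump_mtx serializes dumps.
 */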

static void
prof_recent_alloc_max_init() {
	atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
	    ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get_no_lock() {
	return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_max_get_no_lock();
}

static inline ssize_t
prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t old_max = prof_recent_alloc_max_get(tsd);
	atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED);
	return old_max;
}

static prof_recent_t *
prof_recent_allocate_node(tsdn_t *tsdn) {
	return (prof_recent_t *)iallocztm(tsdn, sizeof(prof_recent_t),
	    sz_size2index(sizeof(prof_recent_t)), false, NULL, true,
	    arena_get(tsdn, 0, false), true);
}

static void
prof_recent_free_node(tsdn_t *tsdn, prof_recent_t *node) {
	assert(node != NULL);
	assert(isalloc(tsdn, node) == sz_s2u(sizeof(prof_recent_t)));
	idalloctm(tsdn, node, NULL, NULL, true, true);
}

static inline void
increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	++tctx->recent_count;
	assert(tctx->recent_count > 0);
}

bool
prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) {
	cassert(config_prof);
	assert(opt_prof && prof_booted);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/*
	 * Check whether last-N mode is turned on without trying to acquire the
	 * lock, so as to optimize for the following two scenarios:
	 * (1) Last-N mode is switched off;
	 * (2) Dumping, during which last-N mode is temporarily turned off so
	 *     as not to block sampled allocations.
	 */
	if (prof_recent_alloc_max_get_no_lock() == 0) {
		return false;
	}

	/*
	 * Increment recent_count to hold the tctx so that it won't be gone
	 * even after tctx->tdata->lock is released.  This acts as a
	 * "placeholder"; the real recording of the allocation requires a lock
	 * on prof_recent_alloc_mtx and is done in prof_recent_alloc (when
	 * tctx->tdata->lock has been released).
	 */
	increment_recent_count(tsd, tctx);
	return true;
}
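
/*
 * Sketch of the intended calling sequence (the real caller lives elsewhere in
 * the prof code; the locals below are illustrative only):
 *
 *	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 *	bool record = prof_recent_alloc_prepare(tsd, tctx);
 *	malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 *	if (record) {
 *		prof_recent_alloc(tsd, edata, size, usize);
 *	}
 *
 * The "placeholder" reference taken in prof_recent_alloc_prepare() keeps tctx
 * alive across the unlock; prof_recent_alloc() either stores it in a record
 * or rolls the reference back.
 */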

static void
decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(tctx != NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->recent_count > 0);
	--tctx->recent_count;
	prof_tctx_try_destroy(tsd, tctx);
}

static inline edata_t *
prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) {
	return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE);
}

edata_t *
prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) {
	cassert(config_prof);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static inline edata_t *
prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static void
prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	atomic_store_p(&n->alloc_edata, edata, ATOMIC_RELEASE);
}

void
edata_prof_recent_alloc_init(edata_t *edata) {
	cassert(config_prof);
	edata_prof_recent_alloc_set_dont_call_directly(edata, NULL);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get_no_lock(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_dont_call_directly(edata);
}

prof_recent_t *
edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_no_lock(edata);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *recent_alloc =
	    edata_prof_recent_alloc_get_no_lock(edata);
	assert(recent_alloc == NULL ||
	    prof_recent_alloc_edata_get(tsd, recent_alloc) == edata);
	return recent_alloc;
}

static prof_recent_t *
edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_get(tsd, edata);
	edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);
	return old_recent_alloc;
}

static void
edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc);
	assert(old_recent_alloc == NULL);
	prof_recent_alloc_edata_set(tsd, recent_alloc, edata);
}

static void
edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, NULL);
	assert(old_recent_alloc == recent_alloc);
	assert(edata == prof_recent_alloc_edata_get(tsd, recent_alloc));
	prof_recent_alloc_edata_set(tsd, recent_alloc, NULL);
}
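
/*
 * The set/reset pair above maintains a symmetric link between a live
 * allocation and its record: the edata's record pointer and the record's
 * alloc_edata refer to each other, and both links are only modified while
 * holding prof_recent_alloc_mtx.  Each direction also has a lock-free reader:
 * prof_recent_alloc_reset() peeks at the edata -> record link before taking
 * the lock, and the dump path reads the record -> edata link via
 * prof_recent_alloc_edata_get_no_lock() to report whether the allocation has
 * been released, which is why that link uses release/acquire atomics.
 */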

/*
 * This function should be called right before an allocation is released, so
 * that the associated recent allocation record can record the following
 * information:
 * (1) that the allocation has been released;
 * (2) the time of the deallocation; and
 * (3) the prof_tctx associated with the deallocation.
 */
void
prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) {
	cassert(config_prof);
	/*
	 * Check whether the recent allocation record still exists without
	 * trying to acquire the lock.
	 */
	if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) {
		return;
	}

	prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd);
	/*
	 * If dalloc_tctx is NULL, e.g. due to OOM, the deallocation time and
	 * tctx will not be recorded; that case is handled below, after we
	 * check the record again while holding the lock.
	 */

	if (dalloc_tctx != NULL) {
		malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
		increment_recent_count(tsd, dalloc_tctx);
		dalloc_tctx->prepared = false;
		malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	/* Check again after acquiring the lock. */
	prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
	if (recent != NULL) {
		assert(nstime_equals_zero(&recent->dalloc_time));
		assert(recent->dalloc_tctx == NULL);
		if (dalloc_tctx != NULL) {
			nstime_prof_update(&recent->dalloc_time);
			recent->dalloc_tctx = dalloc_tctx;
			dalloc_tctx = NULL;
		}
		edata_prof_recent_alloc_reset(tsd, edata, recent);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (dalloc_tctx != NULL) {
		/* We lost the race - the allocation record is already gone. */
		decrement_recent_count(tsd, dalloc_tctx);
	}
}

static void
prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	edata_t *edata = prof_recent_alloc_edata_get(tsd, recent_alloc);
	if (edata != NULL) {
		edata_prof_recent_alloc_reset(tsd, edata, recent_alloc);
	}
}

static bool
prof_recent_alloc_is_empty(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (ql_empty(&prof_recent_alloc_list)) {
		assert(prof_recent_alloc_count == 0);
		return true;
	} else {
		assert(prof_recent_alloc_count > 0);
		return false;
	}
}

static void
prof_recent_alloc_assert_count(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (!config_debug) {
		return;
	}
	ssize_t count = 0;
	prof_recent_t *n;
	ql_foreach(n, &prof_recent_alloc_list, link) {
		++count;
	}
	assert(count == prof_recent_alloc_count);
	assert(prof_recent_alloc_max_get(tsd) == -1 ||
	    count <= prof_recent_alloc_max_get(tsd));
}

void
prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) {
	cassert(config_prof);
	assert(edata != NULL);
	prof_tctx_t *tctx = edata_prof_tctx_get(edata);

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);

	/*
	 * Reserve a new prof_recent_t node if needed.  If needed, we release
	 * the prof_recent_alloc_mtx lock and allocate.  Then, rather than
	 * immediately checking for OOM, we regain the lock and try to make use
	 * of the reserve node if needed.  There are six scenarios:
	 *
	 *          \ now | no need | need but OOMed | need and allocated
	 *     later \    |         |                |
	 *    ------------------------------------------------------------
	 *     no need    |   (1)   |      (2)       |         (3)
	 *    ------------------------------------------------------------
	 *     need       |   (4)   |      (5)       |         (6)
	 *
	 * First, "(4)" never happens, because we don't release the lock in the
	 * middle if there's no need for a new node; in such cases "(1)" always
	 * takes place, which is trivial.
	 *
	 * Out of the remaining four scenarios, "(6)" is the common case and is
	 * trivial.  "(5)" is also trivial, in which case we'll roll back the
	 * effect of prof_recent_alloc_prepare() as expected.
	 *
	 * "(2)" / "(3)" occurs when the need for a new node is gone after we
	 * regain the lock.  If the new node is successfully allocated, i.e. in
	 * the case of "(3)", we'll release it in the end; otherwise, i.e. in
	 * the case of "(2)", we do nothing - we're lucky that the OOM ends up
	 * doing no harm at all.
	 *
	 * Therefore, the only performance cost of the "release lock" ->
	 * "allocate" -> "regain lock" design is the "(3)" case, but it happens
	 * very rarely, so the cost is relatively small compared to the gain of
	 * not having to order prof_recent_alloc_mtx above all the allocation
	 * locks.
	 */
	prof_recent_t *reserve = NULL;
	if (prof_recent_alloc_max_get(tsd) == -1 ||
	    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
		assert(prof_recent_alloc_max_get(tsd) != 0);
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		reserve = prof_recent_allocate_node(tsd_tsdn(tsd));
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		prof_recent_alloc_assert_count(tsd);
	}

	if (prof_recent_alloc_max_get(tsd) == 0) {
		assert(prof_recent_alloc_is_empty(tsd));
		goto label_rollback;
	}

	prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx;
	if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) {
		/* If upper limit is reached, rotate the head. */
		assert(prof_recent_alloc_max_get(tsd) != -1);
		assert(!prof_recent_alloc_is_empty(tsd));
		prof_recent_t *head = ql_first(&prof_recent_alloc_list);
		old_alloc_tctx = head->alloc_tctx;
		assert(old_alloc_tctx != NULL);
		old_dalloc_tctx = head->dalloc_tctx;
		prof_recent_alloc_evict_edata(tsd, head);
		ql_rotate(&prof_recent_alloc_list, link);
	} else {
		/* Otherwise make use of the new node. */
		assert(prof_recent_alloc_max_get(tsd) == -1 ||
		    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
		if (reserve == NULL) {
			goto label_rollback;
		}
		ql_elm_new(reserve, link);
		ql_tail_insert(&prof_recent_alloc_list, reserve, link);
		reserve = NULL;
		old_alloc_tctx = NULL;
		old_dalloc_tctx = NULL;
		++prof_recent_alloc_count;
	}

	/* Fill content into the tail node. */
	prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link);
	assert(tail != NULL);
	tail->size = size;
	tail->usize = usize;
	nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata));
	tail->alloc_tctx = tctx;
	nstime_init_zero(&tail->dalloc_time);
	tail->dalloc_tctx = NULL;
	edata_prof_recent_alloc_set(tsd, edata, tail);

	assert(!prof_recent_alloc_is_empty(tsd));
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}

	/*
	 * Asynchronously handle the tctx of the old node, so that there's no
	 * simultaneous holding of prof_recent_alloc_mtx and tdata->lock.
	 * In the worst case this may delay the tctx release, but it's better
	 * than holding prof_recent_alloc_mtx for longer.
	 */
	if (old_alloc_tctx != NULL) {
		decrement_recent_count(tsd, old_alloc_tctx);
	}
	if (old_dalloc_tctx != NULL) {
		decrement_recent_count(tsd, old_dalloc_tctx);
	}
	return;

label_rollback:
	assert(edata_prof_recent_alloc_get(tsd, edata) == NULL);
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}
	decrement_recent_count(tsd, tctx);
}

ssize_t
prof_recent_alloc_max_ctl_read() {
	cassert(config_prof);
	/* Don't bother to acquire the lock. */
	return prof_recent_alloc_max_get_no_lock();
}

static void
prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t max = prof_recent_alloc_max_get(tsd);
	if (max == -1 || prof_recent_alloc_count <= max) {
		/* Easy case - no need to alter the list. */
		ql_new(to_delete);
		prof_recent_alloc_assert_count(tsd);
		return;
	}

	prof_recent_t *node;
	ql_foreach(node, &prof_recent_alloc_list, link) {
		if (prof_recent_alloc_count == max) {
			break;
		}
		prof_recent_alloc_evict_edata(tsd, node);
		--prof_recent_alloc_count;
	}
	assert(prof_recent_alloc_count == max);

	ql_move(to_delete, &prof_recent_alloc_list);
	if (max == 0) {
		assert(node == NULL);
	} else {
		assert(node != NULL);
		ql_split(to_delete, node, &prof_recent_alloc_list, link);
	}
	assert(!ql_empty(to_delete));
	prof_recent_alloc_assert_count(tsd);
}
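
/*
 * Worked example for the trimming above: suppose the list holds five records
 * r1 (oldest) .. r5 (newest) and max has just been lowered to 2.  The loop
 * severs r1..r3 from their edata (if still live) and drops the count to 2;
 * node then points at r4.  ql_move() transfers the whole list into *to_delete,
 * and ql_split() at r4 moves r4 and r5 back into prof_recent_alloc_list,
 * leaving r1..r3 in *to_delete for the caller to clean up outside the lock.
 */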

static void
prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	while (!ql_empty(to_delete)) {
		prof_recent_t *node = ql_first(to_delete);
		ql_remove(to_delete, node, link);
		decrement_recent_count(tsd, node->alloc_tctx);
		if (node->dalloc_tctx != NULL) {
			decrement_recent_count(tsd, node->dalloc_tctx);
		}
		prof_recent_free_node(tsd_tsdn(tsd), node);
	}
}

ssize_t
prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
	cassert(config_prof);
	assert(max >= -1);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);
	prof_recent_list_t to_delete;
	prof_recent_alloc_restore_locked(tsd, &to_delete);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_async_cleanup(tsd, &to_delete);
	return old_max;
}
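
/*
 * The read/write helpers above back the "experimental.prof_recent.alloc_max"
 * mallctl.  A minimal user-side sketch (assuming that mallctl name; -1 means
 * unlimited and 0 disables last-N recording):
 *
 *	ssize_t new_max = 100, old_max;
 *	size_t old_len = sizeof(old_max);
 *	mallctl("experimental.prof_recent.alloc_max", &old_max, &old_len,
 *	    &new_max, sizeof(new_max));
 */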

static void
prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
	char bt_buf[2 * sizeof(intptr_t) + 3];
	char *s = bt_buf;
	assert(tctx != NULL);
	prof_bt_t *bt = &tctx->gctx->bt;
	for (size_t i = 0; i < bt->len; ++i) {
		malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]);
		emitter_json_value(emitter, emitter_type_string, &s);
	}
}

static void
prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) {
	emitter_json_object_begin(emitter);

	emitter_json_kv(emitter, "size", emitter_type_size, &node->size);
	emitter_json_kv(emitter, "usize", emitter_type_size, &node->usize);
	bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL;
	emitter_json_kv(emitter, "released", emitter_type_bool, &released);

	emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64,
	    &node->alloc_tctx->thr_uid);
	prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata;
	assert(alloc_tdata != NULL);
	if (alloc_tdata->thread_name != NULL) {
		emitter_json_kv(emitter, "alloc_thread_name",
		    emitter_type_string, &alloc_tdata->thread_name);
	}
	uint64_t alloc_time_ns = nstime_ns(&node->alloc_time);
	emitter_json_kv(emitter, "alloc_time", emitter_type_uint64,
	    &alloc_time_ns);
	emitter_json_array_kv_begin(emitter, "alloc_trace");
	prof_recent_alloc_dump_bt(emitter, node->alloc_tctx);
	emitter_json_array_end(emitter);

	if (released && node->dalloc_tctx != NULL) {
		emitter_json_kv(emitter, "dalloc_thread_uid",
		    emitter_type_uint64, &node->dalloc_tctx->thr_uid);
		prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata;
		assert(dalloc_tdata != NULL);
		if (dalloc_tdata->thread_name != NULL) {
			emitter_json_kv(emitter, "dalloc_thread_name",
			    emitter_type_string, &dalloc_tdata->thread_name);
		}
		assert(!nstime_equals_zero(&node->dalloc_time));
		uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time);
		emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64,
		    &dalloc_time_ns);
		emitter_json_array_kv_begin(emitter, "dalloc_trace");
		prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx);
		emitter_json_array_end(emitter);
	}

	emitter_json_object_end(emitter);
}
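
/*
 * Shape of a single emitted record (values illustrative; *_thread_name appears
 * only when a thread name was set, and dalloc_* fields only when the
 * allocation was released with a recorded dalloc tctx):
 *
 *	{"size": 4096, "usize": 4096, "released": true,
 *	 "alloc_thread_uid": 1, "alloc_thread_name": "worker",
 *	 "alloc_time": 1234567890, "alloc_trace": ["0x...", "0x..."],
 *	 "dalloc_thread_uid": 2, "dalloc_time": 1234567999,
 *	 "dalloc_trace": ["0x...", "0x..."]}
 */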

#define PROF_RECENT_PRINT_BUFSIZE 65536
JEMALLOC_COLD
void
prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
	cassert(config_prof);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	buf_writer_t buf_writer;
	buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL,
	    PROF_RECENT_PRINT_BUFSIZE);
	emitter_t emitter;
	emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb,
	    &buf_writer);
	prof_recent_list_t temp_list;

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ssize_t dump_max = prof_recent_alloc_max_get(tsd);
	ql_move(&temp_list, &prof_recent_alloc_list);
	ssize_t dump_count = prof_recent_alloc_count;
	prof_recent_alloc_count = 0;
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	emitter_begin(&emitter);
	uint64_t sample_interval = (uint64_t)1U << lg_prof_sample;
	emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64,
	    &sample_interval);
	emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize,
	    &dump_max);
	emitter_json_array_kv_begin(&emitter, "recent_alloc");
	prof_recent_t *node;
	ql_foreach(node, &temp_list, link) {
		prof_recent_alloc_dump_node(&emitter, node);
	}
	emitter_json_array_end(&emitter);
	emitter_end(&emitter);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ql_concat(&temp_list, &prof_recent_alloc_list, link);
	ql_move(&prof_recent_alloc_list, &temp_list);
	prof_recent_alloc_count += dump_count;
	prof_recent_alloc_restore_locked(tsd, &temp_list);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	buf_writer_terminate(tsd_tsdn(tsd), &buf_writer);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_dump_mtx);

	prof_recent_alloc_async_cleanup(tsd, &temp_list);
}
#undef PROF_RECENT_PRINT_BUFSIZE
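
/*
 * Dumping works by detaching the whole list (and zeroing the count) under
 * prof_recent_alloc_mtx, emitting JSON without holding that lock, then
 * splicing any records gathered during the dump onto the detached list,
 * restoring it, and trimming back to the configured maximum.  Records trimmed
 * away are released via prof_recent_alloc_async_cleanup() after both locks are
 * dropped.  This path is typically reached through the
 * "experimental.prof_recent.alloc_dump" mallctl.
 */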

bool
prof_recent_init() {
	cassert(config_prof);
	prof_recent_alloc_max_init();

	if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc",
	    WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) {
		return true;
	}

	if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump",
	    WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) {
		return true;
	}

	ql_new(&prof_recent_alloc_list);

	return false;
}