#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/buf_writer.h"
#include "jemalloc/internal/emitter.h"
#include "jemalloc/internal/prof_data.h"
#include "jemalloc/internal/prof_recent.h"

ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below. */
static atomic_zd_t prof_recent_alloc_max;
static ssize_t prof_recent_alloc_count = 0;
prof_recent_list_t prof_recent_alloc_list;

malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */

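/*
 * Semantics of prof_recent_alloc_max, as implied by the checks below: -1
 * means no limit on the number of records kept, 0 means last-N recording is
 * off, and a positive value bounds the length of the record list.
 */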
static void
prof_recent_alloc_max_init() {
	atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
	    ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get_no_lock() {
	return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_max_get_no_lock();
}

static inline ssize_t
prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t old_max = prof_recent_alloc_max_get(tsd);
	atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED);
	return old_max;
}

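/*
 * Records are allocated as internal metadata from arena 0 and bypass the
 * tcache (per the iallocztm() arguments below), so they are not attributed
 * to the application's usage statistics.
 */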
static prof_recent_t *
prof_recent_allocate_node(tsdn_t *tsdn) {
	return (prof_recent_t *)iallocztm(tsdn, sizeof(prof_recent_t),
	    sz_size2index(sizeof(prof_recent_t)), false, NULL, true,
	    arena_get(tsdn, 0, false), true);
}

static void
prof_recent_free_node(tsdn_t *tsdn, prof_recent_t *node) {
	assert(node != NULL);
	assert(isalloc(tsdn, node) == sz_s2u(sizeof(prof_recent_t)));
	idalloctm(tsdn, node, NULL, NULL, true, true);
}

static inline void
increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	++tctx->recent_count;
	assert(tctx->recent_count > 0);
}

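/*
 * Called at sampling time while tctx->tdata->lock is held; returns whether
 * the allocation should later be recorded via prof_recent_alloc() (after
 * tctx->tdata->lock has been released).
 */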
bool
prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) {
	cassert(config_prof);
	assert(opt_prof && prof_booted);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/*
	 * Check whether last-N mode is turned on without trying to acquire the
	 * lock, so as to optimize for the following two scenarios:
	 * (1) Last-N mode is switched off;
	 * (2) Dumping, during which last-N mode is temporarily turned off so
	 *     as not to block sampled allocations.
	 */
	if (prof_recent_alloc_max_get_no_lock() == 0) {
		return false;
	}

	/*
	 * Increment recent_count to hold the tctx so that it won't be gone
	 * even after tctx->tdata->lock is released.  This acts as a
	 * "placeholder"; the real recording of the allocation requires a lock
	 * on prof_recent_alloc_mtx and is done in prof_recent_alloc (when
	 * tctx->tdata->lock has been released).
	 */
	increment_recent_count(tsd, tctx);
	return true;
}

static void
decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(tctx != NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->recent_count > 0);
	--tctx->recent_count;
	prof_tctx_try_destroy(tsd, tctx);
}

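/*
 * An edata and its recent-allocation record point at each other.  The record
 * holds the edata in an atomic acquire/release pointer so that it can be
 * inspected without the lock, while all updates to either direction of the
 * linkage happen under prof_recent_alloc_mtx.
 */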
static inline edata_t *
prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) {
	return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE);
}

edata_t *
prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) {
	cassert(config_prof);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static inline edata_t *
prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static void
prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	atomic_store_p(&n->alloc_edata, edata, ATOMIC_RELEASE);
}

void
edata_prof_recent_alloc_init(edata_t *edata) {
	cassert(config_prof);
	edata_prof_recent_alloc_set_dont_call_directly(edata, NULL);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get_no_lock(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_dont_call_directly(edata);
}

prof_recent_t *
edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_no_lock(edata);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *recent_alloc =
	    edata_prof_recent_alloc_get_no_lock(edata);
	assert(recent_alloc == NULL ||
	    prof_recent_alloc_edata_get(tsd, recent_alloc) == edata);
	return recent_alloc;
}

static prof_recent_t *
edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_get(tsd, edata);
	edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);
	return old_recent_alloc;
}

static void
edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc);
	assert(old_recent_alloc == NULL);
	prof_recent_alloc_edata_set(tsd, recent_alloc, edata);
}

static void
edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, NULL);
	assert(old_recent_alloc == recent_alloc);
	assert(edata == prof_recent_alloc_edata_get(tsd, recent_alloc));
	prof_recent_alloc_edata_set(tsd, recent_alloc, NULL);
}

/*
 * This function should be called right before an allocation is released, so
 * that the associated recent allocation record can capture the following
 * information:
 * (1) the fact that the allocation has been released;
 * (2) the time of the deallocation; and
 * (3) the prof_tctx associated with the deallocation.
 */
void
prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) {
	cassert(config_prof);
	/*
	 * Check whether the recent allocation record still exists without
	 * trying to acquire the lock.
	 */
	if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) {
		return;
	}

	prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd);
	/*
	 * If dalloc_tctx is NULL, e.g. due to OOM, the deallocation time /
	 * tctx will simply not be recorded; that case is handled later, after
	 * we check again while holding the lock.
	 */

	if (dalloc_tctx != NULL) {
		malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
		increment_recent_count(tsd, dalloc_tctx);
		dalloc_tctx->prepared = false;
		malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	/* Check again after acquiring the lock. */
	prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
	if (recent != NULL) {
		assert(nstime_equals_zero(&recent->dalloc_time));
		assert(recent->dalloc_tctx == NULL);
		if (dalloc_tctx != NULL) {
			nstime_prof_update(&recent->dalloc_time);
			recent->dalloc_tctx = dalloc_tctx;
			dalloc_tctx = NULL;
		}
		edata_prof_recent_alloc_reset(tsd, edata, recent);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (dalloc_tctx != NULL) {
		/* We lost the race - the allocation record is already gone. */
		decrement_recent_count(tsd, dalloc_tctx);
	}
}

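/*
 * Detach the record from its (still-live) allocation, if any, so that the
 * eventual deallocation of that allocation no longer refers back to the
 * record.
 */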
static void
prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	edata_t *edata = prof_recent_alloc_edata_get(tsd, recent_alloc);
	if (edata != NULL) {
		edata_prof_recent_alloc_reset(tsd, edata, recent_alloc);
	}
}

static bool
prof_recent_alloc_is_empty(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (ql_empty(&prof_recent_alloc_list)) {
		assert(prof_recent_alloc_count == 0);
		return true;
	} else {
		assert(prof_recent_alloc_count > 0);
		return false;
	}
}

static void
prof_recent_alloc_assert_count(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (!config_debug) {
		return;
	}
	ssize_t count = 0;
	prof_recent_t *n;
	ql_foreach(n, &prof_recent_alloc_list, link) {
		++count;
	}
	assert(count == prof_recent_alloc_count);
	assert(prof_recent_alloc_max_get(tsd) == -1 ||
	    count <= prof_recent_alloc_max_get(tsd));
}

void
prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) {
	cassert(config_prof);
	assert(edata != NULL);
	prof_tctx_t *tctx = edata_prof_tctx_get(edata);

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);

	/*
	 * Reserve a new prof_recent_t node if needed.  If one is needed, we
	 * release the prof_recent_alloc_mtx lock and allocate.  Then, rather
	 * than immediately checking for OOM, we regain the lock and try to
	 * make use of the reserve node if needed.  There are six scenarios:
	 *
	 *          \ now | no need | need but OOMed | need and allocated
	 *     later \    |         |                |
	 *    ------------------------------------------------------------
	 *     no need    |   (1)   |      (2)       |         (3)
	 *    ------------------------------------------------------------
	 *     need       |   (4)   |      (5)       |         (6)
	 *
	 * First, "(4)" never happens, because we don't release the lock in the
	 * middle if there's no need for a new node; in such cases "(1)" always
	 * takes place, which is trivial.
	 *
	 * Out of the remaining four scenarios, "(6)" is the common case and is
	 * trivial.  "(5)" is also trivial, in which case we'll roll back the
	 * effect of prof_recent_alloc_prepare() as expected.
	 *
	 * "(2)" / "(3)" occurs when the need for a new node is gone after we
	 * regain the lock.  If the new node is successfully allocated, i.e. in
	 * the case of "(3)", we'll release it in the end; otherwise, i.e. in
	 * the case of "(2)", we do nothing - we're lucky that the OOM ends up
	 * doing no harm at all.
	 *
	 * Therefore, the only performance cost of the "release lock" ->
	 * "allocate" -> "regain lock" design is the "(3)" case, but it happens
	 * very rarely, so the cost is relatively small compared to the gain of
	 * not having to order prof_recent_alloc_mtx above all the allocation
	 * locks.
	 */
	prof_recent_t *reserve = NULL;
	if (prof_recent_alloc_max_get(tsd) == -1 ||
	    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
		assert(prof_recent_alloc_max_get(tsd) != 0);
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		reserve = prof_recent_allocate_node(tsd_tsdn(tsd));
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		prof_recent_alloc_assert_count(tsd);
	}

	if (prof_recent_alloc_max_get(tsd) == 0) {
		assert(prof_recent_alloc_is_empty(tsd));
		goto label_rollback;
	}

	prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx;
	if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) {
		/* If upper limit is reached, rotate the head. */
		assert(prof_recent_alloc_max_get(tsd) != -1);
		assert(!prof_recent_alloc_is_empty(tsd));
		prof_recent_t *head = ql_first(&prof_recent_alloc_list);
		old_alloc_tctx = head->alloc_tctx;
		assert(old_alloc_tctx != NULL);
		old_dalloc_tctx = head->dalloc_tctx;
		prof_recent_alloc_evict_edata(tsd, head);
		ql_rotate(&prof_recent_alloc_list, link);
	} else {
		/* Otherwise make use of the new node. */
		assert(prof_recent_alloc_max_get(tsd) == -1 ||
		    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
		if (reserve == NULL) {
			goto label_rollback;
		}
		ql_elm_new(reserve, link);
		ql_tail_insert(&prof_recent_alloc_list, reserve, link);
		reserve = NULL;
		old_alloc_tctx = NULL;
		old_dalloc_tctx = NULL;
		++prof_recent_alloc_count;
	}

	/* Fill content into the tail node. */
	prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link);
	assert(tail != NULL);
	tail->size = size;
	tail->usize = usize;
	nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata));
	tail->alloc_tctx = tctx;
	nstime_init_zero(&tail->dalloc_time);
	tail->dalloc_tctx = NULL;
	edata_prof_recent_alloc_set(tsd, edata, tail);

	assert(!prof_recent_alloc_is_empty(tsd));
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}

	/*
	 * Asynchronously handle the tctxs of the old node, so that
	 * prof_recent_alloc_mtx and tdata->lock are never held at the same
	 * time.  In the worst case this may delay the tctx release, but
	 * that's better than holding prof_recent_alloc_mtx for longer.
	 */
	if (old_alloc_tctx != NULL) {
		decrement_recent_count(tsd, old_alloc_tctx);
	}
	if (old_dalloc_tctx != NULL) {
		decrement_recent_count(tsd, old_dalloc_tctx);
	}
	return;

label_rollback:
	assert(edata_prof_recent_alloc_get(tsd, edata) == NULL);
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}
	decrement_recent_count(tsd, tctx);
}

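/*
 * The two ctl hooks below back the last-N mallctl interface.  A minimal
 * usage sketch, assuming the "experimental.prof_recent.alloc_max" mallctl
 * name from the jemalloc manual (adjust to the name actually exposed by the
 * build):
 *
 *	ssize_t max = 100;
 *	mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &max,
 *	    sizeof(max));
 */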
ssize_t
prof_recent_alloc_max_ctl_read() {
	cassert(config_prof);
	/* Don't bother to acquire the lock. */
	return prof_recent_alloc_max_get_no_lock();
}

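/*
 * Trim the list down to the current max by evicting the oldest records into
 * to_delete, so that the caller can clean them up after dropping
 * prof_recent_alloc_mtx.
 */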
static void
prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t max = prof_recent_alloc_max_get(tsd);
	if (max == -1 || prof_recent_alloc_count <= max) {
		/* Easy case - no need to alter the list. */
		ql_new(to_delete);
		prof_recent_alloc_assert_count(tsd);
		return;
	}

	prof_recent_t *node;
	ql_foreach(node, &prof_recent_alloc_list, link) {
		if (prof_recent_alloc_count == max) {
			break;
		}
		prof_recent_alloc_evict_edata(tsd, node);
		--prof_recent_alloc_count;
	}
	assert(prof_recent_alloc_count == max);

	ql_move(to_delete, &prof_recent_alloc_list);
	if (max == 0) {
		assert(node == NULL);
	} else {
		assert(node != NULL);
		ql_split(to_delete, node, &prof_recent_alloc_list, link);
	}
	assert(!ql_empty(to_delete));
	prof_recent_alloc_assert_count(tsd);
}

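/*
 * Release the tctx references held by the evicted records and free the
 * nodes.  Must be called with neither prof_recent_alloc_mtx nor
 * prof_recent_dump_mtx held, since decrement_recent_count() takes
 * tdata->lock and may destroy the tctx.
 */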
static void
prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	while (!ql_empty(to_delete)) {
		prof_recent_t *node = ql_first(to_delete);
		ql_remove(to_delete, node, link);
		decrement_recent_count(tsd, node->alloc_tctx);
		if (node->dalloc_tctx != NULL) {
			decrement_recent_count(tsd, node->dalloc_tctx);
		}
		prof_recent_free_node(tsd_tsdn(tsd), node);
	}
}

ssize_t
prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
	cassert(config_prof);
	assert(max >= -1);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);
	prof_recent_list_t to_delete;
	prof_recent_alloc_restore_locked(tsd, &to_delete);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_async_cleanup(tsd, &to_delete);
	return old_max;
}

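/*
 * Emit the backtrace as a JSON array of hex addresses.  bt_buf is sized for
 * one formatted pointer: 2 * sizeof(intptr_t) hex digits, plus 3 bytes for
 * the "0x" prefix and the NUL terminator.
 */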
static void
prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
	char bt_buf[2 * sizeof(intptr_t) + 3];
	char *s = bt_buf;
	assert(tctx != NULL);
	prof_bt_t *bt = &tctx->gctx->bt;
	for (size_t i = 0; i < bt->len; ++i) {
		malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]);
		emitter_json_value(emitter, emitter_type_string, &s);
	}
}

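/*
 * Emit one JSON object per record; the dalloc fields are emitted only if the
 * allocation has been released and a dalloc tctx was captured.
 */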
static void
prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) {
	emitter_json_object_begin(emitter);

	emitter_json_kv(emitter, "size", emitter_type_size, &node->size);
	emitter_json_kv(emitter, "usize", emitter_type_size, &node->usize);
	bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL;
	emitter_json_kv(emitter, "released", emitter_type_bool, &released);

	emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64,
	    &node->alloc_tctx->thr_uid);
	prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata;
	assert(alloc_tdata != NULL);
	if (alloc_tdata->thread_name != NULL) {
		emitter_json_kv(emitter, "alloc_thread_name",
		    emitter_type_string, &alloc_tdata->thread_name);
	}
	uint64_t alloc_time_ns = nstime_ns(&node->alloc_time);
	emitter_json_kv(emitter, "alloc_time", emitter_type_uint64,
	    &alloc_time_ns);
	emitter_json_array_kv_begin(emitter, "alloc_trace");
	prof_recent_alloc_dump_bt(emitter, node->alloc_tctx);
	emitter_json_array_end(emitter);

	if (released && node->dalloc_tctx != NULL) {
		emitter_json_kv(emitter, "dalloc_thread_uid",
		    emitter_type_uint64, &node->dalloc_tctx->thr_uid);
		prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata;
		assert(dalloc_tdata != NULL);
		if (dalloc_tdata->thread_name != NULL) {
			emitter_json_kv(emitter, "dalloc_thread_name",
			    emitter_type_string, &dalloc_tdata->thread_name);
		}
		assert(!nstime_equals_zero(&node->dalloc_time));
		uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time);
		emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64,
		    &dalloc_time_ns);
		emitter_json_array_kv_begin(emitter, "dalloc_trace");
		prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx);
		emitter_json_array_end(emitter);
	}

	emitter_json_object_end(emitter);
}

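/*
 * Dump strategy: detach the whole list (and zero the count) under
 * prof_recent_alloc_mtx, emit the JSON without holding that lock so that
 * sampled allocations are not blocked, then splice the detached records back
 * and re-trim against the current max, since records may have been added and
 * the max may have changed during the dump.
 */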
#define PROF_RECENT_PRINT_BUFSIZE 65536
JEMALLOC_COLD
void
prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
	cassert(config_prof);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	buf_writer_t buf_writer;
	buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL,
	    PROF_RECENT_PRINT_BUFSIZE);
	emitter_t emitter;
	emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb,
	    &buf_writer);
	prof_recent_list_t temp_list;

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ssize_t dump_max = prof_recent_alloc_max_get(tsd);
	ql_move(&temp_list, &prof_recent_alloc_list);
	ssize_t dump_count = prof_recent_alloc_count;
	prof_recent_alloc_count = 0;
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	emitter_begin(&emitter);
	uint64_t sample_interval = (uint64_t)1U << lg_prof_sample;
	emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64,
	    &sample_interval);
	emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize,
	    &dump_max);
	emitter_json_array_kv_begin(&emitter, "recent_alloc");
	prof_recent_t *node;
	ql_foreach(node, &temp_list, link) {
		prof_recent_alloc_dump_node(&emitter, node);
	}
	emitter_json_array_end(&emitter);
	emitter_end(&emitter);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ql_concat(&temp_list, &prof_recent_alloc_list, link);
	ql_move(&prof_recent_alloc_list, &temp_list);
	prof_recent_alloc_count += dump_count;
	prof_recent_alloc_restore_locked(tsd, &temp_list);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	buf_writer_terminate(tsd_tsdn(tsd), &buf_writer);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_dump_mtx);

	prof_recent_alloc_async_cleanup(tsd, &temp_list);
}
#undef PROF_RECENT_PRINT_BUFSIZE

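/*
 * Boot-time initialization; returns true on failure, following the usual
 * jemalloc convention.
 */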
bool
prof_recent_init() {
	cassert(config_prof);
	prof_recent_alloc_max_init();

	if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc",
	    WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) {
		return true;
	}

	if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump",
	    WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) {
		return true;
	}

	ql_new(&prof_recent_alloc_list);

	return false;
}