xref: /freebsd/contrib/jemalloc/src/prof.c (revision c5ad81420c495d1d5de04209b0ec4fcb435c322c)
1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/ckh.h"
7 #include "jemalloc/internal/hash.h"
8 #include "jemalloc/internal/malloc_io.h"
9 #include "jemalloc/internal/mutex.h"
10 #include "jemalloc/internal/emitter.h"
11 
12 /******************************************************************************/
13 
14 #ifdef JEMALLOC_PROF_LIBUNWIND
15 #define UNW_LOCAL_ONLY
16 #include <libunwind.h>
17 #endif
18 
19 #ifdef JEMALLOC_PROF_LIBGCC
20 /*
21  * We have a circular dependency -- jemalloc_internal.h tells us if we should
22  * use libgcc's unwinding functionality, but after we've included that, we've
23  * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
24  */
25 #undef _Unwind_Backtrace
26 #include <unwind.h>
27 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
28 #endif
29 
30 /******************************************************************************/
31 /* Data. */
32 
33 bool		opt_prof = false;
34 bool		opt_prof_active = true;
35 bool		opt_prof_thread_active_init = true;
36 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
37 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
38 bool		opt_prof_gdump = false;
39 bool		opt_prof_final = false;
40 bool		opt_prof_leak = false;
41 bool		opt_prof_accum = false;
42 bool		opt_prof_log = false;
43 char		opt_prof_prefix[
44     /* Minimize memory bloat for non-prof builds. */
45 #ifdef JEMALLOC_PROF
46     PATH_MAX +
47 #endif
48     1];
49 
50 /*
51  * Initialized as opt_prof_active, and accessed via
52  * prof_active_[gs]et{_unlocked,}().
53  */
54 bool			prof_active;
55 static malloc_mutex_t	prof_active_mtx;
56 
57 /*
58  * Initialized as opt_prof_thread_active_init, and accessed via
59  * prof_thread_active_init_[gs]et().
60  */
61 static bool		prof_thread_active_init;
62 static malloc_mutex_t	prof_thread_active_init_mtx;
63 
64 /*
65  * Initialized as opt_prof_gdump, and accessed via
66  * prof_gdump_[gs]et{_unlocked,}().
67  */
68 bool			prof_gdump_val;
69 static malloc_mutex_t	prof_gdump_mtx;
70 
71 uint64_t	prof_interval = 0;
72 
73 size_t		lg_prof_sample;
74 
/*
 * Logging life cycle:
 * - stopped: log_start never called, or previous log_stop has completed.
 * - started: log_start called, log_stop not called yet.  Allocations are
 *   logged.
 * - dumping: log_stop called but not finished; samples are not logged anymore.
 */
typedef enum prof_logging_state_e {
	prof_logging_state_stopped,
	prof_logging_state_started,
	prof_logging_state_dumping
} prof_logging_state_t;

/* Current logging state; initially stopped. */
prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
88 
89 #ifdef JEMALLOC_JET
90 static bool prof_log_dummy = false;
91 #endif
92 
93 /* Incremented for every log file that is output. */
94 static uint64_t log_seq = 0;
95 static char log_filename[
96     /* Minimize memory bloat for non-prof builds. */
97 #ifdef JEMALLOC_PROF
98     PATH_MAX +
99 #endif
100     1];
101 
102 /* Timestamp for most recent call to log_start(). */
103 static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER;
104 
105 /* Increment these when adding to the log_bt and log_thr linked lists. */
106 static size_t log_bt_index = 0;
107 static size_t log_thr_index = 0;
108 
109 /* Linked list node definitions. These are only used in prof.c. */
110 typedef struct prof_bt_node_s prof_bt_node_t;
111 
112 struct prof_bt_node_s {
113 	prof_bt_node_t *next;
114 	size_t index;
115 	prof_bt_t bt;
116 	/* Variable size backtrace vector pointed to by bt. */
117 	void *vec[1];
118 };
119 
/* Singly linked list node holding one logged thread. */
typedef struct prof_thr_node_s {
	/* Next node in the list. */
	struct prof_thr_node_s *next;
	/* Index assigned to this thread when it was logged. */
	size_t index;
	uint64_t thr_uid;
	/* Variable size based on thr_name_sz. */
	char name[1];
} prof_thr_node_t;
129 
/* This is output when logging sampled allocations. */
typedef struct prof_alloc_node_s {
	/* Next node in the list. */
	struct prof_alloc_node_s *next;

	/* Indices into an array of thread data. */
	size_t alloc_thr_ind;
	size_t free_thr_ind;

	/* Indices into an array of backtraces. */
	size_t alloc_bt_ind;
	size_t free_bt_ind;

	/* Allocation and deallocation times, in nanoseconds. */
	uint64_t alloc_time_ns;
	uint64_t free_time_ns;

	size_t usize;
} prof_alloc_node_t;
148 
149 /*
150  * Created on the first call to prof_log_start and deleted on prof_log_stop.
151  * These are the backtraces and threads that have already been logged by an
152  * allocation.
153  */
154 static bool log_tables_initialized = false;
155 static ckh_t log_bt_node_set;
156 static ckh_t log_thr_node_set;
157 
158 /* Store linked lists for logged data. */
159 static prof_bt_node_t *log_bt_first = NULL;
160 static prof_bt_node_t *log_bt_last = NULL;
161 static prof_thr_node_t *log_thr_first = NULL;
162 static prof_thr_node_t *log_thr_last = NULL;
163 static prof_alloc_node_t *log_alloc_first = NULL;
164 static prof_alloc_node_t *log_alloc_last = NULL;
165 
166 /* Protects the prof_logging_state and any log_{...} variable. */
167 static malloc_mutex_t log_mtx;
168 
169 /*
170  * Table of mutexes that are shared among gctx's.  These are leaf locks, so
171  * there is no problem with using them for more than one gctx at the same time.
172  * The primary motivation for this sharing though is that gctx's are ephemeral,
173  * and destroying mutexes causes complications for systems that allocate when
174  * creating/destroying mutexes.
175  */
176 static malloc_mutex_t	*gctx_locks;
177 static atomic_u_t	cum_gctxs; /* Atomic counter. */
178 
179 /*
180  * Table of mutexes that are shared among tdata's.  No operations require
181  * holding multiple tdata locks, so there is no problem with using them for more
182  * than one tdata at the same time, even though a gctx lock may be acquired
183  * while holding a tdata lock.
184  */
185 static malloc_mutex_t	*tdata_locks;
186 
187 /*
188  * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
189  * structure that knows about all backtraces currently captured.
190  */
191 static ckh_t		bt2gctx;
192 /* Non static to enable profiling. */
193 malloc_mutex_t		bt2gctx_mtx;
194 
195 /*
196  * Tree of all extant prof_tdata_t structures, regardless of state,
197  * {attached,detached,expired}.
198  */
199 static prof_tdata_tree_t	tdatas;
200 static malloc_mutex_t	tdatas_mtx;
201 
202 static uint64_t		next_thr_uid;
203 static malloc_mutex_t	next_thr_uid_mtx;
204 
205 static malloc_mutex_t	prof_dump_seq_mtx;
206 static uint64_t		prof_dump_seq;
207 static uint64_t		prof_dump_iseq;
208 static uint64_t		prof_dump_mseq;
209 static uint64_t		prof_dump_useq;
210 
211 /*
212  * This buffer is rather large for stack allocation, so use a single buffer for
213  * all profile dumps.
214  */
215 static malloc_mutex_t	prof_dump_mtx;
216 static char		prof_dump_buf[
217     /* Minimize memory bloat for non-prof builds. */
218 #ifdef JEMALLOC_PROF
219     PROF_DUMP_BUFSIZE
220 #else
221     1
222 #endif
223 ];
224 static size_t		prof_dump_buf_end;
225 static int		prof_dump_fd;
226 
227 /* Do not dump any profiles until bootstrapping is complete. */
228 static bool		prof_booted = false;
229 
230 /******************************************************************************/
231 /*
232  * Function prototypes for static functions that are referenced prior to
233  * definition.
234  */
235 
236 static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
237 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
238 static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
239     bool even_if_attached);
240 static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
241     bool even_if_attached);
242 static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
243 
244 /* Hashtable functions for log_bt_node_set and log_thr_node_set. */
245 static void prof_thr_node_hash(const void *key, size_t r_hash[2]);
246 static bool prof_thr_node_keycomp(const void *k1, const void *k2);
247 static void prof_bt_node_hash(const void *key, size_t r_hash[2]);
248 static bool prof_bt_node_keycomp(const void *k1, const void *k2);
249 
250 /******************************************************************************/
251 /* Red-black trees. */
252 
253 static int
prof_tctx_comp(const prof_tctx_t * a,const prof_tctx_t * b)254 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
255 	uint64_t a_thr_uid = a->thr_uid;
256 	uint64_t b_thr_uid = b->thr_uid;
257 	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
258 	if (ret == 0) {
259 		uint64_t a_thr_discrim = a->thr_discrim;
260 		uint64_t b_thr_discrim = b->thr_discrim;
261 		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
262 		    b_thr_discrim);
263 		if (ret == 0) {
264 			uint64_t a_tctx_uid = a->tctx_uid;
265 			uint64_t b_tctx_uid = b->tctx_uid;
266 			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
267 			    b_tctx_uid);
268 		}
269 	}
270 	return ret;
271 }
272 
rb_gen(static UNUSED,tctx_tree_,prof_tctx_tree_t,prof_tctx_t,tctx_link,prof_tctx_comp)273 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
274     tctx_link, prof_tctx_comp)
275 
276 static int
277 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
278 	unsigned a_len = a->bt.len;
279 	unsigned b_len = b->bt.len;
280 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
281 	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
282 	if (ret == 0) {
283 		ret = (a_len > b_len) - (a_len < b_len);
284 	}
285 	return ret;
286 }
287 
rb_gen(static UNUSED,gctx_tree_,prof_gctx_tree_t,prof_gctx_t,dump_link,prof_gctx_comp)288 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
289     prof_gctx_comp)
290 
291 static int
292 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
293 	int ret;
294 	uint64_t a_uid = a->thr_uid;
295 	uint64_t b_uid = b->thr_uid;
296 
297 	ret = ((a_uid > b_uid) - (a_uid < b_uid));
298 	if (ret == 0) {
299 		uint64_t a_discrim = a->thr_discrim;
300 		uint64_t b_discrim = b->thr_discrim;
301 
302 		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
303 	}
304 	return ret;
305 }
306 
rb_gen(static UNUSED,tdata_tree_,prof_tdata_tree_t,prof_tdata_t,tdata_link,prof_tdata_comp)307 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
308     prof_tdata_comp)
309 
310 /******************************************************************************/
311 
312 void
313 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
314 	prof_tdata_t *tdata;
315 
316 	cassert(config_prof);
317 
318 	if (updated) {
319 		/*
320 		 * Compute a new sample threshold.  This isn't very important in
321 		 * practice, because this function is rarely executed, so the
322 		 * potential for sample bias is minimal except in contrived
323 		 * programs.
324 		 */
325 		tdata = prof_tdata_get(tsd, true);
326 		if (tdata != NULL) {
327 			prof_sample_threshold_update(tdata);
328 		}
329 	}
330 
331 	if ((uintptr_t)tctx > (uintptr_t)1U) {
332 		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
333 		tctx->prepared = false;
334 		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
335 			prof_tctx_destroy(tsd, tctx);
336 		} else {
337 			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
338 		}
339 	}
340 }
341 
342 void
prof_malloc_sample_object(tsdn_t * tsdn,const void * ptr,size_t usize,prof_tctx_t * tctx)343 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
344     prof_tctx_t *tctx) {
345 	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
346 
347 	/* Get the current time and set this in the extent_t. We'll read this
348 	 * when free() is called. */
349 	nstime_t t = NSTIME_ZERO_INITIALIZER;
350 	nstime_update(&t);
351 	prof_alloc_time_set(tsdn, ptr, NULL, t);
352 
353 	malloc_mutex_lock(tsdn, tctx->tdata->lock);
354 	tctx->cnts.curobjs++;
355 	tctx->cnts.curbytes += usize;
356 	if (opt_prof_accum) {
357 		tctx->cnts.accumobjs++;
358 		tctx->cnts.accumbytes += usize;
359 	}
360 	tctx->prepared = false;
361 	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
362 }
363 
364 static size_t
prof_log_bt_index(tsd_t * tsd,prof_bt_t * bt)365 prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
366 	assert(prof_logging_state == prof_logging_state_started);
367 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
368 
369 	prof_bt_node_t dummy_node;
370 	dummy_node.bt = *bt;
371 	prof_bt_node_t *node;
372 
373 	/* See if this backtrace is already cached in the table. */
374 	if (ckh_search(&log_bt_node_set, (void *)(&dummy_node),
375 	    (void **)(&node), NULL)) {
376 		size_t sz = offsetof(prof_bt_node_t, vec) +
377 			        (bt->len * sizeof(void *));
378 		prof_bt_node_t *new_node = (prof_bt_node_t *)
379 		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
380 		    true, arena_get(TSDN_NULL, 0, true), true);
381 		if (log_bt_first == NULL) {
382 			log_bt_first = new_node;
383 			log_bt_last = new_node;
384 		} else {
385 			log_bt_last->next = new_node;
386 			log_bt_last = new_node;
387 		}
388 
389 		new_node->next = NULL;
390 		new_node->index = log_bt_index;
391 		/*
392 		 * Copy the backtrace: bt is inside a tdata or gctx, which
393 		 * might die before prof_log_stop is called.
394 		 */
395 		new_node->bt.len = bt->len;
396 		memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *));
397 		new_node->bt.vec = new_node->vec;
398 
399 		log_bt_index++;
400 		ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL);
401 		return new_node->index;
402 	} else {
403 		return node->index;
404 	}
405 }
406 static size_t
prof_log_thr_index(tsd_t * tsd,uint64_t thr_uid,const char * name)407 prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
408 	assert(prof_logging_state == prof_logging_state_started);
409 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
410 
411 	prof_thr_node_t dummy_node;
412 	dummy_node.thr_uid = thr_uid;
413 	prof_thr_node_t *node;
414 
415 	/* See if this thread is already cached in the table. */
416 	if (ckh_search(&log_thr_node_set, (void *)(&dummy_node),
417 	    (void **)(&node), NULL)) {
418 		size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
419 		prof_thr_node_t *new_node = (prof_thr_node_t *)
420 		    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
421 		    true, arena_get(TSDN_NULL, 0, true), true);
422 		if (log_thr_first == NULL) {
423 			log_thr_first = new_node;
424 			log_thr_last = new_node;
425 		} else {
426 			log_thr_last->next = new_node;
427 			log_thr_last = new_node;
428 		}
429 
430 		new_node->next = NULL;
431 		new_node->index = log_thr_index;
432 		new_node->thr_uid = thr_uid;
433 		strcpy(new_node->name, name);
434 
435 		log_thr_index++;
436 		ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL);
437 		return new_node->index;
438 	} else {
439 		return node->index;
440 	}
441 }
442 
443 static void
prof_try_log(tsd_t * tsd,const void * ptr,size_t usize,prof_tctx_t * tctx)444 prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
445 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
446 
447 	prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
448 	if (cons_tdata == NULL) {
449 		/*
450 		 * We decide not to log these allocations. cons_tdata will be
451 		 * NULL only when the current thread is in a weird state (e.g.
452 		 * it's being destroyed).
453 		 */
454 		return;
455 	}
456 
457 	malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);
458 
459 	if (prof_logging_state != prof_logging_state_started) {
460 		goto label_done;
461 	}
462 
463 	if (!log_tables_initialized) {
464 		bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
465 				prof_bt_node_hash, prof_bt_node_keycomp);
466 		bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
467 				prof_thr_node_hash, prof_thr_node_keycomp);
468 		if (err1 || err2) {
469 			goto label_done;
470 		}
471 		log_tables_initialized = true;
472 	}
473 
474 	nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr,
475 			          (alloc_ctx_t *)NULL);
476 	nstime_t free_time = NSTIME_ZERO_INITIALIZER;
477 	nstime_update(&free_time);
478 
479 	size_t sz = sizeof(prof_alloc_node_t);
480 	prof_alloc_node_t *new_node = (prof_alloc_node_t *)
481 	    iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
482 	    arena_get(TSDN_NULL, 0, true), true);
483 
484 	const char *prod_thr_name = (tctx->tdata->thread_name == NULL)?
485 				        "" : tctx->tdata->thread_name;
486 	const char *cons_thr_name = prof_thread_name_get(tsd);
487 
488 	prof_bt_t bt;
489 	/* Initialize the backtrace, using the buffer in tdata to store it. */
490 	bt_init(&bt, cons_tdata->vec);
491 	prof_backtrace(&bt);
492 	prof_bt_t *cons_bt = &bt;
493 
494 	/* We haven't destroyed tctx yet, so gctx should be good to read. */
495 	prof_bt_t *prod_bt = &tctx->gctx->bt;
496 
497 	new_node->next = NULL;
498 	new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
499 				      prod_thr_name);
500 	new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
501 				     cons_thr_name);
502 	new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
503 	new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
504 	new_node->alloc_time_ns = nstime_ns(&alloc_time);
505 	new_node->free_time_ns = nstime_ns(&free_time);
506 	new_node->usize = usize;
507 
508 	if (log_alloc_first == NULL) {
509 		log_alloc_first = new_node;
510 		log_alloc_last = new_node;
511 	} else {
512 		log_alloc_last->next = new_node;
513 		log_alloc_last = new_node;
514 	}
515 
516 label_done:
517 	malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
518 }
519 
520 void
prof_free_sampled_object(tsd_t * tsd,const void * ptr,size_t usize,prof_tctx_t * tctx)521 prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
522     prof_tctx_t *tctx) {
523 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
524 
525 	assert(tctx->cnts.curobjs > 0);
526 	assert(tctx->cnts.curbytes >= usize);
527 	tctx->cnts.curobjs--;
528 	tctx->cnts.curbytes -= usize;
529 
530 	prof_try_log(tsd, ptr, usize, tctx);
531 
532 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
533 		prof_tctx_destroy(tsd, tctx);
534 	} else {
535 		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
536 	}
537 }
538 
539 void
bt_init(prof_bt_t * bt,void ** vec)540 bt_init(prof_bt_t *bt, void **vec) {
541 	cassert(config_prof);
542 
543 	bt->vec = vec;
544 	bt->len = 0;
545 }
546 
547 static void
prof_enter(tsd_t * tsd,prof_tdata_t * tdata)548 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
549 	cassert(config_prof);
550 	assert(tdata == prof_tdata_get(tsd, false));
551 
552 	if (tdata != NULL) {
553 		assert(!tdata->enq);
554 		tdata->enq = true;
555 	}
556 
557 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
558 }
559 
560 static void
prof_leave(tsd_t * tsd,prof_tdata_t * tdata)561 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
562 	cassert(config_prof);
563 	assert(tdata == prof_tdata_get(tsd, false));
564 
565 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
566 
567 	if (tdata != NULL) {
568 		bool idump, gdump;
569 
570 		assert(tdata->enq);
571 		tdata->enq = false;
572 		idump = tdata->enq_idump;
573 		tdata->enq_idump = false;
574 		gdump = tdata->enq_gdump;
575 		tdata->enq_gdump = false;
576 
577 		if (idump) {
578 			prof_idump(tsd_tsdn(tsd));
579 		}
580 		if (gdump) {
581 			prof_gdump(tsd_tsdn(tsd));
582 		}
583 	}
584 }
585 
586 #ifdef JEMALLOC_PROF_LIBUNWIND
587 void
prof_backtrace(prof_bt_t * bt)588 prof_backtrace(prof_bt_t *bt) {
589 	int nframes;
590 
591 	cassert(config_prof);
592 	assert(bt->len == 0);
593 	assert(bt->vec != NULL);
594 
595 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
596 	if (nframes <= 0) {
597 		return;
598 	}
599 	bt->len = nframes;
600 }
601 #elif (defined(JEMALLOC_PROF_LIBGCC))
602 static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context * context,void * arg)603 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
604 	cassert(config_prof);
605 
606 	return _URC_NO_REASON;
607 }
608 
609 static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context * context,void * arg)610 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
611 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
612 	void *ip;
613 
614 	cassert(config_prof);
615 
616 	ip = (void *)_Unwind_GetIP(context);
617 	if (ip == NULL) {
618 		return _URC_END_OF_STACK;
619 	}
620 	data->bt->vec[data->bt->len] = ip;
621 	data->bt->len++;
622 	if (data->bt->len == data->max) {
623 		return _URC_END_OF_STACK;
624 	}
625 
626 	return _URC_NO_REASON;
627 }
628 
629 void
prof_backtrace(prof_bt_t * bt)630 prof_backtrace(prof_bt_t *bt) {
631 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
632 
633 	cassert(config_prof);
634 
635 	_Unwind_Backtrace(prof_unwind_callback, &data);
636 }
637 #elif (defined(JEMALLOC_PROF_GCC))
638 void
prof_backtrace(prof_bt_t * bt)639 prof_backtrace(prof_bt_t *bt) {
640 #define BT_FRAME(i)							\
641 	if ((i) < PROF_BT_MAX) {					\
642 		void *p;						\
643 		if (__builtin_frame_address(i) == 0) {			\
644 			return;						\
645 		}							\
646 		p = __builtin_return_address(i);			\
647 		if (p == NULL) {					\
648 			return;						\
649 		}							\
650 		bt->vec[(i)] = p;					\
651 		bt->len = (i) + 1;					\
652 	} else {							\
653 		return;							\
654 	}
655 
656 	cassert(config_prof);
657 
658 	BT_FRAME(0)
659 	BT_FRAME(1)
660 	BT_FRAME(2)
661 	BT_FRAME(3)
662 	BT_FRAME(4)
663 	BT_FRAME(5)
664 	BT_FRAME(6)
665 	BT_FRAME(7)
666 	BT_FRAME(8)
667 	BT_FRAME(9)
668 
669 	BT_FRAME(10)
670 	BT_FRAME(11)
671 	BT_FRAME(12)
672 	BT_FRAME(13)
673 	BT_FRAME(14)
674 	BT_FRAME(15)
675 	BT_FRAME(16)
676 	BT_FRAME(17)
677 	BT_FRAME(18)
678 	BT_FRAME(19)
679 
680 	BT_FRAME(20)
681 	BT_FRAME(21)
682 	BT_FRAME(22)
683 	BT_FRAME(23)
684 	BT_FRAME(24)
685 	BT_FRAME(25)
686 	BT_FRAME(26)
687 	BT_FRAME(27)
688 	BT_FRAME(28)
689 	BT_FRAME(29)
690 
691 	BT_FRAME(30)
692 	BT_FRAME(31)
693 	BT_FRAME(32)
694 	BT_FRAME(33)
695 	BT_FRAME(34)
696 	BT_FRAME(35)
697 	BT_FRAME(36)
698 	BT_FRAME(37)
699 	BT_FRAME(38)
700 	BT_FRAME(39)
701 
702 	BT_FRAME(40)
703 	BT_FRAME(41)
704 	BT_FRAME(42)
705 	BT_FRAME(43)
706 	BT_FRAME(44)
707 	BT_FRAME(45)
708 	BT_FRAME(46)
709 	BT_FRAME(47)
710 	BT_FRAME(48)
711 	BT_FRAME(49)
712 
713 	BT_FRAME(50)
714 	BT_FRAME(51)
715 	BT_FRAME(52)
716 	BT_FRAME(53)
717 	BT_FRAME(54)
718 	BT_FRAME(55)
719 	BT_FRAME(56)
720 	BT_FRAME(57)
721 	BT_FRAME(58)
722 	BT_FRAME(59)
723 
724 	BT_FRAME(60)
725 	BT_FRAME(61)
726 	BT_FRAME(62)
727 	BT_FRAME(63)
728 	BT_FRAME(64)
729 	BT_FRAME(65)
730 	BT_FRAME(66)
731 	BT_FRAME(67)
732 	BT_FRAME(68)
733 	BT_FRAME(69)
734 
735 	BT_FRAME(70)
736 	BT_FRAME(71)
737 	BT_FRAME(72)
738 	BT_FRAME(73)
739 	BT_FRAME(74)
740 	BT_FRAME(75)
741 	BT_FRAME(76)
742 	BT_FRAME(77)
743 	BT_FRAME(78)
744 	BT_FRAME(79)
745 
746 	BT_FRAME(80)
747 	BT_FRAME(81)
748 	BT_FRAME(82)
749 	BT_FRAME(83)
750 	BT_FRAME(84)
751 	BT_FRAME(85)
752 	BT_FRAME(86)
753 	BT_FRAME(87)
754 	BT_FRAME(88)
755 	BT_FRAME(89)
756 
757 	BT_FRAME(90)
758 	BT_FRAME(91)
759 	BT_FRAME(92)
760 	BT_FRAME(93)
761 	BT_FRAME(94)
762 	BT_FRAME(95)
763 	BT_FRAME(96)
764 	BT_FRAME(97)
765 	BT_FRAME(98)
766 	BT_FRAME(99)
767 
768 	BT_FRAME(100)
769 	BT_FRAME(101)
770 	BT_FRAME(102)
771 	BT_FRAME(103)
772 	BT_FRAME(104)
773 	BT_FRAME(105)
774 	BT_FRAME(106)
775 	BT_FRAME(107)
776 	BT_FRAME(108)
777 	BT_FRAME(109)
778 
779 	BT_FRAME(110)
780 	BT_FRAME(111)
781 	BT_FRAME(112)
782 	BT_FRAME(113)
783 	BT_FRAME(114)
784 	BT_FRAME(115)
785 	BT_FRAME(116)
786 	BT_FRAME(117)
787 	BT_FRAME(118)
788 	BT_FRAME(119)
789 
790 	BT_FRAME(120)
791 	BT_FRAME(121)
792 	BT_FRAME(122)
793 	BT_FRAME(123)
794 	BT_FRAME(124)
795 	BT_FRAME(125)
796 	BT_FRAME(126)
797 	BT_FRAME(127)
798 #undef BT_FRAME
799 }
800 #else
801 void
prof_backtrace(prof_bt_t * bt)802 prof_backtrace(prof_bt_t *bt) {
803 	cassert(config_prof);
804 	not_reached();
805 }
806 #endif
807 
808 static malloc_mutex_t *
prof_gctx_mutex_choose(void)809 prof_gctx_mutex_choose(void) {
810 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
811 
812 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
813 }
814 
815 static malloc_mutex_t *
prof_tdata_mutex_choose(uint64_t thr_uid)816 prof_tdata_mutex_choose(uint64_t thr_uid) {
817 	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
818 }
819 
820 static prof_gctx_t *
prof_gctx_create(tsdn_t * tsdn,prof_bt_t * bt)821 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
822 	/*
823 	 * Create a single allocation that has space for vec of length bt->len.
824 	 */
825 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
826 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
827 	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
828 	    true);
829 	if (gctx == NULL) {
830 		return NULL;
831 	}
832 	gctx->lock = prof_gctx_mutex_choose();
833 	/*
834 	 * Set nlimbo to 1, in order to avoid a race condition with
835 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
836 	 */
837 	gctx->nlimbo = 1;
838 	tctx_tree_new(&gctx->tctxs);
839 	/* Duplicate bt. */
840 	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
841 	gctx->bt.vec = gctx->vec;
842 	gctx->bt.len = bt->len;
843 	return gctx;
844 }
845 
846 static void
prof_gctx_try_destroy(tsd_t * tsd,prof_tdata_t * tdata_self,prof_gctx_t * gctx,prof_tdata_t * tdata)847 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
848     prof_tdata_t *tdata) {
849 	cassert(config_prof);
850 
851 	/*
852 	 * Check that gctx is still unused by any thread cache before destroying
853 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
854 	 * condition with this function, as does prof_tctx_destroy() in order to
855 	 * avoid a race between the main body of prof_tctx_destroy() and entry
856 	 * into this function.
857 	 */
858 	prof_enter(tsd, tdata_self);
859 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
860 	assert(gctx->nlimbo != 0);
861 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
862 		/* Remove gctx from bt2gctx. */
863 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
864 			not_reached();
865 		}
866 		prof_leave(tsd, tdata_self);
867 		/* Destroy gctx. */
868 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
869 		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
870 	} else {
871 		/*
872 		 * Compensate for increment in prof_tctx_destroy() or
873 		 * prof_lookup().
874 		 */
875 		gctx->nlimbo--;
876 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
877 		prof_leave(tsd, tdata_self);
878 	}
879 }
880 
881 static bool
prof_tctx_should_destroy(tsdn_t * tsdn,prof_tctx_t * tctx)882 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
883 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
884 
885 	if (opt_prof_accum) {
886 		return false;
887 	}
888 	if (tctx->cnts.curobjs != 0) {
889 		return false;
890 	}
891 	if (tctx->prepared) {
892 		return false;
893 	}
894 	return true;
895 }
896 
897 static bool
prof_gctx_should_destroy(prof_gctx_t * gctx)898 prof_gctx_should_destroy(prof_gctx_t *gctx) {
899 	if (opt_prof_accum) {
900 		return false;
901 	}
902 	if (!tctx_tree_empty(&gctx->tctxs)) {
903 		return false;
904 	}
905 	if (gctx->nlimbo != 0) {
906 		return false;
907 	}
908 	return true;
909 }
910 
911 static void
prof_tctx_destroy(tsd_t * tsd,prof_tctx_t * tctx)912 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
913 	prof_tdata_t *tdata = tctx->tdata;
914 	prof_gctx_t *gctx = tctx->gctx;
915 	bool destroy_tdata, destroy_tctx, destroy_gctx;
916 
917 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
918 
919 	assert(tctx->cnts.curobjs == 0);
920 	assert(tctx->cnts.curbytes == 0);
921 	assert(!opt_prof_accum);
922 	assert(tctx->cnts.accumobjs == 0);
923 	assert(tctx->cnts.accumbytes == 0);
924 
925 	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
926 	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
927 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
928 
929 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
930 	switch (tctx->state) {
931 	case prof_tctx_state_nominal:
932 		tctx_tree_remove(&gctx->tctxs, tctx);
933 		destroy_tctx = true;
934 		if (prof_gctx_should_destroy(gctx)) {
935 			/*
936 			 * Increment gctx->nlimbo in order to keep another
937 			 * thread from winning the race to destroy gctx while
938 			 * this one has gctx->lock dropped.  Without this, it
939 			 * would be possible for another thread to:
940 			 *
941 			 * 1) Sample an allocation associated with gctx.
942 			 * 2) Deallocate the sampled object.
943 			 * 3) Successfully prof_gctx_try_destroy(gctx).
944 			 *
945 			 * The result would be that gctx no longer exists by the
946 			 * time this thread accesses it in
947 			 * prof_gctx_try_destroy().
948 			 */
949 			gctx->nlimbo++;
950 			destroy_gctx = true;
951 		} else {
952 			destroy_gctx = false;
953 		}
954 		break;
955 	case prof_tctx_state_dumping:
956 		/*
957 		 * A dumping thread needs tctx to remain valid until dumping
958 		 * has finished.  Change state such that the dumping thread will
959 		 * complete destruction during a late dump iteration phase.
960 		 */
961 		tctx->state = prof_tctx_state_purgatory;
962 		destroy_tctx = false;
963 		destroy_gctx = false;
964 		break;
965 	default:
966 		not_reached();
967 		destroy_tctx = false;
968 		destroy_gctx = false;
969 	}
970 	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
971 	if (destroy_gctx) {
972 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
973 		    tdata);
974 	}
975 
976 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
977 
978 	if (destroy_tdata) {
979 		prof_tdata_destroy(tsd, tdata, false);
980 	}
981 
982 	if (destroy_tctx) {
983 		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
984 	}
985 }
986 
987 static bool
prof_lookup_global(tsd_t * tsd,prof_bt_t * bt,prof_tdata_t * tdata,void ** p_btkey,prof_gctx_t ** p_gctx,bool * p_new_gctx)988 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
989     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
990 	union {
991 		prof_gctx_t	*p;
992 		void		*v;
993 	} gctx, tgctx;
994 	union {
995 		prof_bt_t	*p;
996 		void		*v;
997 	} btkey;
998 	bool new_gctx;
999 
1000 	prof_enter(tsd, tdata);
1001 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1002 		/* bt has never been seen before.  Insert it. */
1003 		prof_leave(tsd, tdata);
1004 		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
1005 		if (tgctx.v == NULL) {
1006 			return true;
1007 		}
1008 		prof_enter(tsd, tdata);
1009 		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1010 			gctx.p = tgctx.p;
1011 			btkey.p = &gctx.p->bt;
1012 			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
1013 				/* OOM. */
1014 				prof_leave(tsd, tdata);
1015 				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
1016 				    true, true);
1017 				return true;
1018 			}
1019 			new_gctx = true;
1020 		} else {
1021 			new_gctx = false;
1022 		}
1023 	} else {
1024 		tgctx.v = NULL;
1025 		new_gctx = false;
1026 	}
1027 
1028 	if (!new_gctx) {
1029 		/*
1030 		 * Increment nlimbo, in order to avoid a race condition with
1031 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
1032 		 */
1033 		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
1034 		gctx.p->nlimbo++;
1035 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
1036 		new_gctx = false;
1037 
1038 		if (tgctx.v != NULL) {
1039 			/* Lost race to insert. */
1040 			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
1041 			    true);
1042 		}
1043 	}
1044 	prof_leave(tsd, tdata);
1045 
1046 	*p_btkey = btkey.v;
1047 	*p_gctx = gctx.p;
1048 	*p_new_gctx = new_gctx;
1049 	return false;
1050 }
1051 
1052 prof_tctx_t *
prof_lookup(tsd_t * tsd,prof_bt_t * bt)1053 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
1054 	union {
1055 		prof_tctx_t	*p;
1056 		void		*v;
1057 	} ret;
1058 	prof_tdata_t *tdata;
1059 	bool not_found;
1060 
1061 	cassert(config_prof);
1062 
1063 	tdata = prof_tdata_get(tsd, false);
1064 	if (tdata == NULL) {
1065 		return NULL;
1066 	}
1067 
1068 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1069 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
1070 	if (!not_found) { /* Note double negative! */
1071 		ret.p->prepared = true;
1072 	}
1073 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1074 	if (not_found) {
1075 		void *btkey;
1076 		prof_gctx_t *gctx;
1077 		bool new_gctx, error;
1078 
1079 		/*
1080 		 * This thread's cache lacks bt.  Look for it in the global
1081 		 * cache.
1082 		 */
1083 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
1084 		    &new_gctx)) {
1085 			return NULL;
1086 		}
1087 
1088 		/* Link a prof_tctx_t into gctx for this thread. */
1089 		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
1090 		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
1091 		    arena_ichoose(tsd, NULL), true);
1092 		if (ret.p == NULL) {
1093 			if (new_gctx) {
1094 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1095 			}
1096 			return NULL;
1097 		}
1098 		ret.p->tdata = tdata;
1099 		ret.p->thr_uid = tdata->thr_uid;
1100 		ret.p->thr_discrim = tdata->thr_discrim;
1101 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
1102 		ret.p->gctx = gctx;
1103 		ret.p->tctx_uid = tdata->tctx_uid_next++;
1104 		ret.p->prepared = true;
1105 		ret.p->state = prof_tctx_state_initializing;
1106 		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1107 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
1108 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1109 		if (error) {
1110 			if (new_gctx) {
1111 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1112 			}
1113 			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
1114 			return NULL;
1115 		}
1116 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1117 		ret.p->state = prof_tctx_state_nominal;
1118 		tctx_tree_insert(&gctx->tctxs, ret.p);
1119 		gctx->nlimbo--;
1120 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1121 	}
1122 
1123 	return ret.p;
1124 }
1125 
1126 /*
1127  * The bodies of this function and prof_leakcheck() are compiled out unless heap
1128  * profiling is enabled, so that it is possible to compile jemalloc with
1129  * floating point support completely disabled.  Avoiding floating point code is
1130  * important on memory-constrained systems, but it also enables a workaround for
1131  * versions of glibc that don't properly save/restore floating point registers
1132  * during dynamic lazy symbol loading (which internally calls into whatever
1133  * malloc implementation happens to be integrated into the application).  Note
1134  * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
1135  * memory moves, so jemalloc must be compiled with such optimizations disabled
1136  * (e.g.
1137  * -mno-sse) in order for the workaround to be complete.
1138  */
1139 void
prof_sample_threshold_update(prof_tdata_t * tdata)1140 prof_sample_threshold_update(prof_tdata_t *tdata) {
1141 #ifdef JEMALLOC_PROF
1142 	if (!config_prof) {
1143 		return;
1144 	}
1145 
1146 	if (lg_prof_sample == 0) {
1147 		tsd_bytes_until_sample_set(tsd_fetch(), 0);
1148 		return;
1149 	}
1150 
1151 	/*
1152 	 * Compute sample interval as a geometrically distributed random
1153 	 * variable with mean (2^lg_prof_sample).
1154 	 *
1155 	 *                             __        __
1156 	 *                             |  log(u)  |                     1
1157 	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
1158 	 *                             | log(1-p) |             lg_prof_sample
1159 	 *                                                     2
1160 	 *
1161 	 * For more information on the math, see:
1162 	 *
1163 	 *   Non-Uniform Random Variate Generation
1164 	 *   Luc Devroye
1165 	 *   Springer-Verlag, New York, 1986
1166 	 *   pp 500
1167 	 *   (http://luc.devroye.org/rnbookindex.html)
1168 	 */
1169 	uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
1170 	double u = (double)r * (1.0/9007199254740992.0L);
1171 	uint64_t bytes_until_sample = (uint64_t)(log(u) /
1172 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
1173 	    + (uint64_t)1U;
1174 	if (bytes_until_sample > SSIZE_MAX) {
1175 		bytes_until_sample = SSIZE_MAX;
1176 	}
1177 	tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample);
1178 
1179 #endif
1180 }
1181 
1182 #ifdef JEMALLOC_JET
1183 static prof_tdata_t *
prof_tdata_count_iter(prof_tdata_tree_t * tdatas,prof_tdata_t * tdata,void * arg)1184 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1185     void *arg) {
1186 	size_t *tdata_count = (size_t *)arg;
1187 
1188 	(*tdata_count)++;
1189 
1190 	return NULL;
1191 }
1192 
1193 size_t
prof_tdata_count(void)1194 prof_tdata_count(void) {
1195 	size_t tdata_count = 0;
1196 	tsdn_t *tsdn;
1197 
1198 	tsdn = tsdn_fetch();
1199 	malloc_mutex_lock(tsdn, &tdatas_mtx);
1200 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
1201 	    (void *)&tdata_count);
1202 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1203 
1204 	return tdata_count;
1205 }
1206 
1207 size_t
prof_bt_count(void)1208 prof_bt_count(void) {
1209 	size_t bt_count;
1210 	tsd_t *tsd;
1211 	prof_tdata_t *tdata;
1212 
1213 	tsd = tsd_fetch();
1214 	tdata = prof_tdata_get(tsd, false);
1215 	if (tdata == NULL) {
1216 		return 0;
1217 	}
1218 
1219 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
1220 	bt_count = ckh_count(&bt2gctx);
1221 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
1222 
1223 	return bt_count;
1224 }
1225 #endif
1226 
1227 static int
prof_dump_open_impl(bool propagate_err,const char * filename)1228 prof_dump_open_impl(bool propagate_err, const char *filename) {
1229 	int fd;
1230 
1231 	fd = creat(filename, 0644);
1232 	if (fd == -1 && !propagate_err) {
1233 		malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n",
1234 		    filename);
1235 		if (opt_abort) {
1236 			abort();
1237 		}
1238 	}
1239 
1240 	return fd;
1241 }
1242 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
1243 
1244 static bool
prof_dump_flush(bool propagate_err)1245 prof_dump_flush(bool propagate_err) {
1246 	bool ret = false;
1247 	ssize_t err;
1248 
1249 	cassert(config_prof);
1250 
1251 	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
1252 	if (err == -1) {
1253 		if (!propagate_err) {
1254 			malloc_write("<jemalloc>: write() failed during heap "
1255 			    "profile flush\n");
1256 			if (opt_abort) {
1257 				abort();
1258 			}
1259 		}
1260 		ret = true;
1261 	}
1262 	prof_dump_buf_end = 0;
1263 
1264 	return ret;
1265 }
1266 
1267 static bool
prof_dump_close(bool propagate_err)1268 prof_dump_close(bool propagate_err) {
1269 	bool ret;
1270 
1271 	assert(prof_dump_fd != -1);
1272 	ret = prof_dump_flush(propagate_err);
1273 	close(prof_dump_fd);
1274 	prof_dump_fd = -1;
1275 
1276 	return ret;
1277 }
1278 
1279 static bool
prof_dump_write(bool propagate_err,const char * s)1280 prof_dump_write(bool propagate_err, const char *s) {
1281 	size_t i, slen, n;
1282 
1283 	cassert(config_prof);
1284 
1285 	i = 0;
1286 	slen = strlen(s);
1287 	while (i < slen) {
1288 		/* Flush the buffer if it is full. */
1289 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1290 			if (prof_dump_flush(propagate_err) && propagate_err) {
1291 				return true;
1292 			}
1293 		}
1294 
1295 		if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
1296 			/* Finish writing. */
1297 			n = slen - i;
1298 		} else {
1299 			/* Write as much of s as will fit. */
1300 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1301 		}
1302 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1303 		prof_dump_buf_end += n;
1304 		i += n;
1305 	}
1306 	assert(i == slen);
1307 
1308 	return false;
1309 }
1310 
1311 JEMALLOC_FORMAT_PRINTF(2, 3)
1312 static bool
prof_dump_printf(bool propagate_err,const char * format,...)1313 prof_dump_printf(bool propagate_err, const char *format, ...) {
1314 	bool ret;
1315 	va_list ap;
1316 	char buf[PROF_PRINTF_BUFSIZE];
1317 
1318 	va_start(ap, format);
1319 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
1320 	va_end(ap);
1321 	ret = prof_dump_write(propagate_err, buf);
1322 
1323 	return ret;
1324 }
1325 
1326 static void
prof_tctx_merge_tdata(tsdn_t * tsdn,prof_tctx_t * tctx,prof_tdata_t * tdata)1327 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
1328 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1329 
1330 	malloc_mutex_lock(tsdn, tctx->gctx->lock);
1331 
1332 	switch (tctx->state) {
1333 	case prof_tctx_state_initializing:
1334 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1335 		return;
1336 	case prof_tctx_state_nominal:
1337 		tctx->state = prof_tctx_state_dumping;
1338 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1339 
1340 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1341 
1342 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1343 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1344 		if (opt_prof_accum) {
1345 			tdata->cnt_summed.accumobjs +=
1346 			    tctx->dump_cnts.accumobjs;
1347 			tdata->cnt_summed.accumbytes +=
1348 			    tctx->dump_cnts.accumbytes;
1349 		}
1350 		break;
1351 	case prof_tctx_state_dumping:
1352 	case prof_tctx_state_purgatory:
1353 		not_reached();
1354 	}
1355 }
1356 
1357 static void
prof_tctx_merge_gctx(tsdn_t * tsdn,prof_tctx_t * tctx,prof_gctx_t * gctx)1358 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
1359 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1360 
1361 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1362 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1363 	if (opt_prof_accum) {
1364 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1365 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1366 	}
1367 }
1368 
1369 static prof_tctx_t *
prof_tctx_merge_iter(prof_tctx_tree_t * tctxs,prof_tctx_t * tctx,void * arg)1370 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1371 	tsdn_t *tsdn = (tsdn_t *)arg;
1372 
1373 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1374 
1375 	switch (tctx->state) {
1376 	case prof_tctx_state_nominal:
1377 		/* New since dumping started; ignore. */
1378 		break;
1379 	case prof_tctx_state_dumping:
1380 	case prof_tctx_state_purgatory:
1381 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1382 		break;
1383 	default:
1384 		not_reached();
1385 	}
1386 
1387 	return NULL;
1388 }
1389 
1390 struct prof_tctx_dump_iter_arg_s {
1391 	tsdn_t	*tsdn;
1392 	bool	propagate_err;
1393 };
1394 
1395 static prof_tctx_t *
prof_tctx_dump_iter(prof_tctx_tree_t * tctxs,prof_tctx_t * tctx,void * opaque)1396 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
1397 	struct prof_tctx_dump_iter_arg_s *arg =
1398 	    (struct prof_tctx_dump_iter_arg_s *)opaque;
1399 
1400 	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1401 
1402 	switch (tctx->state) {
1403 	case prof_tctx_state_initializing:
1404 	case prof_tctx_state_nominal:
1405 		/* Not captured by this dump. */
1406 		break;
1407 	case prof_tctx_state_dumping:
1408 	case prof_tctx_state_purgatory:
1409 		if (prof_dump_printf(arg->propagate_err,
1410 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1411 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1412 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1413 		    tctx->dump_cnts.accumbytes)) {
1414 			return tctx;
1415 		}
1416 		break;
1417 	default:
1418 		not_reached();
1419 	}
1420 	return NULL;
1421 }
1422 
1423 static prof_tctx_t *
prof_tctx_finish_iter(prof_tctx_tree_t * tctxs,prof_tctx_t * tctx,void * arg)1424 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1425 	tsdn_t *tsdn = (tsdn_t *)arg;
1426 	prof_tctx_t *ret;
1427 
1428 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1429 
1430 	switch (tctx->state) {
1431 	case prof_tctx_state_nominal:
1432 		/* New since dumping started; ignore. */
1433 		break;
1434 	case prof_tctx_state_dumping:
1435 		tctx->state = prof_tctx_state_nominal;
1436 		break;
1437 	case prof_tctx_state_purgatory:
1438 		ret = tctx;
1439 		goto label_return;
1440 	default:
1441 		not_reached();
1442 	}
1443 
1444 	ret = NULL;
1445 label_return:
1446 	return ret;
1447 }
1448 
1449 static void
prof_dump_gctx_prep(tsdn_t * tsdn,prof_gctx_t * gctx,prof_gctx_tree_t * gctxs)1450 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
1451 	cassert(config_prof);
1452 
1453 	malloc_mutex_lock(tsdn, gctx->lock);
1454 
1455 	/*
1456 	 * Increment nlimbo so that gctx won't go away before dump.
1457 	 * Additionally, link gctx into the dump list so that it is included in
1458 	 * prof_dump()'s second pass.
1459 	 */
1460 	gctx->nlimbo++;
1461 	gctx_tree_insert(gctxs, gctx);
1462 
1463 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1464 
1465 	malloc_mutex_unlock(tsdn, gctx->lock);
1466 }
1467 
1468 struct prof_gctx_merge_iter_arg_s {
1469 	tsdn_t	*tsdn;
1470 	size_t	leak_ngctx;
1471 };
1472 
1473 static prof_gctx_t *
prof_gctx_merge_iter(prof_gctx_tree_t * gctxs,prof_gctx_t * gctx,void * opaque)1474 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1475 	struct prof_gctx_merge_iter_arg_s *arg =
1476 	    (struct prof_gctx_merge_iter_arg_s *)opaque;
1477 
1478 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1479 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1480 	    (void *)arg->tsdn);
1481 	if (gctx->cnt_summed.curobjs != 0) {
1482 		arg->leak_ngctx++;
1483 	}
1484 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1485 
1486 	return NULL;
1487 }
1488 
1489 static void
prof_gctx_finish(tsd_t * tsd,prof_gctx_tree_t * gctxs)1490 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
1491 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1492 	prof_gctx_t *gctx;
1493 
1494 	/*
1495 	 * Standard tree iteration won't work here, because as soon as we
1496 	 * decrement gctx->nlimbo and unlock gctx, another thread can
1497 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
1498 	 * tear down the tree one node at a time during iteration.
1499 	 */
1500 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1501 		gctx_tree_remove(gctxs, gctx);
1502 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1503 		{
1504 			prof_tctx_t *next;
1505 
1506 			next = NULL;
1507 			do {
1508 				prof_tctx_t *to_destroy =
1509 				    tctx_tree_iter(&gctx->tctxs, next,
1510 				    prof_tctx_finish_iter,
1511 				    (void *)tsd_tsdn(tsd));
1512 				if (to_destroy != NULL) {
1513 					next = tctx_tree_next(&gctx->tctxs,
1514 					    to_destroy);
1515 					tctx_tree_remove(&gctx->tctxs,
1516 					    to_destroy);
1517 					idalloctm(tsd_tsdn(tsd), to_destroy,
1518 					    NULL, NULL, true, true);
1519 				} else {
1520 					next = NULL;
1521 				}
1522 			} while (next != NULL);
1523 		}
1524 		gctx->nlimbo--;
1525 		if (prof_gctx_should_destroy(gctx)) {
1526 			gctx->nlimbo++;
1527 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1528 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1529 		} else {
1530 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1531 		}
1532 	}
1533 }
1534 
1535 struct prof_tdata_merge_iter_arg_s {
1536 	tsdn_t		*tsdn;
1537 	prof_cnt_t	cnt_all;
1538 };
1539 
1540 static prof_tdata_t *
prof_tdata_merge_iter(prof_tdata_tree_t * tdatas,prof_tdata_t * tdata,void * opaque)1541 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1542     void *opaque) {
1543 	struct prof_tdata_merge_iter_arg_s *arg =
1544 	    (struct prof_tdata_merge_iter_arg_s *)opaque;
1545 
1546 	malloc_mutex_lock(arg->tsdn, tdata->lock);
1547 	if (!tdata->expired) {
1548 		size_t tabind;
1549 		union {
1550 			prof_tctx_t	*p;
1551 			void		*v;
1552 		} tctx;
1553 
1554 		tdata->dumping = true;
1555 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1556 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1557 		    &tctx.v);) {
1558 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1559 		}
1560 
1561 		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1562 		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1563 		if (opt_prof_accum) {
1564 			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1565 			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1566 		}
1567 	} else {
1568 		tdata->dumping = false;
1569 	}
1570 	malloc_mutex_unlock(arg->tsdn, tdata->lock);
1571 
1572 	return NULL;
1573 }
1574 
1575 static prof_tdata_t *
prof_tdata_dump_iter(prof_tdata_tree_t * tdatas,prof_tdata_t * tdata,void * arg)1576 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1577     void *arg) {
1578 	bool propagate_err = *(bool *)arg;
1579 
1580 	if (!tdata->dumping) {
1581 		return NULL;
1582 	}
1583 
1584 	if (prof_dump_printf(propagate_err,
1585 	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1586 	    tdata->thr_uid, tdata->cnt_summed.curobjs,
1587 	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1588 	    tdata->cnt_summed.accumbytes,
1589 	    (tdata->thread_name != NULL) ? " " : "",
1590 	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
1591 		return tdata;
1592 	}
1593 	return NULL;
1594 }
1595 
1596 static bool
prof_dump_header_impl(tsdn_t * tsdn,bool propagate_err,const prof_cnt_t * cnt_all)1597 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
1598     const prof_cnt_t *cnt_all) {
1599 	bool ret;
1600 
1601 	if (prof_dump_printf(propagate_err,
1602 	    "heap_v2/%"FMTu64"\n"
1603 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1604 	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1605 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
1606 		return true;
1607 	}
1608 
1609 	malloc_mutex_lock(tsdn, &tdatas_mtx);
1610 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1611 	    (void *)&propagate_err) != NULL);
1612 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1613 	return ret;
1614 }
1615 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
1616 
1617 static bool
prof_dump_gctx(tsdn_t * tsdn,bool propagate_err,prof_gctx_t * gctx,const prof_bt_t * bt,prof_gctx_tree_t * gctxs)1618 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1619     const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
1620 	bool ret;
1621 	unsigned i;
1622 	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1623 
1624 	cassert(config_prof);
1625 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1626 
1627 	/* Avoid dumping such gctx's that have no useful data. */
1628 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1629 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1630 		assert(gctx->cnt_summed.curobjs == 0);
1631 		assert(gctx->cnt_summed.curbytes == 0);
1632 		assert(gctx->cnt_summed.accumobjs == 0);
1633 		assert(gctx->cnt_summed.accumbytes == 0);
1634 		ret = false;
1635 		goto label_return;
1636 	}
1637 
1638 	if (prof_dump_printf(propagate_err, "@")) {
1639 		ret = true;
1640 		goto label_return;
1641 	}
1642 	for (i = 0; i < bt->len; i++) {
1643 		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1644 		    (uintptr_t)bt->vec[i])) {
1645 			ret = true;
1646 			goto label_return;
1647 		}
1648 	}
1649 
1650 	if (prof_dump_printf(propagate_err,
1651 	    "\n"
1652 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1653 	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1654 	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1655 		ret = true;
1656 		goto label_return;
1657 	}
1658 
1659 	prof_tctx_dump_iter_arg.tsdn = tsdn;
1660 	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1661 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1662 	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
1663 		ret = true;
1664 		goto label_return;
1665 	}
1666 
1667 	ret = false;
1668 label_return:
1669 	return ret;
1670 }
1671 
1672 #ifndef _WIN32
1673 JEMALLOC_FORMAT_PRINTF(1, 2)
1674 static int
prof_open_maps(const char * format,...)1675 prof_open_maps(const char *format, ...) {
1676 	int mfd;
1677 	va_list ap;
1678 	char filename[PATH_MAX + 1];
1679 
1680 	va_start(ap, format);
1681 	malloc_vsnprintf(filename, sizeof(filename), format, ap);
1682 	va_end(ap);
1683 
1684 #if defined(O_CLOEXEC)
1685 	mfd = open(filename, O_RDONLY | O_CLOEXEC);
1686 #else
1687 	mfd = open(filename, O_RDONLY);
1688 	if (mfd != -1) {
1689 		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
1690 	}
1691 #endif
1692 
1693 	return mfd;
1694 }
1695 #endif
1696 
/*
 * Return the current process id: getpid() everywhere except Windows, where
 * GetCurrentProcessId() is used.
 */
static int
prof_getpid(void) {
#ifndef _WIN32
	return getpid();
#else
	return GetCurrentProcessId();
#endif
}
1705 
1706 static bool
prof_dump_maps(bool propagate_err)1707 prof_dump_maps(bool propagate_err) {
1708 	bool ret;
1709 	int mfd;
1710 
1711 	cassert(config_prof);
1712 #ifdef __FreeBSD__
1713 	mfd = prof_open_maps("/proc/curproc/map");
1714 #elif defined(_WIN32)
1715 	mfd = -1; // Not implemented
1716 #else
1717 	{
1718 		int pid = prof_getpid();
1719 
1720 		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1721 		if (mfd == -1) {
1722 			mfd = prof_open_maps("/proc/%d/maps", pid);
1723 		}
1724 	}
1725 #endif
1726 	if (mfd != -1) {
1727 		ssize_t nread;
1728 
1729 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1730 		    propagate_err) {
1731 			ret = true;
1732 			goto label_return;
1733 		}
1734 		nread = 0;
1735 		do {
1736 			prof_dump_buf_end += nread;
1737 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1738 				/* Make space in prof_dump_buf before read(). */
1739 				if (prof_dump_flush(propagate_err) &&
1740 				    propagate_err) {
1741 					ret = true;
1742 					goto label_return;
1743 				}
1744 			}
1745 			nread = malloc_read_fd(mfd,
1746 			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
1747 			    - prof_dump_buf_end);
1748 		} while (nread > 0);
1749 	} else {
1750 		ret = true;
1751 		goto label_return;
1752 	}
1753 
1754 	ret = false;
1755 label_return:
1756 	if (mfd != -1) {
1757 		close(mfd);
1758 	}
1759 	return ret;
1760 }
1761 
1762 /*
1763  * See prof_sample_threshold_update() comment for why the body of this function
1764  * is conditionally compiled.
1765  */
1766 static void
prof_leakcheck(const prof_cnt_t * cnt_all,size_t leak_ngctx,const char * filename)1767 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1768     const char *filename) {
1769 #ifdef JEMALLOC_PROF
1770 	/*
1771 	 * Scaling is equivalent AdjustSamples() in jeprof, but the result may
1772 	 * differ slightly from what jeprof reports, because here we scale the
1773 	 * summary values, whereas jeprof scales each context individually and
1774 	 * reports the sums of the scaled values.
1775 	 */
1776 	if (cnt_all->curbytes != 0) {
1777 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1778 		double ratio = (((double)cnt_all->curbytes) /
1779 		    (double)cnt_all->curobjs) / sample_period;
1780 		double scale_factor = 1.0 / (1.0 - exp(-ratio));
1781 		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1782 		    * scale_factor);
1783 		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1784 		    scale_factor);
1785 
1786 		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1787 		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1788 		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1789 		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1790 		malloc_printf(
1791 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1792 		    filename);
1793 	}
1794 #endif
1795 }
1796 
1797 struct prof_gctx_dump_iter_arg_s {
1798 	tsdn_t	*tsdn;
1799 	bool	propagate_err;
1800 };
1801 
1802 static prof_gctx_t *
prof_gctx_dump_iter(prof_gctx_tree_t * gctxs,prof_gctx_t * gctx,void * opaque)1803 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1804 	prof_gctx_t *ret;
1805 	struct prof_gctx_dump_iter_arg_s *arg =
1806 	    (struct prof_gctx_dump_iter_arg_s *)opaque;
1807 
1808 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1809 
1810 	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1811 	    gctxs)) {
1812 		ret = gctx;
1813 		goto label_return;
1814 	}
1815 
1816 	ret = NULL;
1817 label_return:
1818 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1819 	return ret;
1820 }
1821 
1822 static void
prof_dump_prep(tsd_t * tsd,prof_tdata_t * tdata,struct prof_tdata_merge_iter_arg_s * prof_tdata_merge_iter_arg,struct prof_gctx_merge_iter_arg_s * prof_gctx_merge_iter_arg,prof_gctx_tree_t * gctxs)1823 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
1824     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1825     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1826     prof_gctx_tree_t *gctxs) {
1827 	size_t tabind;
1828 	union {
1829 		prof_gctx_t	*p;
1830 		void		*v;
1831 	} gctx;
1832 
1833 	prof_enter(tsd, tdata);
1834 
1835 	/*
1836 	 * Put gctx's in limbo and clear their counters in preparation for
1837 	 * summing.
1838 	 */
1839 	gctx_tree_new(gctxs);
1840 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
1841 		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
1842 	}
1843 
1844 	/*
1845 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1846 	 * stats and merge them into the associated gctx's.
1847 	 */
1848 	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1849 	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
1850 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1851 	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1852 	    (void *)prof_tdata_merge_iter_arg);
1853 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1854 
1855 	/* Merge tctx stats into gctx's. */
1856 	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1857 	prof_gctx_merge_iter_arg->leak_ngctx = 0;
1858 	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
1859 	    (void *)prof_gctx_merge_iter_arg);
1860 
1861 	prof_leave(tsd, tdata);
1862 }
1863 
1864 static bool
prof_dump_file(tsd_t * tsd,bool propagate_err,const char * filename,bool leakcheck,prof_tdata_t * tdata,struct prof_tdata_merge_iter_arg_s * prof_tdata_merge_iter_arg,struct prof_gctx_merge_iter_arg_s * prof_gctx_merge_iter_arg,struct prof_gctx_dump_iter_arg_s * prof_gctx_dump_iter_arg,prof_gctx_tree_t * gctxs)1865 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
1866     bool leakcheck, prof_tdata_t *tdata,
1867     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1868     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1869     struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
1870     prof_gctx_tree_t *gctxs) {
1871 	/* Create dump file. */
1872 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
1873 		return true;
1874 	}
1875 
1876 	/* Dump profile header. */
1877 	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1878 	    &prof_tdata_merge_iter_arg->cnt_all)) {
1879 		goto label_write_error;
1880 	}
1881 
1882 	/* Dump per gctx profile stats. */
1883 	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
1884 	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
1885 	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
1886 	    (void *)prof_gctx_dump_iter_arg) != NULL) {
1887 		goto label_write_error;
1888 	}
1889 
1890 	/* Dump /proc/<pid>/maps if possible. */
1891 	if (prof_dump_maps(propagate_err)) {
1892 		goto label_write_error;
1893 	}
1894 
1895 	if (prof_dump_close(propagate_err)) {
1896 		return true;
1897 	}
1898 
1899 	return false;
1900 label_write_error:
1901 	prof_dump_close(propagate_err);
1902 	return true;
1903 }
1904 
1905 static bool
prof_dump(tsd_t * tsd,bool propagate_err,const char * filename,bool leakcheck)1906 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
1907     bool leakcheck) {
1908 	cassert(config_prof);
1909 	assert(tsd_reentrancy_level_get(tsd) == 0);
1910 
1911 	prof_tdata_t * tdata = prof_tdata_get(tsd, true);
1912 	if (tdata == NULL) {
1913 		return true;
1914 	}
1915 
1916 	pre_reentrancy(tsd, NULL);
1917 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1918 
1919 	prof_gctx_tree_t gctxs;
1920 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1921 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1922 	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1923 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1924 	    &prof_gctx_merge_iter_arg, &gctxs);
1925 	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
1926 	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
1927 	    &prof_gctx_dump_iter_arg, &gctxs);
1928 	prof_gctx_finish(tsd, &gctxs);
1929 
1930 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1931 	post_reentrancy(tsd);
1932 
1933 	if (err) {
1934 		return true;
1935 	}
1936 
1937 	if (leakcheck) {
1938 		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1939 		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
1940 	}
1941 	return false;
1942 }
1943 
1944 #ifdef JEMALLOC_JET
1945 void
prof_cnt_all(uint64_t * curobjs,uint64_t * curbytes,uint64_t * accumobjs,uint64_t * accumbytes)1946 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
1947     uint64_t *accumbytes) {
1948 	tsd_t *tsd;
1949 	prof_tdata_t *tdata;
1950 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1951 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1952 	prof_gctx_tree_t gctxs;
1953 
1954 	tsd = tsd_fetch();
1955 	tdata = prof_tdata_get(tsd, false);
1956 	if (tdata == NULL) {
1957 		if (curobjs != NULL) {
1958 			*curobjs = 0;
1959 		}
1960 		if (curbytes != NULL) {
1961 			*curbytes = 0;
1962 		}
1963 		if (accumobjs != NULL) {
1964 			*accumobjs = 0;
1965 		}
1966 		if (accumbytes != NULL) {
1967 			*accumbytes = 0;
1968 		}
1969 		return;
1970 	}
1971 
1972 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1973 	    &prof_gctx_merge_iter_arg, &gctxs);
1974 	prof_gctx_finish(tsd, &gctxs);
1975 
1976 	if (curobjs != NULL) {
1977 		*curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
1978 	}
1979 	if (curbytes != NULL) {
1980 		*curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
1981 	}
1982 	if (accumobjs != NULL) {
1983 		*accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
1984 	}
1985 	if (accumbytes != NULL) {
1986 		*accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
1987 	}
1988 }
1989 #endif
1990 
1991 #define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1992 #define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
1993 static void
prof_dump_filename(char * filename,char v,uint64_t vseq)1994 prof_dump_filename(char *filename, char v, uint64_t vseq) {
1995 	cassert(config_prof);
1996 
1997 	if (vseq != VSEQ_INVALID) {
1998 	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
1999 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2000 		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
2001 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
2002 	} else {
2003 	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
2004 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2005 		    "%s.%d.%"FMTu64".%c.heap",
2006 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
2007 	}
2008 	prof_dump_seq++;
2009 }
2010 
2011 static void
prof_fdump(void)2012 prof_fdump(void) {
2013 	tsd_t *tsd;
2014 	char filename[DUMP_FILENAME_BUFSIZE];
2015 
2016 	cassert(config_prof);
2017 	assert(opt_prof_final);
2018 	assert(opt_prof_prefix[0] != '\0');
2019 
2020 	if (!prof_booted) {
2021 		return;
2022 	}
2023 	tsd = tsd_fetch();
2024 	assert(tsd_reentrancy_level_get(tsd) == 0);
2025 
2026 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2027 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
2028 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2029 	prof_dump(tsd, false, filename, opt_prof_leak);
2030 }
2031 
2032 bool
prof_accum_init(tsdn_t * tsdn,prof_accum_t * prof_accum)2033 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
2034 	cassert(config_prof);
2035 
2036 #ifndef JEMALLOC_ATOMIC_U64
2037 	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
2038 	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
2039 		return true;
2040 	}
2041 	prof_accum->accumbytes = 0;
2042 #else
2043 	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
2044 #endif
2045 	return false;
2046 }
2047 
2048 void
prof_idump(tsdn_t * tsdn)2049 prof_idump(tsdn_t *tsdn) {
2050 	tsd_t *tsd;
2051 	prof_tdata_t *tdata;
2052 
2053 	cassert(config_prof);
2054 
2055 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2056 		return;
2057 	}
2058 	tsd = tsdn_tsd(tsdn);
2059 	if (tsd_reentrancy_level_get(tsd) > 0) {
2060 		return;
2061 	}
2062 
2063 	tdata = prof_tdata_get(tsd, false);
2064 	if (tdata == NULL) {
2065 		return;
2066 	}
2067 	if (tdata->enq) {
2068 		tdata->enq_idump = true;
2069 		return;
2070 	}
2071 
2072 	if (opt_prof_prefix[0] != '\0') {
2073 		char filename[PATH_MAX + 1];
2074 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2075 		prof_dump_filename(filename, 'i', prof_dump_iseq);
2076 		prof_dump_iseq++;
2077 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2078 		prof_dump(tsd, false, filename, false);
2079 	}
2080 }
2081 
2082 bool
prof_mdump(tsd_t * tsd,const char * filename)2083 prof_mdump(tsd_t *tsd, const char *filename) {
2084 	cassert(config_prof);
2085 	assert(tsd_reentrancy_level_get(tsd) == 0);
2086 
2087 	if (!opt_prof || !prof_booted) {
2088 		return true;
2089 	}
2090 	char filename_buf[DUMP_FILENAME_BUFSIZE];
2091 	if (filename == NULL) {
2092 		/* No filename specified, so automatically generate one. */
2093 		if (opt_prof_prefix[0] == '\0') {
2094 			return true;
2095 		}
2096 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2097 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
2098 		prof_dump_mseq++;
2099 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2100 		filename = filename_buf;
2101 	}
2102 	return prof_dump(tsd, true, filename, false);
2103 }
2104 
2105 void
prof_gdump(tsdn_t * tsdn)2106 prof_gdump(tsdn_t *tsdn) {
2107 	tsd_t *tsd;
2108 	prof_tdata_t *tdata;
2109 
2110 	cassert(config_prof);
2111 
2112 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2113 		return;
2114 	}
2115 	tsd = tsdn_tsd(tsdn);
2116 	if (tsd_reentrancy_level_get(tsd) > 0) {
2117 		return;
2118 	}
2119 
2120 	tdata = prof_tdata_get(tsd, false);
2121 	if (tdata == NULL) {
2122 		return;
2123 	}
2124 	if (tdata->enq) {
2125 		tdata->enq_gdump = true;
2126 		return;
2127 	}
2128 
2129 	if (opt_prof_prefix[0] != '\0') {
2130 		char filename[DUMP_FILENAME_BUFSIZE];
2131 		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
2132 		prof_dump_filename(filename, 'u', prof_dump_useq);
2133 		prof_dump_useq++;
2134 		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
2135 		prof_dump(tsd, false, filename, false);
2136 	}
2137 }
2138 
2139 static void
prof_bt_hash(const void * key,size_t r_hash[2])2140 prof_bt_hash(const void *key, size_t r_hash[2]) {
2141 	prof_bt_t *bt = (prof_bt_t *)key;
2142 
2143 	cassert(config_prof);
2144 
2145 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
2146 }
2147 
2148 static bool
prof_bt_keycomp(const void * k1,const void * k2)2149 prof_bt_keycomp(const void *k1, const void *k2) {
2150 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
2151 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
2152 
2153 	cassert(config_prof);
2154 
2155 	if (bt1->len != bt2->len) {
2156 		return false;
2157 	}
2158 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
2159 }
2160 
2161 static void
prof_bt_node_hash(const void * key,size_t r_hash[2])2162 prof_bt_node_hash(const void *key, size_t r_hash[2]) {
2163 	const prof_bt_node_t *bt_node = (prof_bt_node_t *)key;
2164 	prof_bt_hash((void *)(&bt_node->bt), r_hash);
2165 }
2166 
2167 static bool
prof_bt_node_keycomp(const void * k1,const void * k2)2168 prof_bt_node_keycomp(const void *k1, const void *k2) {
2169 	const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
2170 	const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
2171 	return prof_bt_keycomp((void *)(&bt_node1->bt),
2172 	    (void *)(&bt_node2->bt));
2173 }
2174 
2175 static void
prof_thr_node_hash(const void * key,size_t r_hash[2])2176 prof_thr_node_hash(const void *key, size_t r_hash[2]) {
2177 	const prof_thr_node_t *thr_node = (prof_thr_node_t *)key;
2178 	hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash);
2179 }
2180 
2181 static bool
prof_thr_node_keycomp(const void * k1,const void * k2)2182 prof_thr_node_keycomp(const void *k1, const void *k2) {
2183 	const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1;
2184 	const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2;
2185 	return thr_node1->thr_uid == thr_node2->thr_uid;
2186 }
2187 
2188 static uint64_t
prof_thr_uid_alloc(tsdn_t * tsdn)2189 prof_thr_uid_alloc(tsdn_t *tsdn) {
2190 	uint64_t thr_uid;
2191 
2192 	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
2193 	thr_uid = next_thr_uid;
2194 	next_thr_uid++;
2195 	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
2196 
2197 	return thr_uid;
2198 }
2199 
2200 static prof_tdata_t *
prof_tdata_init_impl(tsd_t * tsd,uint64_t thr_uid,uint64_t thr_discrim,char * thread_name,bool active)2201 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
2202     char *thread_name, bool active) {
2203 	prof_tdata_t *tdata;
2204 
2205 	cassert(config_prof);
2206 
2207 	/* Initialize an empty cache for this thread. */
2208 	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
2209 	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
2210 	    arena_get(TSDN_NULL, 0, true), true);
2211 	if (tdata == NULL) {
2212 		return NULL;
2213 	}
2214 
2215 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
2216 	tdata->thr_uid = thr_uid;
2217 	tdata->thr_discrim = thr_discrim;
2218 	tdata->thread_name = thread_name;
2219 	tdata->attached = true;
2220 	tdata->expired = false;
2221 	tdata->tctx_uid_next = 0;
2222 
2223 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
2224 	    prof_bt_keycomp)) {
2225 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2226 		return NULL;
2227 	}
2228 
2229 	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
2230 	prof_sample_threshold_update(tdata);
2231 
2232 	tdata->enq = false;
2233 	tdata->enq_idump = false;
2234 	tdata->enq_gdump = false;
2235 
2236 	tdata->dumping = false;
2237 	tdata->active = active;
2238 
2239 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2240 	tdata_tree_insert(&tdatas, tdata);
2241 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2242 
2243 	return tdata;
2244 }
2245 
2246 prof_tdata_t *
prof_tdata_init(tsd_t * tsd)2247 prof_tdata_init(tsd_t *tsd) {
2248 	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
2249 	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
2250 }
2251 
2252 static bool
prof_tdata_should_destroy_unlocked(prof_tdata_t * tdata,bool even_if_attached)2253 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
2254 	if (tdata->attached && !even_if_attached) {
2255 		return false;
2256 	}
2257 	if (ckh_count(&tdata->bt2tctx) != 0) {
2258 		return false;
2259 	}
2260 	return true;
2261 }
2262 
2263 static bool
prof_tdata_should_destroy(tsdn_t * tsdn,prof_tdata_t * tdata,bool even_if_attached)2264 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
2265     bool even_if_attached) {
2266 	malloc_mutex_assert_owner(tsdn, tdata->lock);
2267 
2268 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
2269 }
2270 
2271 static void
prof_tdata_destroy_locked(tsd_t * tsd,prof_tdata_t * tdata,bool even_if_attached)2272 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
2273     bool even_if_attached) {
2274 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
2275 
2276 	tdata_tree_remove(&tdatas, tdata);
2277 
2278 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
2279 
2280 	if (tdata->thread_name != NULL) {
2281 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2282 		    true);
2283 	}
2284 	ckh_delete(tsd, &tdata->bt2tctx);
2285 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2286 }
2287 
2288 static void
prof_tdata_destroy(tsd_t * tsd,prof_tdata_t * tdata,bool even_if_attached)2289 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
2290 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2291 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
2292 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2293 }
2294 
2295 static void
prof_tdata_detach(tsd_t * tsd,prof_tdata_t * tdata)2296 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
2297 	bool destroy_tdata;
2298 
2299 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
2300 	if (tdata->attached) {
2301 		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
2302 		    true);
2303 		/*
2304 		 * Only detach if !destroy_tdata, because detaching would allow
2305 		 * another thread to win the race to destroy tdata.
2306 		 */
2307 		if (!destroy_tdata) {
2308 			tdata->attached = false;
2309 		}
2310 		tsd_prof_tdata_set(tsd, NULL);
2311 	} else {
2312 		destroy_tdata = false;
2313 	}
2314 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
2315 	if (destroy_tdata) {
2316 		prof_tdata_destroy(tsd, tdata, true);
2317 	}
2318 }
2319 
2320 prof_tdata_t *
prof_tdata_reinit(tsd_t * tsd,prof_tdata_t * tdata)2321 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
2322 	uint64_t thr_uid = tdata->thr_uid;
2323 	uint64_t thr_discrim = tdata->thr_discrim + 1;
2324 	char *thread_name = (tdata->thread_name != NULL) ?
2325 	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
2326 	bool active = tdata->active;
2327 
2328 	prof_tdata_detach(tsd, tdata);
2329 	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
2330 	    active);
2331 }
2332 
2333 static bool
prof_tdata_expire(tsdn_t * tsdn,prof_tdata_t * tdata)2334 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
2335 	bool destroy_tdata;
2336 
2337 	malloc_mutex_lock(tsdn, tdata->lock);
2338 	if (!tdata->expired) {
2339 		tdata->expired = true;
2340 		destroy_tdata = tdata->attached ? false :
2341 		    prof_tdata_should_destroy(tsdn, tdata, false);
2342 	} else {
2343 		destroy_tdata = false;
2344 	}
2345 	malloc_mutex_unlock(tsdn, tdata->lock);
2346 
2347 	return destroy_tdata;
2348 }
2349 
2350 static prof_tdata_t *
prof_tdata_reset_iter(prof_tdata_tree_t * tdatas,prof_tdata_t * tdata,void * arg)2351 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
2352     void *arg) {
2353 	tsdn_t *tsdn = (tsdn_t *)arg;
2354 
2355 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
2356 }
2357 
2358 void
prof_reset(tsd_t * tsd,size_t lg_sample)2359 prof_reset(tsd_t *tsd, size_t lg_sample) {
2360 	prof_tdata_t *next;
2361 
2362 	assert(lg_sample < (sizeof(uint64_t) << 3));
2363 
2364 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
2365 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2366 
2367 	lg_prof_sample = lg_sample;
2368 
2369 	next = NULL;
2370 	do {
2371 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
2372 		    prof_tdata_reset_iter, (void *)tsd);
2373 		if (to_destroy != NULL) {
2374 			next = tdata_tree_next(&tdatas, to_destroy);
2375 			prof_tdata_destroy_locked(tsd, to_destroy, false);
2376 		} else {
2377 			next = NULL;
2378 		}
2379 	} while (next != NULL);
2380 
2381 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2382 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
2383 }
2384 
2385 void
prof_tdata_cleanup(tsd_t * tsd)2386 prof_tdata_cleanup(tsd_t *tsd) {
2387 	prof_tdata_t *tdata;
2388 
2389 	if (!config_prof) {
2390 		return;
2391 	}
2392 
2393 	tdata = tsd_prof_tdata_get(tsd);
2394 	if (tdata != NULL) {
2395 		prof_tdata_detach(tsd, tdata);
2396 	}
2397 }
2398 
2399 bool
prof_active_get(tsdn_t * tsdn)2400 prof_active_get(tsdn_t *tsdn) {
2401 	bool prof_active_current;
2402 
2403 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2404 	prof_active_current = prof_active;
2405 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2406 	return prof_active_current;
2407 }
2408 
2409 bool
prof_active_set(tsdn_t * tsdn,bool active)2410 prof_active_set(tsdn_t *tsdn, bool active) {
2411 	bool prof_active_old;
2412 
2413 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2414 	prof_active_old = prof_active;
2415 	prof_active = active;
2416 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2417 	return prof_active_old;
2418 }
2419 
2420 #ifdef JEMALLOC_JET
2421 size_t
prof_log_bt_count(void)2422 prof_log_bt_count(void) {
2423 	size_t cnt = 0;
2424 	prof_bt_node_t *node = log_bt_first;
2425 	while (node != NULL) {
2426 		cnt++;
2427 		node = node->next;
2428 	}
2429 	return cnt;
2430 }
2431 
2432 size_t
prof_log_alloc_count(void)2433 prof_log_alloc_count(void) {
2434 	size_t cnt = 0;
2435 	prof_alloc_node_t *node = log_alloc_first;
2436 	while (node != NULL) {
2437 		cnt++;
2438 		node = node->next;
2439 	}
2440 	return cnt;
2441 }
2442 
2443 size_t
prof_log_thr_count(void)2444 prof_log_thr_count(void) {
2445 	size_t cnt = 0;
2446 	prof_thr_node_t *node = log_thr_first;
2447 	while (node != NULL) {
2448 		cnt++;
2449 		node = node->next;
2450 	}
2451 	return cnt;
2452 }
2453 
2454 bool
prof_log_is_logging(void)2455 prof_log_is_logging(void) {
2456 	return prof_logging_state == prof_logging_state_started;
2457 }
2458 
2459 bool
prof_log_rep_check(void)2460 prof_log_rep_check(void) {
2461 	if (prof_logging_state == prof_logging_state_stopped
2462 	    && log_tables_initialized) {
2463 		return true;
2464 	}
2465 
2466 	if (log_bt_last != NULL && log_bt_last->next != NULL) {
2467 		return true;
2468 	}
2469 	if (log_thr_last != NULL && log_thr_last->next != NULL) {
2470 		return true;
2471 	}
2472 	if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
2473 		return true;
2474 	}
2475 
2476 	size_t bt_count = prof_log_bt_count();
2477 	size_t thr_count = prof_log_thr_count();
2478 	size_t alloc_count = prof_log_alloc_count();
2479 
2480 
2481 	if (prof_logging_state == prof_logging_state_stopped) {
2482 		if (bt_count != 0 || thr_count != 0 || alloc_count || 0) {
2483 			return true;
2484 		}
2485 	}
2486 
2487 	prof_alloc_node_t *node = log_alloc_first;
2488 	while (node != NULL) {
2489 		if (node->alloc_bt_ind >= bt_count) {
2490 			return true;
2491 		}
2492 		if (node->free_bt_ind >= bt_count) {
2493 			return true;
2494 		}
2495 		if (node->alloc_thr_ind >= thr_count) {
2496 			return true;
2497 		}
2498 		if (node->free_thr_ind >= thr_count) {
2499 			return true;
2500 		}
2501 		if (node->alloc_time_ns > node->free_time_ns) {
2502 			return true;
2503 		}
2504 		node = node->next;
2505 	}
2506 
2507 	return false;
2508 }
2509 
2510 void
prof_log_dummy_set(bool new_value)2511 prof_log_dummy_set(bool new_value) {
2512 	prof_log_dummy = new_value;
2513 }
2514 #endif
2515 
2516 bool
prof_log_start(tsdn_t * tsdn,const char * filename)2517 prof_log_start(tsdn_t *tsdn, const char *filename) {
2518 	if (!opt_prof || !prof_booted) {
2519 		return true;
2520 	}
2521 
2522 	bool ret = false;
2523 	size_t buf_size = PATH_MAX + 1;
2524 
2525 	malloc_mutex_lock(tsdn, &log_mtx);
2526 
2527 	if (prof_logging_state != prof_logging_state_stopped) {
2528 		ret = true;
2529 	} else if (filename == NULL) {
2530 		/* Make default name. */
2531 		malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json",
2532 		    opt_prof_prefix, prof_getpid(), log_seq);
2533 		log_seq++;
2534 		prof_logging_state = prof_logging_state_started;
2535 	} else if (strlen(filename) >= buf_size) {
2536 		ret = true;
2537 	} else {
2538 		strcpy(log_filename, filename);
2539 		prof_logging_state = prof_logging_state_started;
2540 	}
2541 
2542 	if (!ret) {
2543 		nstime_update(&log_start_timestamp);
2544 	}
2545 
2546 	malloc_mutex_unlock(tsdn, &log_mtx);
2547 
2548 	return ret;
2549 }
2550 
2551 /* Used as an atexit function to stop logging on exit. */
2552 static void
prof_log_stop_final(void)2553 prof_log_stop_final(void) {
2554 	tsd_t *tsd = tsd_fetch();
2555 	prof_log_stop(tsd_tsdn(tsd));
2556 }
2557 
/* State handed to prof_emitter_write_cb() through the emitter's opaque arg. */
struct prof_emitter_cb_arg_s {
	/* Destination file descriptor. */
	int fd;
	/* Result of the most recent write() call. */
	ssize_t ret;
};

/*
 * Emitter output callback: writes the NUL-terminated string to_write to the
 * descriptor in the callback argument and records write()'s return value.
 */
static void
prof_emitter_write_cb(void *opaque, const char *to_write) {
	struct prof_emitter_cb_arg_s *cb_arg = opaque;
	size_t len = strlen(to_write);

#ifdef JEMALLOC_JET
	/* In dummy mode the output is discarded. */
	if (prof_log_dummy) {
		return;
	}
#endif
	cb_arg->ret = write(cb_arg->fd, to_write, len);
}
2575 
2576 /*
2577  * prof_log_emit_{...} goes through the appropriate linked list, emitting each
2578  * node to the json and deallocating it.
2579  */
2580 static void
prof_log_emit_threads(tsd_t * tsd,emitter_t * emitter)2581 prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
2582 	emitter_json_array_kv_begin(emitter, "threads");
2583 	prof_thr_node_t *thr_node = log_thr_first;
2584 	prof_thr_node_t *thr_old_node;
2585 	while (thr_node != NULL) {
2586 		emitter_json_object_begin(emitter);
2587 
2588 		emitter_json_kv(emitter, "thr_uid", emitter_type_uint64,
2589 		    &thr_node->thr_uid);
2590 
2591 		char *thr_name = thr_node->name;
2592 
2593 		emitter_json_kv(emitter, "thr_name", emitter_type_string,
2594 		    &thr_name);
2595 
2596 		emitter_json_object_end(emitter);
2597 		thr_old_node = thr_node;
2598 		thr_node = thr_node->next;
2599 		idalloc(tsd, thr_old_node);
2600 	}
2601 	emitter_json_array_end(emitter);
2602 }
2603 
2604 static void
prof_log_emit_traces(tsd_t * tsd,emitter_t * emitter)2605 prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
2606 	emitter_json_array_kv_begin(emitter, "stack_traces");
2607 	prof_bt_node_t *bt_node = log_bt_first;
2608 	prof_bt_node_t *bt_old_node;
2609 	/*
2610 	 * Calculate how many hex digits we need: twice number of bytes, two for
2611 	 * "0x", and then one more for terminating '\0'.
2612 	 */
2613 	char buf[2 * sizeof(intptr_t) + 3];
2614 	size_t buf_sz = sizeof(buf);
2615 	while (bt_node != NULL) {
2616 		emitter_json_array_begin(emitter);
2617 		size_t i;
2618 		for (i = 0; i < bt_node->bt.len; i++) {
2619 			malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]);
2620 			char *trace_str = buf;
2621 			emitter_json_value(emitter, emitter_type_string,
2622 			    &trace_str);
2623 		}
2624 		emitter_json_array_end(emitter);
2625 
2626 		bt_old_node = bt_node;
2627 		bt_node = bt_node->next;
2628 		idalloc(tsd, bt_old_node);
2629 	}
2630 	emitter_json_array_end(emitter);
2631 }
2632 
2633 static void
prof_log_emit_allocs(tsd_t * tsd,emitter_t * emitter)2634 prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
2635 	emitter_json_array_kv_begin(emitter, "allocations");
2636 	prof_alloc_node_t *alloc_node = log_alloc_first;
2637 	prof_alloc_node_t *alloc_old_node;
2638 	while (alloc_node != NULL) {
2639 		emitter_json_object_begin(emitter);
2640 
2641 		emitter_json_kv(emitter, "alloc_thread", emitter_type_size,
2642 		    &alloc_node->alloc_thr_ind);
2643 
2644 		emitter_json_kv(emitter, "free_thread", emitter_type_size,
2645 		    &alloc_node->free_thr_ind);
2646 
2647 		emitter_json_kv(emitter, "alloc_trace", emitter_type_size,
2648 		    &alloc_node->alloc_bt_ind);
2649 
2650 		emitter_json_kv(emitter, "free_trace", emitter_type_size,
2651 		    &alloc_node->free_bt_ind);
2652 
2653 		emitter_json_kv(emitter, "alloc_timestamp",
2654 		    emitter_type_uint64, &alloc_node->alloc_time_ns);
2655 
2656 		emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
2657 		    &alloc_node->free_time_ns);
2658 
2659 		emitter_json_kv(emitter, "usize", emitter_type_uint64,
2660 		    &alloc_node->usize);
2661 
2662 		emitter_json_object_end(emitter);
2663 
2664 		alloc_old_node = alloc_node;
2665 		alloc_node = alloc_node->next;
2666 		idalloc(tsd, alloc_old_node);
2667 	}
2668 	emitter_json_array_end(emitter);
2669 }
2670 
2671 static void
prof_log_emit_metadata(emitter_t * emitter)2672 prof_log_emit_metadata(emitter_t *emitter) {
2673 	emitter_json_object_kv_begin(emitter, "info");
2674 
2675 	nstime_t now = NSTIME_ZERO_INITIALIZER;
2676 
2677 	nstime_update(&now);
2678 	uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp);
2679 	emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);
2680 
2681 	char *vers = JEMALLOC_VERSION;
2682 	emitter_json_kv(emitter, "version",
2683 	    emitter_type_string, &vers);
2684 
2685 	emitter_json_kv(emitter, "lg_sample_rate",
2686 	    emitter_type_int, &lg_prof_sample);
2687 
2688 	int pid = prof_getpid();
2689 	emitter_json_kv(emitter, "pid", emitter_type_int, &pid);
2690 
2691 	emitter_json_object_end(emitter);
2692 }
2693 
2694 
2695 bool
prof_log_stop(tsdn_t * tsdn)2696 prof_log_stop(tsdn_t *tsdn) {
2697 	if (!opt_prof || !prof_booted) {
2698 		return true;
2699 	}
2700 
2701 	tsd_t *tsd = tsdn_tsd(tsdn);
2702 	malloc_mutex_lock(tsdn, &log_mtx);
2703 
2704 	if (prof_logging_state != prof_logging_state_started) {
2705 		malloc_mutex_unlock(tsdn, &log_mtx);
2706 		return true;
2707 	}
2708 
2709 	/*
2710 	 * Set the state to dumping. We'll set it to stopped when we're done.
2711 	 * Since other threads won't be able to start/stop/log when the state is
2712 	 * dumping, we don't have to hold the lock during the whole method.
2713 	 */
2714 	prof_logging_state = prof_logging_state_dumping;
2715 	malloc_mutex_unlock(tsdn, &log_mtx);
2716 
2717 
2718 	emitter_t emitter;
2719 
2720 	/* Create a file. */
2721 
2722 	int fd;
2723 #ifdef JEMALLOC_JET
2724 	if (prof_log_dummy) {
2725 		fd = 0;
2726 	} else {
2727 		fd = creat(log_filename, 0644);
2728 	}
2729 #else
2730 	fd = creat(log_filename, 0644);
2731 #endif
2732 
2733 	if (fd == -1) {
2734 		malloc_printf("<jemalloc>: creat() for log file \"%s\" "
2735 			      " failed with %d\n", log_filename, errno);
2736 		if (opt_abort) {
2737 			abort();
2738 		}
2739 		return true;
2740 	}
2741 
2742 	/* Emit to json. */
2743 	struct prof_emitter_cb_arg_s arg;
2744 	arg.fd = fd;
2745 	emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
2746 	    (void *)(&arg));
2747 
2748 	emitter_begin(&emitter);
2749 	prof_log_emit_metadata(&emitter);
2750 	prof_log_emit_threads(tsd, &emitter);
2751 	prof_log_emit_traces(tsd, &emitter);
2752 	prof_log_emit_allocs(tsd, &emitter);
2753 	emitter_end(&emitter);
2754 
2755 	/* Reset global state. */
2756 	if (log_tables_initialized) {
2757 		ckh_delete(tsd, &log_bt_node_set);
2758 		ckh_delete(tsd, &log_thr_node_set);
2759 	}
2760 	log_tables_initialized = false;
2761 	log_bt_index = 0;
2762 	log_thr_index = 0;
2763 	log_bt_first = NULL;
2764 	log_bt_last = NULL;
2765 	log_thr_first = NULL;
2766 	log_thr_last = NULL;
2767 	log_alloc_first = NULL;
2768 	log_alloc_last = NULL;
2769 
2770 	malloc_mutex_lock(tsdn, &log_mtx);
2771 	prof_logging_state = prof_logging_state_stopped;
2772 	malloc_mutex_unlock(tsdn, &log_mtx);
2773 
2774 #ifdef JEMALLOC_JET
2775 	if (prof_log_dummy) {
2776 		return false;
2777 	}
2778 #endif
2779 	return close(fd);
2780 }
2781 
2782 const char *
prof_thread_name_get(tsd_t * tsd)2783 prof_thread_name_get(tsd_t *tsd) {
2784 	prof_tdata_t *tdata;
2785 
2786 	tdata = prof_tdata_get(tsd, true);
2787 	if (tdata == NULL) {
2788 		return "";
2789 	}
2790 	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2791 }
2792 
2793 static char *
prof_thread_name_alloc(tsdn_t * tsdn,const char * thread_name)2794 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
2795 	char *ret;
2796 	size_t size;
2797 
2798 	if (thread_name == NULL) {
2799 		return NULL;
2800 	}
2801 
2802 	size = strlen(thread_name) + 1;
2803 	if (size == 1) {
2804 		return "";
2805 	}
2806 
2807 	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
2808 	    arena_get(TSDN_NULL, 0, true), true);
2809 	if (ret == NULL) {
2810 		return NULL;
2811 	}
2812 	memcpy(ret, thread_name, size);
2813 	return ret;
2814 }
2815 
2816 int
prof_thread_name_set(tsd_t * tsd,const char * thread_name)2817 prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
2818 	prof_tdata_t *tdata;
2819 	unsigned i;
2820 	char *s;
2821 
2822 	tdata = prof_tdata_get(tsd, true);
2823 	if (tdata == NULL) {
2824 		return EAGAIN;
2825 	}
2826 
2827 	/* Validate input. */
2828 	if (thread_name == NULL) {
2829 		return EFAULT;
2830 	}
2831 	for (i = 0; thread_name[i] != '\0'; i++) {
2832 		char c = thread_name[i];
2833 		if (!isgraph(c) && !isblank(c)) {
2834 			return EFAULT;
2835 		}
2836 	}
2837 
2838 	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2839 	if (s == NULL) {
2840 		return EAGAIN;
2841 	}
2842 
2843 	if (tdata->thread_name != NULL) {
2844 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2845 		    true);
2846 		tdata->thread_name = NULL;
2847 	}
2848 	if (strlen(s) > 0) {
2849 		tdata->thread_name = s;
2850 	}
2851 	return 0;
2852 }
2853 
2854 bool
prof_thread_active_get(tsd_t * tsd)2855 prof_thread_active_get(tsd_t *tsd) {
2856 	prof_tdata_t *tdata;
2857 
2858 	tdata = prof_tdata_get(tsd, true);
2859 	if (tdata == NULL) {
2860 		return false;
2861 	}
2862 	return tdata->active;
2863 }
2864 
2865 bool
prof_thread_active_set(tsd_t * tsd,bool active)2866 prof_thread_active_set(tsd_t *tsd, bool active) {
2867 	prof_tdata_t *tdata;
2868 
2869 	tdata = prof_tdata_get(tsd, true);
2870 	if (tdata == NULL) {
2871 		return true;
2872 	}
2873 	tdata->active = active;
2874 	return false;
2875 }
2876 
2877 bool
prof_thread_active_init_get(tsdn_t * tsdn)2878 prof_thread_active_init_get(tsdn_t *tsdn) {
2879 	bool active_init;
2880 
2881 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2882 	active_init = prof_thread_active_init;
2883 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2884 	return active_init;
2885 }
2886 
2887 bool
prof_thread_active_init_set(tsdn_t * tsdn,bool active_init)2888 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
2889 	bool active_init_old;
2890 
2891 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2892 	active_init_old = prof_thread_active_init;
2893 	prof_thread_active_init = active_init;
2894 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2895 	return active_init_old;
2896 }
2897 
2898 bool
prof_gdump_get(tsdn_t * tsdn)2899 prof_gdump_get(tsdn_t *tsdn) {
2900 	bool prof_gdump_current;
2901 
2902 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2903 	prof_gdump_current = prof_gdump_val;
2904 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2905 	return prof_gdump_current;
2906 }
2907 
2908 bool
prof_gdump_set(tsdn_t * tsdn,bool gdump)2909 prof_gdump_set(tsdn_t *tsdn, bool gdump) {
2910 	bool prof_gdump_old;
2911 
2912 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2913 	prof_gdump_old = prof_gdump_val;
2914 	prof_gdump_val = gdump;
2915 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2916 	return prof_gdump_old;
2917 }
2918 
2919 void
prof_boot0(void)2920 prof_boot0(void) {
2921 	cassert(config_prof);
2922 
2923 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
2924 	    sizeof(PROF_PREFIX_DEFAULT));
2925 }
2926 
2927 void
prof_boot1(void)2928 prof_boot1(void) {
2929 	cassert(config_prof);
2930 
2931 	/*
2932 	 * opt_prof must be in its final state before any arenas are
2933 	 * initialized, so this function must be executed early.
2934 	 */
2935 
2936 	if (opt_prof_leak && !opt_prof) {
2937 		/*
2938 		 * Enable opt_prof, but in such a way that profiles are never
2939 		 * automatically dumped.
2940 		 */
2941 		opt_prof = true;
2942 		opt_prof_gdump = false;
2943 	} else if (opt_prof) {
2944 		if (opt_lg_prof_interval >= 0) {
2945 			prof_interval = (((uint64_t)1U) <<
2946 			    opt_lg_prof_interval);
2947 		}
2948 	}
2949 }
2950 
2951 bool
prof_boot2(tsd_t * tsd)2952 prof_boot2(tsd_t *tsd) {
2953 	cassert(config_prof);
2954 
2955 	if (opt_prof) {
2956 		unsigned i;
2957 
2958 		lg_prof_sample = opt_lg_prof_sample;
2959 
2960 		prof_active = opt_prof_active;
2961 		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
2962 		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
2963 			return true;
2964 		}
2965 
2966 		prof_gdump_val = opt_prof_gdump;
2967 		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
2968 		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
2969 			return true;
2970 		}
2971 
2972 		prof_thread_active_init = opt_prof_thread_active_init;
2973 		if (malloc_mutex_init(&prof_thread_active_init_mtx,
2974 		    "prof_thread_active_init",
2975 		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
2976 		    malloc_mutex_rank_exclusive)) {
2977 			return true;
2978 		}
2979 
2980 		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
2981 		    prof_bt_keycomp)) {
2982 			return true;
2983 		}
2984 		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
2985 		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
2986 			return true;
2987 		}
2988 
2989 		tdata_tree_new(&tdatas);
2990 		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
2991 		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
2992 			return true;
2993 		}
2994 
2995 		next_thr_uid = 0;
2996 		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
2997 		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
2998 			return true;
2999 		}
3000 
3001 		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
3002 		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
3003 			return true;
3004 		}
3005 		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
3006 		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
3007 			return true;
3008 		}
3009 
3010 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
3011 		    atexit(prof_fdump) != 0) {
3012 			malloc_write("<jemalloc>: Error in atexit()\n");
3013 			if (opt_abort) {
3014 				abort();
3015 			}
3016 		}
3017 
3018 		if (opt_prof_log) {
3019 			prof_log_start(tsd_tsdn(tsd), NULL);
3020 		}
3021 
3022 		if (atexit(prof_log_stop_final) != 0) {
3023 			malloc_write("<jemalloc>: Error in atexit() "
3024 				     "for logging\n");
3025 			if (opt_abort) {
3026 				abort();
3027 			}
3028 		}
3029 
3030 		if (malloc_mutex_init(&log_mtx, "prof_log",
3031 		    WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
3032 			return true;
3033 		}
3034 
3035 		if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
3036 		    prof_bt_node_hash, prof_bt_node_keycomp)) {
3037 			return true;
3038 		}
3039 
3040 		if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
3041 		    prof_thr_node_hash, prof_thr_node_keycomp)) {
3042 			return true;
3043 		}
3044 
3045 		log_tables_initialized = true;
3046 
3047 		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3048 		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
3049 		    CACHELINE);
3050 		if (gctx_locks == NULL) {
3051 			return true;
3052 		}
3053 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3054 			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
3055 			    WITNESS_RANK_PROF_GCTX,
3056 			    malloc_mutex_rank_exclusive)) {
3057 				return true;
3058 			}
3059 		}
3060 
3061 		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3062 		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
3063 		    CACHELINE);
3064 		if (tdata_locks == NULL) {
3065 			return true;
3066 		}
3067 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3068 			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
3069 			    WITNESS_RANK_PROF_TDATA,
3070 			    malloc_mutex_rank_exclusive)) {
3071 				return true;
3072 			}
3073 		}
3074 #ifdef JEMALLOC_PROF_LIBGCC
3075 		/*
3076 		 * Cause the backtracing machinery to allocate its internal
3077 		 * state before enabling profiling.
3078 		 */
3079 		_Unwind_Backtrace(prof_unwind_init_callback, NULL);
3080 #endif
3081 	}
3082 	prof_booted = true;
3083 
3084 	return false;
3085 }
3086 
3087 void
prof_prefork0(tsdn_t * tsdn)3088 prof_prefork0(tsdn_t *tsdn) {
3089 	if (config_prof && opt_prof) {
3090 		unsigned i;
3091 
3092 		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
3093 		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
3094 		malloc_mutex_prefork(tsdn, &tdatas_mtx);
3095 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3096 			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
3097 		}
3098 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3099 			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
3100 		}
3101 	}
3102 }
3103 
3104 void
prof_prefork1(tsdn_t * tsdn)3105 prof_prefork1(tsdn_t *tsdn) {
3106 	if (config_prof && opt_prof) {
3107 		malloc_mutex_prefork(tsdn, &prof_active_mtx);
3108 		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
3109 		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
3110 		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
3111 		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
3112 	}
3113 }
3114 
3115 void
prof_postfork_parent(tsdn_t * tsdn)3116 prof_postfork_parent(tsdn_t *tsdn) {
3117 	if (config_prof && opt_prof) {
3118 		unsigned i;
3119 
3120 		malloc_mutex_postfork_parent(tsdn,
3121 		    &prof_thread_active_init_mtx);
3122 		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
3123 		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
3124 		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
3125 		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
3126 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3127 			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
3128 		}
3129 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3130 			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
3131 		}
3132 		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
3133 		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
3134 		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
3135 	}
3136 }
3137 
3138 void
prof_postfork_child(tsdn_t * tsdn)3139 prof_postfork_child(tsdn_t *tsdn) {
3140 	if (config_prof && opt_prof) {
3141 		unsigned i;
3142 
3143 		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
3144 		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
3145 		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
3146 		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
3147 		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
3148 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3149 			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
3150 		}
3151 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3152 			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
3153 		}
3154 		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
3155 		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
3156 		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
3157 	}
3158 }
3159 
3160 /******************************************************************************/
3161