xref: /freebsd/contrib/jemalloc/src/prof.c (revision e12ff891366cf94db4bfe4c2c810b26a5531053d)
1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/ckh.h"
7 #include "jemalloc/internal/hash.h"
8 #include "jemalloc/internal/malloc_io.h"
9 #include "jemalloc/internal/mutex.h"
10 
11 /******************************************************************************/
12 
13 #ifdef JEMALLOC_PROF_LIBUNWIND
14 #define UNW_LOCAL_ONLY
15 #include <libunwind.h>
16 #endif
17 
18 #ifdef JEMALLOC_PROF_LIBGCC
19 /*
20  * We have a circular dependency -- jemalloc_internal.h tells us if we should
21  * use libgcc's unwinding functionality, but after we've included that, we've
22  * already hooked _Unwind_Backtrace.  We'll temporarily disable hooking.
23  */
24 #undef _Unwind_Backtrace
25 #include <unwind.h>
26 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook)
27 #endif
28 
29 /******************************************************************************/
30 /* Data. */
31 
32 bool		opt_prof = false;
33 bool		opt_prof_active = true;
34 bool		opt_prof_thread_active_init = true;
35 size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
36 ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
37 bool		opt_prof_gdump = false;
38 bool		opt_prof_final = false;
39 bool		opt_prof_leak = false;
40 bool		opt_prof_accum = false;
41 char		opt_prof_prefix[
42     /* Minimize memory bloat for non-prof builds. */
43 #ifdef JEMALLOC_PROF
44     PATH_MAX +
45 #endif
46     1];
47 
48 /*
49  * Initialized as opt_prof_active, and accessed via
50  * prof_active_[gs]et{_unlocked,}().
51  */
52 bool			prof_active;
53 static malloc_mutex_t	prof_active_mtx;
54 
55 /*
56  * Initialized as opt_prof_thread_active_init, and accessed via
57  * prof_thread_active_init_[gs]et().
58  */
59 static bool		prof_thread_active_init;
60 static malloc_mutex_t	prof_thread_active_init_mtx;
61 
62 /*
63  * Initialized as opt_prof_gdump, and accessed via
64  * prof_gdump_[gs]et{_unlocked,}().
65  */
66 bool			prof_gdump_val;
67 static malloc_mutex_t	prof_gdump_mtx;
68 
69 uint64_t	prof_interval = 0;
70 
71 size_t		lg_prof_sample;
72 
73 /*
74  * Table of mutexes that are shared among gctx's.  These are leaf locks, so
75  * there is no problem with using them for more than one gctx at the same time.
76  * The primary motivation for this sharing though is that gctx's are ephemeral,
77  * and destroying mutexes causes complications for systems that allocate when
78  * creating/destroying mutexes.
79  */
80 static malloc_mutex_t	*gctx_locks;
81 static atomic_u_t	cum_gctxs; /* Atomic counter. */
82 
83 /*
84  * Table of mutexes that are shared among tdata's.  No operations require
85  * holding multiple tdata locks, so there is no problem with using them for more
86  * than one tdata at the same time, even though a gctx lock may be acquired
87  * while holding a tdata lock.
88  */
89 static malloc_mutex_t	*tdata_locks;
90 
91 /*
92  * Global hash of (prof_bt_t *)-->(prof_gctx_t *).  This is the master data
93  * structure that knows about all backtraces currently captured.
94  */
95 static ckh_t		bt2gctx;
96 /* Non static to enable profiling. */
97 malloc_mutex_t		bt2gctx_mtx;
98 
99 /*
100  * Tree of all extant prof_tdata_t structures, regardless of state,
101  * {attached,detached,expired}.
102  */
103 static prof_tdata_tree_t	tdatas;
104 static malloc_mutex_t	tdatas_mtx;
105 
106 static uint64_t		next_thr_uid;
107 static malloc_mutex_t	next_thr_uid_mtx;
108 
109 static malloc_mutex_t	prof_dump_seq_mtx;
110 static uint64_t		prof_dump_seq;
111 static uint64_t		prof_dump_iseq;
112 static uint64_t		prof_dump_mseq;
113 static uint64_t		prof_dump_useq;
114 
115 /*
116  * This buffer is rather large for stack allocation, so use a single buffer for
117  * all profile dumps.
118  */
119 static malloc_mutex_t	prof_dump_mtx;
120 static char		prof_dump_buf[
121     /* Minimize memory bloat for non-prof builds. */
122 #ifdef JEMALLOC_PROF
123     PROF_DUMP_BUFSIZE
124 #else
125     1
126 #endif
127 ];
128 static size_t		prof_dump_buf_end;
129 static int		prof_dump_fd;
130 
131 /* Do not dump any profiles until bootstrapping is complete. */
132 static bool		prof_booted = false;
133 
134 /******************************************************************************/
135 /*
136  * Function prototypes for static functions that are referenced prior to
137  * definition.
138  */
139 
140 static bool	prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
141 static void	prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
142 static bool	prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
143     bool even_if_attached);
144 static void	prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
145     bool even_if_attached);
146 static char	*prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
147 
148 /******************************************************************************/
149 /* Red-black trees. */
150 
151 static int
152 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
153 	uint64_t a_thr_uid = a->thr_uid;
154 	uint64_t b_thr_uid = b->thr_uid;
155 	int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
156 	if (ret == 0) {
157 		uint64_t a_thr_discrim = a->thr_discrim;
158 		uint64_t b_thr_discrim = b->thr_discrim;
159 		ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
160 		    b_thr_discrim);
161 		if (ret == 0) {
162 			uint64_t a_tctx_uid = a->tctx_uid;
163 			uint64_t b_tctx_uid = b->tctx_uid;
164 			ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
165 			    b_tctx_uid);
166 		}
167 	}
168 	return ret;
169 }
170 
171 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
172     tctx_link, prof_tctx_comp)
173 
174 static int
175 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
176 	unsigned a_len = a->bt.len;
177 	unsigned b_len = b->bt.len;
178 	unsigned comp_len = (a_len < b_len) ? a_len : b_len;
179 	int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
180 	if (ret == 0) {
181 		ret = (a_len > b_len) - (a_len < b_len);
182 	}
183 	return ret;
184 }
185 
186 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
187     prof_gctx_comp)
188 
189 static int
190 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
191 	int ret;
192 	uint64_t a_uid = a->thr_uid;
193 	uint64_t b_uid = b->thr_uid;
194 
195 	ret = ((a_uid > b_uid) - (a_uid < b_uid));
196 	if (ret == 0) {
197 		uint64_t a_discrim = a->thr_discrim;
198 		uint64_t b_discrim = b->thr_discrim;
199 
200 		ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
201 	}
202 	return ret;
203 }
204 
205 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
206     prof_tdata_comp)
207 
208 /******************************************************************************/
209 
210 void
211 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
212 	prof_tdata_t *tdata;
213 
214 	cassert(config_prof);
215 
216 	if (updated) {
217 		/*
218 		 * Compute a new sample threshold.  This isn't very important in
219 		 * practice, because this function is rarely executed, so the
220 		 * potential for sample bias is minimal except in contrived
221 		 * programs.
222 		 */
223 		tdata = prof_tdata_get(tsd, true);
224 		if (tdata != NULL) {
225 			prof_sample_threshold_update(tdata);
226 		}
227 	}
228 
229 	if ((uintptr_t)tctx > (uintptr_t)1U) {
230 		malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
231 		tctx->prepared = false;
232 		if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
233 			prof_tctx_destroy(tsd, tctx);
234 		} else {
235 			malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
236 		}
237 	}
238 }
239 
240 void
241 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
242     prof_tctx_t *tctx) {
243 	prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
244 
245 	malloc_mutex_lock(tsdn, tctx->tdata->lock);
246 	tctx->cnts.curobjs++;
247 	tctx->cnts.curbytes += usize;
248 	if (opt_prof_accum) {
249 		tctx->cnts.accumobjs++;
250 		tctx->cnts.accumbytes += usize;
251 	}
252 	tctx->prepared = false;
253 	malloc_mutex_unlock(tsdn, tctx->tdata->lock);
254 }
255 
256 void
257 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) {
258 	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
259 	assert(tctx->cnts.curobjs > 0);
260 	assert(tctx->cnts.curbytes >= usize);
261 	tctx->cnts.curobjs--;
262 	tctx->cnts.curbytes -= usize;
263 
264 	if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
265 		prof_tctx_destroy(tsd, tctx);
266 	} else {
267 		malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
268 	}
269 }
270 
271 void
272 bt_init(prof_bt_t *bt, void **vec) {
273 	cassert(config_prof);
274 
275 	bt->vec = vec;
276 	bt->len = 0;
277 }
278 
279 static void
280 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
281 	cassert(config_prof);
282 	assert(tdata == prof_tdata_get(tsd, false));
283 
284 	if (tdata != NULL) {
285 		assert(!tdata->enq);
286 		tdata->enq = true;
287 	}
288 
289 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
290 }
291 
292 static void
293 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
294 	cassert(config_prof);
295 	assert(tdata == prof_tdata_get(tsd, false));
296 
297 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
298 
299 	if (tdata != NULL) {
300 		bool idump, gdump;
301 
302 		assert(tdata->enq);
303 		tdata->enq = false;
304 		idump = tdata->enq_idump;
305 		tdata->enq_idump = false;
306 		gdump = tdata->enq_gdump;
307 		tdata->enq_gdump = false;
308 
309 		if (idump) {
310 			prof_idump(tsd_tsdn(tsd));
311 		}
312 		if (gdump) {
313 			prof_gdump(tsd_tsdn(tsd));
314 		}
315 	}
316 }
317 
318 #ifdef JEMALLOC_PROF_LIBUNWIND
319 void
320 prof_backtrace(prof_bt_t *bt) {
321 	int nframes;
322 
323 	cassert(config_prof);
324 	assert(bt->len == 0);
325 	assert(bt->vec != NULL);
326 
327 	nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
328 	if (nframes <= 0) {
329 		return;
330 	}
331 	bt->len = nframes;
332 }
333 #elif (defined(JEMALLOC_PROF_LIBGCC))
334 static _Unwind_Reason_Code
335 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
336 	cassert(config_prof);
337 
338 	return _URC_NO_REASON;
339 }
340 
341 static _Unwind_Reason_Code
342 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
343 	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
344 	void *ip;
345 
346 	cassert(config_prof);
347 
348 	ip = (void *)_Unwind_GetIP(context);
349 	if (ip == NULL) {
350 		return _URC_END_OF_STACK;
351 	}
352 	data->bt->vec[data->bt->len] = ip;
353 	data->bt->len++;
354 	if (data->bt->len == data->max) {
355 		return _URC_END_OF_STACK;
356 	}
357 
358 	return _URC_NO_REASON;
359 }
360 
361 void
362 prof_backtrace(prof_bt_t *bt) {
363 	prof_unwind_data_t data = {bt, PROF_BT_MAX};
364 
365 	cassert(config_prof);
366 
367 	_Unwind_Backtrace(prof_unwind_callback, &data);
368 }
369 #elif (defined(JEMALLOC_PROF_GCC))
370 void
371 prof_backtrace(prof_bt_t *bt) {
372 #define BT_FRAME(i)							\
373 	if ((i) < PROF_BT_MAX) {					\
374 		void *p;						\
375 		if (__builtin_frame_address(i) == 0) {			\
376 			return;						\
377 		}							\
378 		p = __builtin_return_address(i);			\
379 		if (p == NULL) {					\
380 			return;						\
381 		}							\
382 		bt->vec[(i)] = p;					\
383 		bt->len = (i) + 1;					\
384 	} else {							\
385 		return;							\
386 	}
387 
388 	cassert(config_prof);
389 
390 	BT_FRAME(0)
391 	BT_FRAME(1)
392 	BT_FRAME(2)
393 	BT_FRAME(3)
394 	BT_FRAME(4)
395 	BT_FRAME(5)
396 	BT_FRAME(6)
397 	BT_FRAME(7)
398 	BT_FRAME(8)
399 	BT_FRAME(9)
400 
401 	BT_FRAME(10)
402 	BT_FRAME(11)
403 	BT_FRAME(12)
404 	BT_FRAME(13)
405 	BT_FRAME(14)
406 	BT_FRAME(15)
407 	BT_FRAME(16)
408 	BT_FRAME(17)
409 	BT_FRAME(18)
410 	BT_FRAME(19)
411 
412 	BT_FRAME(20)
413 	BT_FRAME(21)
414 	BT_FRAME(22)
415 	BT_FRAME(23)
416 	BT_FRAME(24)
417 	BT_FRAME(25)
418 	BT_FRAME(26)
419 	BT_FRAME(27)
420 	BT_FRAME(28)
421 	BT_FRAME(29)
422 
423 	BT_FRAME(30)
424 	BT_FRAME(31)
425 	BT_FRAME(32)
426 	BT_FRAME(33)
427 	BT_FRAME(34)
428 	BT_FRAME(35)
429 	BT_FRAME(36)
430 	BT_FRAME(37)
431 	BT_FRAME(38)
432 	BT_FRAME(39)
433 
434 	BT_FRAME(40)
435 	BT_FRAME(41)
436 	BT_FRAME(42)
437 	BT_FRAME(43)
438 	BT_FRAME(44)
439 	BT_FRAME(45)
440 	BT_FRAME(46)
441 	BT_FRAME(47)
442 	BT_FRAME(48)
443 	BT_FRAME(49)
444 
445 	BT_FRAME(50)
446 	BT_FRAME(51)
447 	BT_FRAME(52)
448 	BT_FRAME(53)
449 	BT_FRAME(54)
450 	BT_FRAME(55)
451 	BT_FRAME(56)
452 	BT_FRAME(57)
453 	BT_FRAME(58)
454 	BT_FRAME(59)
455 
456 	BT_FRAME(60)
457 	BT_FRAME(61)
458 	BT_FRAME(62)
459 	BT_FRAME(63)
460 	BT_FRAME(64)
461 	BT_FRAME(65)
462 	BT_FRAME(66)
463 	BT_FRAME(67)
464 	BT_FRAME(68)
465 	BT_FRAME(69)
466 
467 	BT_FRAME(70)
468 	BT_FRAME(71)
469 	BT_FRAME(72)
470 	BT_FRAME(73)
471 	BT_FRAME(74)
472 	BT_FRAME(75)
473 	BT_FRAME(76)
474 	BT_FRAME(77)
475 	BT_FRAME(78)
476 	BT_FRAME(79)
477 
478 	BT_FRAME(80)
479 	BT_FRAME(81)
480 	BT_FRAME(82)
481 	BT_FRAME(83)
482 	BT_FRAME(84)
483 	BT_FRAME(85)
484 	BT_FRAME(86)
485 	BT_FRAME(87)
486 	BT_FRAME(88)
487 	BT_FRAME(89)
488 
489 	BT_FRAME(90)
490 	BT_FRAME(91)
491 	BT_FRAME(92)
492 	BT_FRAME(93)
493 	BT_FRAME(94)
494 	BT_FRAME(95)
495 	BT_FRAME(96)
496 	BT_FRAME(97)
497 	BT_FRAME(98)
498 	BT_FRAME(99)
499 
500 	BT_FRAME(100)
501 	BT_FRAME(101)
502 	BT_FRAME(102)
503 	BT_FRAME(103)
504 	BT_FRAME(104)
505 	BT_FRAME(105)
506 	BT_FRAME(106)
507 	BT_FRAME(107)
508 	BT_FRAME(108)
509 	BT_FRAME(109)
510 
511 	BT_FRAME(110)
512 	BT_FRAME(111)
513 	BT_FRAME(112)
514 	BT_FRAME(113)
515 	BT_FRAME(114)
516 	BT_FRAME(115)
517 	BT_FRAME(116)
518 	BT_FRAME(117)
519 	BT_FRAME(118)
520 	BT_FRAME(119)
521 
522 	BT_FRAME(120)
523 	BT_FRAME(121)
524 	BT_FRAME(122)
525 	BT_FRAME(123)
526 	BT_FRAME(124)
527 	BT_FRAME(125)
528 	BT_FRAME(126)
529 	BT_FRAME(127)
530 #undef BT_FRAME
531 }
532 #else
533 void
534 prof_backtrace(prof_bt_t *bt) {
535 	cassert(config_prof);
536 	not_reached();
537 }
538 #endif
539 
540 static malloc_mutex_t *
541 prof_gctx_mutex_choose(void) {
542 	unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
543 
544 	return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
545 }
546 
547 static malloc_mutex_t *
548 prof_tdata_mutex_choose(uint64_t thr_uid) {
549 	return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
550 }
551 
552 static prof_gctx_t *
553 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
554 	/*
555 	 * Create a single allocation that has space for vec of length bt->len.
556 	 */
557 	size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
558 	prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
559 	    sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
560 	    true);
561 	if (gctx == NULL) {
562 		return NULL;
563 	}
564 	gctx->lock = prof_gctx_mutex_choose();
565 	/*
566 	 * Set nlimbo to 1, in order to avoid a race condition with
567 	 * prof_tctx_destroy()/prof_gctx_try_destroy().
568 	 */
569 	gctx->nlimbo = 1;
570 	tctx_tree_new(&gctx->tctxs);
571 	/* Duplicate bt. */
572 	memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
573 	gctx->bt.vec = gctx->vec;
574 	gctx->bt.len = bt->len;
575 	return gctx;
576 }
577 
578 static void
579 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
580     prof_tdata_t *tdata) {
581 	cassert(config_prof);
582 
583 	/*
584 	 * Check that gctx is still unused by any thread cache before destroying
585 	 * it.  prof_lookup() increments gctx->nlimbo in order to avoid a race
586 	 * condition with this function, as does prof_tctx_destroy() in order to
587 	 * avoid a race between the main body of prof_tctx_destroy() and entry
588 	 * into this function.
589 	 */
590 	prof_enter(tsd, tdata_self);
591 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
592 	assert(gctx->nlimbo != 0);
593 	if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
594 		/* Remove gctx from bt2gctx. */
595 		if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
596 			not_reached();
597 		}
598 		prof_leave(tsd, tdata_self);
599 		/* Destroy gctx. */
600 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
601 		idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
602 	} else {
603 		/*
604 		 * Compensate for increment in prof_tctx_destroy() or
605 		 * prof_lookup().
606 		 */
607 		gctx->nlimbo--;
608 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
609 		prof_leave(tsd, tdata_self);
610 	}
611 }
612 
613 static bool
614 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
615 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
616 
617 	if (opt_prof_accum) {
618 		return false;
619 	}
620 	if (tctx->cnts.curobjs != 0) {
621 		return false;
622 	}
623 	if (tctx->prepared) {
624 		return false;
625 	}
626 	return true;
627 }
628 
629 static bool
630 prof_gctx_should_destroy(prof_gctx_t *gctx) {
631 	if (opt_prof_accum) {
632 		return false;
633 	}
634 	if (!tctx_tree_empty(&gctx->tctxs)) {
635 		return false;
636 	}
637 	if (gctx->nlimbo != 0) {
638 		return false;
639 	}
640 	return true;
641 }
642 
643 static void
644 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
645 	prof_tdata_t *tdata = tctx->tdata;
646 	prof_gctx_t *gctx = tctx->gctx;
647 	bool destroy_tdata, destroy_tctx, destroy_gctx;
648 
649 	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
650 
651 	assert(tctx->cnts.curobjs == 0);
652 	assert(tctx->cnts.curbytes == 0);
653 	assert(!opt_prof_accum);
654 	assert(tctx->cnts.accumobjs == 0);
655 	assert(tctx->cnts.accumbytes == 0);
656 
657 	ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
658 	destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
659 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
660 
661 	malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
662 	switch (tctx->state) {
663 	case prof_tctx_state_nominal:
664 		tctx_tree_remove(&gctx->tctxs, tctx);
665 		destroy_tctx = true;
666 		if (prof_gctx_should_destroy(gctx)) {
667 			/*
668 			 * Increment gctx->nlimbo in order to keep another
669 			 * thread from winning the race to destroy gctx while
670 			 * this one has gctx->lock dropped.  Without this, it
671 			 * would be possible for another thread to:
672 			 *
673 			 * 1) Sample an allocation associated with gctx.
674 			 * 2) Deallocate the sampled object.
675 			 * 3) Successfully prof_gctx_try_destroy(gctx).
676 			 *
677 			 * The result would be that gctx no longer exists by the
678 			 * time this thread accesses it in
679 			 * prof_gctx_try_destroy().
680 			 */
681 			gctx->nlimbo++;
682 			destroy_gctx = true;
683 		} else {
684 			destroy_gctx = false;
685 		}
686 		break;
687 	case prof_tctx_state_dumping:
688 		/*
689 		 * A dumping thread needs tctx to remain valid until dumping
690 		 * has finished.  Change state such that the dumping thread will
691 		 * complete destruction during a late dump iteration phase.
692 		 */
693 		tctx->state = prof_tctx_state_purgatory;
694 		destroy_tctx = false;
695 		destroy_gctx = false;
696 		break;
697 	default:
698 		not_reached();
699 		destroy_tctx = false;
700 		destroy_gctx = false;
701 	}
702 	malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
703 	if (destroy_gctx) {
704 		prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
705 		    tdata);
706 	}
707 
708 	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
709 
710 	if (destroy_tdata) {
711 		prof_tdata_destroy(tsd, tdata, false);
712 	}
713 
714 	if (destroy_tctx) {
715 		idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
716 	}
717 }
718 
719 static bool
720 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
721     void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
722 	union {
723 		prof_gctx_t	*p;
724 		void		*v;
725 	} gctx, tgctx;
726 	union {
727 		prof_bt_t	*p;
728 		void		*v;
729 	} btkey;
730 	bool new_gctx;
731 
732 	prof_enter(tsd, tdata);
733 	if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
734 		/* bt has never been seen before.  Insert it. */
735 		prof_leave(tsd, tdata);
736 		tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
737 		if (tgctx.v == NULL) {
738 			return true;
739 		}
740 		prof_enter(tsd, tdata);
741 		if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
742 			gctx.p = tgctx.p;
743 			btkey.p = &gctx.p->bt;
744 			if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
745 				/* OOM. */
746 				prof_leave(tsd, tdata);
747 				idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
748 				    true, true);
749 				return true;
750 			}
751 			new_gctx = true;
752 		} else {
753 			new_gctx = false;
754 		}
755 	} else {
756 		tgctx.v = NULL;
757 		new_gctx = false;
758 	}
759 
760 	if (!new_gctx) {
761 		/*
762 		 * Increment nlimbo, in order to avoid a race condition with
763 		 * prof_tctx_destroy()/prof_gctx_try_destroy().
764 		 */
765 		malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
766 		gctx.p->nlimbo++;
767 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
768 		new_gctx = false;
769 
770 		if (tgctx.v != NULL) {
771 			/* Lost race to insert. */
772 			idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
773 			    true);
774 		}
775 	}
776 	prof_leave(tsd, tdata);
777 
778 	*p_btkey = btkey.v;
779 	*p_gctx = gctx.p;
780 	*p_new_gctx = new_gctx;
781 	return false;
782 }
783 
784 prof_tctx_t *
785 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
786 	union {
787 		prof_tctx_t	*p;
788 		void		*v;
789 	} ret;
790 	prof_tdata_t *tdata;
791 	bool not_found;
792 
793 	cassert(config_prof);
794 
795 	tdata = prof_tdata_get(tsd, false);
796 	if (tdata == NULL) {
797 		return NULL;
798 	}
799 
800 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
801 	not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
802 	if (!not_found) { /* Note double negative! */
803 		ret.p->prepared = true;
804 	}
805 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
806 	if (not_found) {
807 		void *btkey;
808 		prof_gctx_t *gctx;
809 		bool new_gctx, error;
810 
811 		/*
812 		 * This thread's cache lacks bt.  Look for it in the global
813 		 * cache.
814 		 */
815 		if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
816 		    &new_gctx)) {
817 			return NULL;
818 		}
819 
820 		/* Link a prof_tctx_t into gctx for this thread. */
821 		ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
822 		    sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
823 		    arena_ichoose(tsd, NULL), true);
824 		if (ret.p == NULL) {
825 			if (new_gctx) {
826 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
827 			}
828 			return NULL;
829 		}
830 		ret.p->tdata = tdata;
831 		ret.p->thr_uid = tdata->thr_uid;
832 		ret.p->thr_discrim = tdata->thr_discrim;
833 		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
834 		ret.p->gctx = gctx;
835 		ret.p->tctx_uid = tdata->tctx_uid_next++;
836 		ret.p->prepared = true;
837 		ret.p->state = prof_tctx_state_initializing;
838 		malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
839 		error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
840 		malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
841 		if (error) {
842 			if (new_gctx) {
843 				prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
844 			}
845 			idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
846 			return NULL;
847 		}
848 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
849 		ret.p->state = prof_tctx_state_nominal;
850 		tctx_tree_insert(&gctx->tctxs, ret.p);
851 		gctx->nlimbo--;
852 		malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
853 	}
854 
855 	return ret.p;
856 }
857 
858 /*
859  * The bodies of this function and prof_leakcheck() are compiled out unless heap
860  * profiling is enabled, so that it is possible to compile jemalloc with
861  * floating point support completely disabled.  Avoiding floating point code is
862  * important on memory-constrained systems, but it also enables a workaround for
863  * versions of glibc that don't properly save/restore floating point registers
864  * during dynamic lazy symbol loading (which internally calls into whatever
865  * malloc implementation happens to be integrated into the application).  Note
866  * that some compilers (e.g.  gcc 4.8) may use floating point registers for fast
867  * memory moves, so jemalloc must be compiled with such optimizations disabled
868  * (e.g.
869  * -mno-sse) in order for the workaround to be complete.
870  */
871 void
872 prof_sample_threshold_update(prof_tdata_t *tdata) {
873 #ifdef JEMALLOC_PROF
874 	uint64_t r;
875 	double u;
876 
877 	if (!config_prof) {
878 		return;
879 	}
880 
881 	if (lg_prof_sample == 0) {
882 		tdata->bytes_until_sample = 0;
883 		return;
884 	}
885 
886 	/*
887 	 * Compute sample interval as a geometrically distributed random
888 	 * variable with mean (2^lg_prof_sample).
889 	 *
890 	 *                             __        __
891 	 *                             |  log(u)  |                     1
892 	 * tdata->bytes_until_sample = | -------- |, where p = ---------------
893 	 *                             | log(1-p) |             lg_prof_sample
894 	 *                                                     2
895 	 *
896 	 * For more information on the math, see:
897 	 *
898 	 *   Non-Uniform Random Variate Generation
899 	 *   Luc Devroye
900 	 *   Springer-Verlag, New York, 1986
901 	 *   pp 500
902 	 *   (http://luc.devroye.org/rnbookindex.html)
903 	 */
904 	r = prng_lg_range_u64(&tdata->prng_state, 53);
905 	u = (double)r * (1.0/9007199254740992.0L);
906 	tdata->bytes_until_sample = (uint64_t)(log(u) /
907 	    log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
908 	    + (uint64_t)1U;
909 #endif
910 }
911 
912 #ifdef JEMALLOC_JET
913 static prof_tdata_t *
914 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
915     void *arg) {
916 	size_t *tdata_count = (size_t *)arg;
917 
918 	(*tdata_count)++;
919 
920 	return NULL;
921 }
922 
923 size_t
924 prof_tdata_count(void) {
925 	size_t tdata_count = 0;
926 	tsdn_t *tsdn;
927 
928 	tsdn = tsdn_fetch();
929 	malloc_mutex_lock(tsdn, &tdatas_mtx);
930 	tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
931 	    (void *)&tdata_count);
932 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
933 
934 	return tdata_count;
935 }
936 
937 size_t
938 prof_bt_count(void) {
939 	size_t bt_count;
940 	tsd_t *tsd;
941 	prof_tdata_t *tdata;
942 
943 	tsd = tsd_fetch();
944 	tdata = prof_tdata_get(tsd, false);
945 	if (tdata == NULL) {
946 		return 0;
947 	}
948 
949 	malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
950 	bt_count = ckh_count(&bt2gctx);
951 	malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
952 
953 	return bt_count;
954 }
955 #endif
956 
957 static int
958 prof_dump_open_impl(bool propagate_err, const char *filename) {
959 	int fd;
960 
961 	fd = creat(filename, 0644);
962 	if (fd == -1 && !propagate_err) {
963 		malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n",
964 		    filename);
965 		if (opt_abort) {
966 			abort();
967 		}
968 	}
969 
970 	return fd;
971 }
972 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
973 
974 static bool
975 prof_dump_flush(bool propagate_err) {
976 	bool ret = false;
977 	ssize_t err;
978 
979 	cassert(config_prof);
980 
981 	err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
982 	if (err == -1) {
983 		if (!propagate_err) {
984 			malloc_write("<jemalloc>: write() failed during heap "
985 			    "profile flush\n");
986 			if (opt_abort) {
987 				abort();
988 			}
989 		}
990 		ret = true;
991 	}
992 	prof_dump_buf_end = 0;
993 
994 	return ret;
995 }
996 
997 static bool
998 prof_dump_close(bool propagate_err) {
999 	bool ret;
1000 
1001 	assert(prof_dump_fd != -1);
1002 	ret = prof_dump_flush(propagate_err);
1003 	close(prof_dump_fd);
1004 	prof_dump_fd = -1;
1005 
1006 	return ret;
1007 }
1008 
1009 static bool
1010 prof_dump_write(bool propagate_err, const char *s) {
1011 	size_t i, slen, n;
1012 
1013 	cassert(config_prof);
1014 
1015 	i = 0;
1016 	slen = strlen(s);
1017 	while (i < slen) {
1018 		/* Flush the buffer if it is full. */
1019 		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1020 			if (prof_dump_flush(propagate_err) && propagate_err) {
1021 				return true;
1022 			}
1023 		}
1024 
1025 		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
1026 			/* Finish writing. */
1027 			n = slen - i;
1028 		} else {
1029 			/* Write as much of s as will fit. */
1030 			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1031 		}
1032 		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1033 		prof_dump_buf_end += n;
1034 		i += n;
1035 	}
1036 
1037 	return false;
1038 }
1039 
1040 JEMALLOC_FORMAT_PRINTF(2, 3)
1041 static bool
1042 prof_dump_printf(bool propagate_err, const char *format, ...) {
1043 	bool ret;
1044 	va_list ap;
1045 	char buf[PROF_PRINTF_BUFSIZE];
1046 
1047 	va_start(ap, format);
1048 	malloc_vsnprintf(buf, sizeof(buf), format, ap);
1049 	va_end(ap);
1050 	ret = prof_dump_write(propagate_err, buf);
1051 
1052 	return ret;
1053 }
1054 
1055 static void
1056 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
1057 	malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1058 
1059 	malloc_mutex_lock(tsdn, tctx->gctx->lock);
1060 
1061 	switch (tctx->state) {
1062 	case prof_tctx_state_initializing:
1063 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1064 		return;
1065 	case prof_tctx_state_nominal:
1066 		tctx->state = prof_tctx_state_dumping;
1067 		malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1068 
1069 		memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1070 
1071 		tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1072 		tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1073 		if (opt_prof_accum) {
1074 			tdata->cnt_summed.accumobjs +=
1075 			    tctx->dump_cnts.accumobjs;
1076 			tdata->cnt_summed.accumbytes +=
1077 			    tctx->dump_cnts.accumbytes;
1078 		}
1079 		break;
1080 	case prof_tctx_state_dumping:
1081 	case prof_tctx_state_purgatory:
1082 		not_reached();
1083 	}
1084 }
1085 
1086 static void
1087 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
1088 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1089 
1090 	gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1091 	gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1092 	if (opt_prof_accum) {
1093 		gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1094 		gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1095 	}
1096 }
1097 
1098 static prof_tctx_t *
1099 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1100 	tsdn_t *tsdn = (tsdn_t *)arg;
1101 
1102 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1103 
1104 	switch (tctx->state) {
1105 	case prof_tctx_state_nominal:
1106 		/* New since dumping started; ignore. */
1107 		break;
1108 	case prof_tctx_state_dumping:
1109 	case prof_tctx_state_purgatory:
1110 		prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1111 		break;
1112 	default:
1113 		not_reached();
1114 	}
1115 
1116 	return NULL;
1117 }
1118 
1119 struct prof_tctx_dump_iter_arg_s {
1120 	tsdn_t	*tsdn;
1121 	bool	propagate_err;
1122 };
1123 
1124 static prof_tctx_t *
1125 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
1126 	struct prof_tctx_dump_iter_arg_s *arg =
1127 	    (struct prof_tctx_dump_iter_arg_s *)opaque;
1128 
1129 	malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1130 
1131 	switch (tctx->state) {
1132 	case prof_tctx_state_initializing:
1133 	case prof_tctx_state_nominal:
1134 		/* Not captured by this dump. */
1135 		break;
1136 	case prof_tctx_state_dumping:
1137 	case prof_tctx_state_purgatory:
1138 		if (prof_dump_printf(arg->propagate_err,
1139 		    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1140 		    "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1141 		    tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1142 		    tctx->dump_cnts.accumbytes)) {
1143 			return tctx;
1144 		}
1145 		break;
1146 	default:
1147 		not_reached();
1148 	}
1149 	return NULL;
1150 }
1151 
1152 static prof_tctx_t *
1153 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1154 	tsdn_t *tsdn = (tsdn_t *)arg;
1155 	prof_tctx_t *ret;
1156 
1157 	malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1158 
1159 	switch (tctx->state) {
1160 	case prof_tctx_state_nominal:
1161 		/* New since dumping started; ignore. */
1162 		break;
1163 	case prof_tctx_state_dumping:
1164 		tctx->state = prof_tctx_state_nominal;
1165 		break;
1166 	case prof_tctx_state_purgatory:
1167 		ret = tctx;
1168 		goto label_return;
1169 	default:
1170 		not_reached();
1171 	}
1172 
1173 	ret = NULL;
1174 label_return:
1175 	return ret;
1176 }
1177 
1178 static void
1179 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
1180 	cassert(config_prof);
1181 
1182 	malloc_mutex_lock(tsdn, gctx->lock);
1183 
1184 	/*
1185 	 * Increment nlimbo so that gctx won't go away before dump.
1186 	 * Additionally, link gctx into the dump list so that it is included in
1187 	 * prof_dump()'s second pass.
1188 	 */
1189 	gctx->nlimbo++;
1190 	gctx_tree_insert(gctxs, gctx);
1191 
1192 	memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1193 
1194 	malloc_mutex_unlock(tsdn, gctx->lock);
1195 }
1196 
/* Context passed to prof_gctx_merge_iter() through its opaque argument. */
struct prof_gctx_merge_iter_arg_s {
	/* Used to lock each gctx and handed on to prof_tctx_merge_iter(). */
	tsdn_t	*tsdn;
	/* Incremented once per gctx whose summed curobjs is non-zero. */
	size_t	leak_ngctx;
};
1201 
1202 static prof_gctx_t *
1203 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1204 	struct prof_gctx_merge_iter_arg_s *arg =
1205 	    (struct prof_gctx_merge_iter_arg_s *)opaque;
1206 
1207 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1208 	tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1209 	    (void *)arg->tsdn);
1210 	if (gctx->cnt_summed.curobjs != 0) {
1211 		arg->leak_ngctx++;
1212 	}
1213 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1214 
1215 	return NULL;
1216 }
1217 
1218 static void
1219 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
1220 	prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1221 	prof_gctx_t *gctx;
1222 
1223 	/*
1224 	 * Standard tree iteration won't work here, because as soon as we
1225 	 * decrement gctx->nlimbo and unlock gctx, another thread can
1226 	 * concurrently destroy it, which will corrupt the tree.  Therefore,
1227 	 * tear down the tree one node at a time during iteration.
1228 	 */
1229 	while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1230 		gctx_tree_remove(gctxs, gctx);
1231 		malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1232 		{
1233 			prof_tctx_t *next;
1234 
1235 			next = NULL;
1236 			do {
1237 				prof_tctx_t *to_destroy =
1238 				    tctx_tree_iter(&gctx->tctxs, next,
1239 				    prof_tctx_finish_iter,
1240 				    (void *)tsd_tsdn(tsd));
1241 				if (to_destroy != NULL) {
1242 					next = tctx_tree_next(&gctx->tctxs,
1243 					    to_destroy);
1244 					tctx_tree_remove(&gctx->tctxs,
1245 					    to_destroy);
1246 					idalloctm(tsd_tsdn(tsd), to_destroy,
1247 					    NULL, NULL, true, true);
1248 				} else {
1249 					next = NULL;
1250 				}
1251 			} while (next != NULL);
1252 		}
1253 		gctx->nlimbo--;
1254 		if (prof_gctx_should_destroy(gctx)) {
1255 			gctx->nlimbo++;
1256 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1257 			prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1258 		} else {
1259 			malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1260 		}
1261 	}
1262 }
1263 
/* Context passed to prof_tdata_merge_iter() through its opaque argument. */
struct prof_tdata_merge_iter_arg_s {
	/* Used to lock each tdata and handed on to prof_tctx_merge_tdata(). */
	tsdn_t		*tsdn;
	/* Running total of the summed counters of all non-expired tdatas. */
	prof_cnt_t	cnt_all;
};
1268 
1269 static prof_tdata_t *
1270 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1271     void *opaque) {
1272 	struct prof_tdata_merge_iter_arg_s *arg =
1273 	    (struct prof_tdata_merge_iter_arg_s *)opaque;
1274 
1275 	malloc_mutex_lock(arg->tsdn, tdata->lock);
1276 	if (!tdata->expired) {
1277 		size_t tabind;
1278 		union {
1279 			prof_tctx_t	*p;
1280 			void		*v;
1281 		} tctx;
1282 
1283 		tdata->dumping = true;
1284 		memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1285 		for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1286 		    &tctx.v);) {
1287 			prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1288 		}
1289 
1290 		arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1291 		arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1292 		if (opt_prof_accum) {
1293 			arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1294 			arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1295 		}
1296 	} else {
1297 		tdata->dumping = false;
1298 	}
1299 	malloc_mutex_unlock(arg->tsdn, tdata->lock);
1300 
1301 	return NULL;
1302 }
1303 
1304 static prof_tdata_t *
1305 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1306     void *arg) {
1307 	bool propagate_err = *(bool *)arg;
1308 
1309 	if (!tdata->dumping) {
1310 		return NULL;
1311 	}
1312 
1313 	if (prof_dump_printf(propagate_err,
1314 	    "  t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1315 	    tdata->thr_uid, tdata->cnt_summed.curobjs,
1316 	    tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1317 	    tdata->cnt_summed.accumbytes,
1318 	    (tdata->thread_name != NULL) ? " " : "",
1319 	    (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
1320 		return tdata;
1321 	}
1322 	return NULL;
1323 }
1324 
1325 static bool
1326 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
1327     const prof_cnt_t *cnt_all) {
1328 	bool ret;
1329 
1330 	if (prof_dump_printf(propagate_err,
1331 	    "heap_v2/%"FMTu64"\n"
1332 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1333 	    ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1334 	    cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
1335 		return true;
1336 	}
1337 
1338 	malloc_mutex_lock(tsdn, &tdatas_mtx);
1339 	ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1340 	    (void *)&propagate_err) != NULL);
1341 	malloc_mutex_unlock(tsdn, &tdatas_mtx);
1342 	return ret;
1343 }
1344 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
1345 
1346 static bool
1347 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1348     const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
1349 	bool ret;
1350 	unsigned i;
1351 	struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1352 
1353 	cassert(config_prof);
1354 	malloc_mutex_assert_owner(tsdn, gctx->lock);
1355 
1356 	/* Avoid dumping such gctx's that have no useful data. */
1357 	if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1358 	    (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1359 		assert(gctx->cnt_summed.curobjs == 0);
1360 		assert(gctx->cnt_summed.curbytes == 0);
1361 		assert(gctx->cnt_summed.accumobjs == 0);
1362 		assert(gctx->cnt_summed.accumbytes == 0);
1363 		ret = false;
1364 		goto label_return;
1365 	}
1366 
1367 	if (prof_dump_printf(propagate_err, "@")) {
1368 		ret = true;
1369 		goto label_return;
1370 	}
1371 	for (i = 0; i < bt->len; i++) {
1372 		if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1373 		    (uintptr_t)bt->vec[i])) {
1374 			ret = true;
1375 			goto label_return;
1376 		}
1377 	}
1378 
1379 	if (prof_dump_printf(propagate_err,
1380 	    "\n"
1381 	    "  t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1382 	    gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1383 	    gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1384 		ret = true;
1385 		goto label_return;
1386 	}
1387 
1388 	prof_tctx_dump_iter_arg.tsdn = tsdn;
1389 	prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1390 	if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1391 	    (void *)&prof_tctx_dump_iter_arg) != NULL) {
1392 		ret = true;
1393 		goto label_return;
1394 	}
1395 
1396 	ret = false;
1397 label_return:
1398 	return ret;
1399 }
1400 
1401 #ifndef _WIN32
1402 JEMALLOC_FORMAT_PRINTF(1, 2)
1403 static int
1404 prof_open_maps(const char *format, ...) {
1405 	int mfd;
1406 	va_list ap;
1407 	char filename[PATH_MAX + 1];
1408 
1409 	va_start(ap, format);
1410 	malloc_vsnprintf(filename, sizeof(filename), format, ap);
1411 	va_end(ap);
1412 
1413 #if defined(O_CLOEXEC)
1414 	mfd = open(filename, O_RDONLY | O_CLOEXEC);
1415 #else
1416 	mfd = open(filename, O_RDONLY);
1417 	if (mfd != -1) {
1418 		fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
1419 	}
1420 #endif
1421 
1422 	return mfd;
1423 }
1424 #endif
1425 
/* Return the id of the current process as an int. */
static int
prof_getpid(void) {
#ifndef _WIN32
	return getpid();
#else
	return GetCurrentProcessId();
#endif
}
1434 
1435 static bool
1436 prof_dump_maps(bool propagate_err) {
1437 	bool ret;
1438 	int mfd;
1439 
1440 	cassert(config_prof);
1441 #ifdef __FreeBSD__
1442 	mfd = prof_open_maps("/proc/curproc/map");
1443 #elif defined(_WIN32)
1444 	mfd = -1; // Not implemented
1445 #else
1446 	{
1447 		int pid = prof_getpid();
1448 
1449 		mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1450 		if (mfd == -1) {
1451 			mfd = prof_open_maps("/proc/%d/maps", pid);
1452 		}
1453 	}
1454 #endif
1455 	if (mfd != -1) {
1456 		ssize_t nread;
1457 
1458 		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1459 		    propagate_err) {
1460 			ret = true;
1461 			goto label_return;
1462 		}
1463 		nread = 0;
1464 		do {
1465 			prof_dump_buf_end += nread;
1466 			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1467 				/* Make space in prof_dump_buf before read(). */
1468 				if (prof_dump_flush(propagate_err) &&
1469 				    propagate_err) {
1470 					ret = true;
1471 					goto label_return;
1472 				}
1473 			}
1474 			nread = malloc_read_fd(mfd,
1475 			    &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
1476 			    - prof_dump_buf_end);
1477 		} while (nread > 0);
1478 	} else {
1479 		ret = true;
1480 		goto label_return;
1481 	}
1482 
1483 	ret = false;
1484 label_return:
1485 	if (mfd != -1) {
1486 		close(mfd);
1487 	}
1488 	return ret;
1489 }
1490 
1491 /*
1492  * See prof_sample_threshold_update() comment for why the body of this function
1493  * is conditionally compiled.
1494  */
1495 static void
1496 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1497     const char *filename) {
1498 #ifdef JEMALLOC_PROF
1499 	/*
1500 	 * Scaling is equivalent AdjustSamples() in jeprof, but the result may
1501 	 * differ slightly from what jeprof reports, because here we scale the
1502 	 * summary values, whereas jeprof scales each context individually and
1503 	 * reports the sums of the scaled values.
1504 	 */
1505 	if (cnt_all->curbytes != 0) {
1506 		double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1507 		double ratio = (((double)cnt_all->curbytes) /
1508 		    (double)cnt_all->curobjs) / sample_period;
1509 		double scale_factor = 1.0 / (1.0 - exp(-ratio));
1510 		uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1511 		    * scale_factor);
1512 		uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1513 		    scale_factor);
1514 
1515 		malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1516 		    " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1517 		    curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1518 		    1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1519 		malloc_printf(
1520 		    "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1521 		    filename);
1522 	}
1523 #endif
1524 }
1525 
/* Context passed to prof_gctx_dump_iter() through its opaque argument. */
struct prof_gctx_dump_iter_arg_s {
	/* Used to lock each gctx and handed on to prof_dump_gctx(). */
	tsdn_t	*tsdn;
	/* Forwarded to prof_dump_gctx() as its propagate_err argument. */
	bool	propagate_err;
};
1530 
1531 static prof_gctx_t *
1532 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1533 	prof_gctx_t *ret;
1534 	struct prof_gctx_dump_iter_arg_s *arg =
1535 	    (struct prof_gctx_dump_iter_arg_s *)opaque;
1536 
1537 	malloc_mutex_lock(arg->tsdn, gctx->lock);
1538 
1539 	if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1540 	    gctxs)) {
1541 		ret = gctx;
1542 		goto label_return;
1543 	}
1544 
1545 	ret = NULL;
1546 label_return:
1547 	malloc_mutex_unlock(arg->tsdn, gctx->lock);
1548 	return ret;
1549 }
1550 
1551 static void
1552 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
1553     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1554     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1555     prof_gctx_tree_t *gctxs) {
1556 	size_t tabind;
1557 	union {
1558 		prof_gctx_t	*p;
1559 		void		*v;
1560 	} gctx;
1561 
1562 	prof_enter(tsd, tdata);
1563 
1564 	/*
1565 	 * Put gctx's in limbo and clear their counters in preparation for
1566 	 * summing.
1567 	 */
1568 	gctx_tree_new(gctxs);
1569 	for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
1570 		prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
1571 	}
1572 
1573 	/*
1574 	 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1575 	 * stats and merge them into the associated gctx's.
1576 	 */
1577 	prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1578 	memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
1579 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1580 	tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1581 	    (void *)prof_tdata_merge_iter_arg);
1582 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1583 
1584 	/* Merge tctx stats into gctx's. */
1585 	prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1586 	prof_gctx_merge_iter_arg->leak_ngctx = 0;
1587 	gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
1588 	    (void *)prof_gctx_merge_iter_arg);
1589 
1590 	prof_leave(tsd, tdata);
1591 }
1592 
1593 static bool
1594 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
1595     bool leakcheck, prof_tdata_t *tdata,
1596     struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1597     struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1598     struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
1599     prof_gctx_tree_t *gctxs) {
1600 	/* Create dump file. */
1601 	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
1602 		return true;
1603 	}
1604 
1605 	/* Dump profile header. */
1606 	if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1607 	    &prof_tdata_merge_iter_arg->cnt_all)) {
1608 		goto label_write_error;
1609 	}
1610 
1611 	/* Dump per gctx profile stats. */
1612 	prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
1613 	prof_gctx_dump_iter_arg->propagate_err = propagate_err;
1614 	if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
1615 	    (void *)prof_gctx_dump_iter_arg) != NULL) {
1616 		goto label_write_error;
1617 	}
1618 
1619 	/* Dump /proc/<pid>/maps if possible. */
1620 	if (prof_dump_maps(propagate_err)) {
1621 		goto label_write_error;
1622 	}
1623 
1624 	if (prof_dump_close(propagate_err)) {
1625 		return true;
1626 	}
1627 
1628 	return false;
1629 label_write_error:
1630 	prof_dump_close(propagate_err);
1631 	return true;
1632 }
1633 
1634 static bool
1635 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
1636     bool leakcheck) {
1637 	cassert(config_prof);
1638 	assert(tsd_reentrancy_level_get(tsd) == 0);
1639 
1640 	prof_tdata_t * tdata = prof_tdata_get(tsd, true);
1641 	if (tdata == NULL) {
1642 		return true;
1643 	}
1644 
1645 	pre_reentrancy(tsd, NULL);
1646 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1647 
1648 	prof_gctx_tree_t gctxs;
1649 	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1650 	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1651 	struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1652 	prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1653 	    &prof_gctx_merge_iter_arg, &gctxs);
1654 	bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
1655 	    &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
1656 	    &prof_gctx_dump_iter_arg, &gctxs);
1657 	prof_gctx_finish(tsd, &gctxs);
1658 
1659 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1660 	post_reentrancy(tsd);
1661 
1662 	if (err) {
1663 		return true;
1664 	}
1665 
1666 	if (leakcheck) {
1667 		prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1668 		    prof_gctx_merge_iter_arg.leak_ngctx, filename);
1669 	}
1670 	return false;
1671 }
1672 
#ifdef JEMALLOC_JET
/*
 * Compute the overall profile counters the same way a dump does, without
 * writing a file.  Each non-NULL output pointer receives the matching
 * counter; all of them receive 0 when the calling thread has no tdata.
 */
void
prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
    uint64_t *accumbytes) {
	struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
	struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
	prof_gctx_tree_t gctxs;
	prof_cnt_t totals;
	tsd_t *tsd = tsd_fetch();
	prof_tdata_t *tdata = prof_tdata_get(tsd, false);

	if (tdata == NULL) {
		memset(&totals, 0, sizeof(totals));
	} else {
		prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
		    &prof_gctx_merge_iter_arg, &gctxs);
		prof_gctx_finish(tsd, &gctxs);
		totals = prof_tdata_merge_iter_arg.cnt_all;
	}

	if (curobjs != NULL) {
		*curobjs = totals.curobjs;
	}
	if (curbytes != NULL) {
		*curbytes = totals.curbytes;
	}
	if (accumobjs != NULL) {
		*accumobjs = totals.accumobjs;
	}
	if (accumbytes != NULL) {
		*accumbytes = totals.accumbytes;
	}
}
#endif
1719 
1720 #define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
1721 #define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
1722 static void
1723 prof_dump_filename(char *filename, char v, uint64_t vseq) {
1724 	cassert(config_prof);
1725 
1726 	if (vseq != VSEQ_INVALID) {
1727 	        /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
1728 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1729 		    "%s.%d.%"FMTu64".%c%"FMTu64".heap",
1730 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
1731 	} else {
1732 	        /* "<prefix>.<pid>.<seq>.<v>.heap" */
1733 		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1734 		    "%s.%d.%"FMTu64".%c.heap",
1735 		    opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
1736 	}
1737 	prof_dump_seq++;
1738 }
1739 
1740 static void
1741 prof_fdump(void) {
1742 	tsd_t *tsd;
1743 	char filename[DUMP_FILENAME_BUFSIZE];
1744 
1745 	cassert(config_prof);
1746 	assert(opt_prof_final);
1747 	assert(opt_prof_prefix[0] != '\0');
1748 
1749 	if (!prof_booted) {
1750 		return;
1751 	}
1752 	tsd = tsd_fetch();
1753 	assert(tsd_reentrancy_level_get(tsd) == 0);
1754 
1755 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1756 	prof_dump_filename(filename, 'f', VSEQ_INVALID);
1757 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1758 	prof_dump(tsd, false, filename, opt_prof_leak);
1759 }
1760 
1761 bool
1762 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
1763 	cassert(config_prof);
1764 
1765 #ifndef JEMALLOC_ATOMIC_U64
1766 	if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
1767 	    WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
1768 		return true;
1769 	}
1770 	prof_accum->accumbytes = 0;
1771 #else
1772 	atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
1773 #endif
1774 	return false;
1775 }
1776 
1777 void
1778 prof_idump(tsdn_t *tsdn) {
1779 	tsd_t *tsd;
1780 	prof_tdata_t *tdata;
1781 
1782 	cassert(config_prof);
1783 
1784 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
1785 		return;
1786 	}
1787 	tsd = tsdn_tsd(tsdn);
1788 	if (tsd_reentrancy_level_get(tsd) > 0) {
1789 		return;
1790 	}
1791 
1792 	tdata = prof_tdata_get(tsd, false);
1793 	if (tdata == NULL) {
1794 		return;
1795 	}
1796 	if (tdata->enq) {
1797 		tdata->enq_idump = true;
1798 		return;
1799 	}
1800 
1801 	if (opt_prof_prefix[0] != '\0') {
1802 		char filename[PATH_MAX + 1];
1803 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1804 		prof_dump_filename(filename, 'i', prof_dump_iseq);
1805 		prof_dump_iseq++;
1806 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1807 		prof_dump(tsd, false, filename, false);
1808 	}
1809 }
1810 
1811 bool
1812 prof_mdump(tsd_t *tsd, const char *filename) {
1813 	cassert(config_prof);
1814 	assert(tsd_reentrancy_level_get(tsd) == 0);
1815 
1816 	if (!opt_prof || !prof_booted) {
1817 		return true;
1818 	}
1819 	char filename_buf[DUMP_FILENAME_BUFSIZE];
1820 	if (filename == NULL) {
1821 		/* No filename specified, so automatically generate one. */
1822 		if (opt_prof_prefix[0] == '\0') {
1823 			return true;
1824 		}
1825 		malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1826 		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1827 		prof_dump_mseq++;
1828 		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
1829 		filename = filename_buf;
1830 	}
1831 	return prof_dump(tsd, true, filename, false);
1832 }
1833 
1834 void
1835 prof_gdump(tsdn_t *tsdn) {
1836 	tsd_t *tsd;
1837 	prof_tdata_t *tdata;
1838 
1839 	cassert(config_prof);
1840 
1841 	if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
1842 		return;
1843 	}
1844 	tsd = tsdn_tsd(tsdn);
1845 	if (tsd_reentrancy_level_get(tsd) > 0) {
1846 		return;
1847 	}
1848 
1849 	tdata = prof_tdata_get(tsd, false);
1850 	if (tdata == NULL) {
1851 		return;
1852 	}
1853 	if (tdata->enq) {
1854 		tdata->enq_gdump = true;
1855 		return;
1856 	}
1857 
1858 	if (opt_prof_prefix[0] != '\0') {
1859 		char filename[DUMP_FILENAME_BUFSIZE];
1860 		malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
1861 		prof_dump_filename(filename, 'u', prof_dump_useq);
1862 		prof_dump_useq++;
1863 		malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
1864 		prof_dump(tsd, false, filename, false);
1865 	}
1866 }
1867 
1868 static void
1869 prof_bt_hash(const void *key, size_t r_hash[2]) {
1870 	prof_bt_t *bt = (prof_bt_t *)key;
1871 
1872 	cassert(config_prof);
1873 
1874 	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1875 }
1876 
1877 static bool
1878 prof_bt_keycomp(const void *k1, const void *k2) {
1879 	const prof_bt_t *bt1 = (prof_bt_t *)k1;
1880 	const prof_bt_t *bt2 = (prof_bt_t *)k2;
1881 
1882 	cassert(config_prof);
1883 
1884 	if (bt1->len != bt2->len) {
1885 		return false;
1886 	}
1887 	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1888 }
1889 
1890 static uint64_t
1891 prof_thr_uid_alloc(tsdn_t *tsdn) {
1892 	uint64_t thr_uid;
1893 
1894 	malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
1895 	thr_uid = next_thr_uid;
1896 	next_thr_uid++;
1897 	malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
1898 
1899 	return thr_uid;
1900 }
1901 
1902 static prof_tdata_t *
1903 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
1904     char *thread_name, bool active) {
1905 	prof_tdata_t *tdata;
1906 
1907 	cassert(config_prof);
1908 
1909 	/* Initialize an empty cache for this thread. */
1910 	tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
1911 	    sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
1912 	    arena_get(TSDN_NULL, 0, true), true);
1913 	if (tdata == NULL) {
1914 		return NULL;
1915 	}
1916 
1917 	tdata->lock = prof_tdata_mutex_choose(thr_uid);
1918 	tdata->thr_uid = thr_uid;
1919 	tdata->thr_discrim = thr_discrim;
1920 	tdata->thread_name = thread_name;
1921 	tdata->attached = true;
1922 	tdata->expired = false;
1923 	tdata->tctx_uid_next = 0;
1924 
1925 	if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
1926 	    prof_bt_keycomp)) {
1927 		idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
1928 		return NULL;
1929 	}
1930 
1931 	tdata->prng_state = (uint64_t)(uintptr_t)tdata;
1932 	prof_sample_threshold_update(tdata);
1933 
1934 	tdata->enq = false;
1935 	tdata->enq_idump = false;
1936 	tdata->enq_gdump = false;
1937 
1938 	tdata->dumping = false;
1939 	tdata->active = active;
1940 
1941 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1942 	tdata_tree_insert(&tdatas, tdata);
1943 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1944 
1945 	return tdata;
1946 }
1947 
1948 prof_tdata_t *
1949 prof_tdata_init(tsd_t *tsd) {
1950 	return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
1951 	    NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
1952 }
1953 
1954 static bool
1955 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
1956 	if (tdata->attached && !even_if_attached) {
1957 		return false;
1958 	}
1959 	if (ckh_count(&tdata->bt2tctx) != 0) {
1960 		return false;
1961 	}
1962 	return true;
1963 }
1964 
1965 static bool
1966 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
1967     bool even_if_attached) {
1968 	malloc_mutex_assert_owner(tsdn, tdata->lock);
1969 
1970 	return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
1971 }
1972 
1973 static void
1974 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
1975     bool even_if_attached) {
1976 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
1977 
1978 	tdata_tree_remove(&tdatas, tdata);
1979 
1980 	assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
1981 
1982 	if (tdata->thread_name != NULL) {
1983 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
1984 		    true);
1985 	}
1986 	ckh_delete(tsd, &tdata->bt2tctx);
1987 	idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
1988 }
1989 
1990 static void
1991 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
1992 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1993 	prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
1994 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1995 }
1996 
1997 static void
1998 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
1999 	bool destroy_tdata;
2000 
2001 	malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
2002 	if (tdata->attached) {
2003 		destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
2004 		    true);
2005 		/*
2006 		 * Only detach if !destroy_tdata, because detaching would allow
2007 		 * another thread to win the race to destroy tdata.
2008 		 */
2009 		if (!destroy_tdata) {
2010 			tdata->attached = false;
2011 		}
2012 		tsd_prof_tdata_set(tsd, NULL);
2013 	} else {
2014 		destroy_tdata = false;
2015 	}
2016 	malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
2017 	if (destroy_tdata) {
2018 		prof_tdata_destroy(tsd, tdata, true);
2019 	}
2020 }
2021 
2022 prof_tdata_t *
2023 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
2024 	uint64_t thr_uid = tdata->thr_uid;
2025 	uint64_t thr_discrim = tdata->thr_discrim + 1;
2026 	char *thread_name = (tdata->thread_name != NULL) ?
2027 	    prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
2028 	bool active = tdata->active;
2029 
2030 	prof_tdata_detach(tsd, tdata);
2031 	return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
2032 	    active);
2033 }
2034 
2035 static bool
2036 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
2037 	bool destroy_tdata;
2038 
2039 	malloc_mutex_lock(tsdn, tdata->lock);
2040 	if (!tdata->expired) {
2041 		tdata->expired = true;
2042 		destroy_tdata = tdata->attached ? false :
2043 		    prof_tdata_should_destroy(tsdn, tdata, false);
2044 	} else {
2045 		destroy_tdata = false;
2046 	}
2047 	malloc_mutex_unlock(tsdn, tdata->lock);
2048 
2049 	return destroy_tdata;
2050 }
2051 
2052 static prof_tdata_t *
2053 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
2054     void *arg) {
2055 	tsdn_t *tsdn = (tsdn_t *)arg;
2056 
2057 	return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
2058 }
2059 
2060 void
2061 prof_reset(tsd_t *tsd, size_t lg_sample) {
2062 	prof_tdata_t *next;
2063 
2064 	assert(lg_sample < (sizeof(uint64_t) << 3));
2065 
2066 	malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
2067 	malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2068 
2069 	lg_prof_sample = lg_sample;
2070 
2071 	next = NULL;
2072 	do {
2073 		prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
2074 		    prof_tdata_reset_iter, (void *)tsd);
2075 		if (to_destroy != NULL) {
2076 			next = tdata_tree_next(&tdatas, to_destroy);
2077 			prof_tdata_destroy_locked(tsd, to_destroy, false);
2078 		} else {
2079 			next = NULL;
2080 		}
2081 	} while (next != NULL);
2082 
2083 	malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2084 	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
2085 }
2086 
2087 void
2088 prof_tdata_cleanup(tsd_t *tsd) {
2089 	prof_tdata_t *tdata;
2090 
2091 	if (!config_prof) {
2092 		return;
2093 	}
2094 
2095 	tdata = tsd_prof_tdata_get(tsd);
2096 	if (tdata != NULL) {
2097 		prof_tdata_detach(tsd, tdata);
2098 	}
2099 }
2100 
2101 bool
2102 prof_active_get(tsdn_t *tsdn) {
2103 	bool prof_active_current;
2104 
2105 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2106 	prof_active_current = prof_active;
2107 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2108 	return prof_active_current;
2109 }
2110 
2111 bool
2112 prof_active_set(tsdn_t *tsdn, bool active) {
2113 	bool prof_active_old;
2114 
2115 	malloc_mutex_lock(tsdn, &prof_active_mtx);
2116 	prof_active_old = prof_active;
2117 	prof_active = active;
2118 	malloc_mutex_unlock(tsdn, &prof_active_mtx);
2119 	return prof_active_old;
2120 }
2121 
2122 const char *
2123 prof_thread_name_get(tsd_t *tsd) {
2124 	prof_tdata_t *tdata;
2125 
2126 	tdata = prof_tdata_get(tsd, true);
2127 	if (tdata == NULL) {
2128 		return "";
2129 	}
2130 	return (tdata->thread_name != NULL ? tdata->thread_name : "");
2131 }
2132 
2133 static char *
2134 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
2135 	char *ret;
2136 	size_t size;
2137 
2138 	if (thread_name == NULL) {
2139 		return NULL;
2140 	}
2141 
2142 	size = strlen(thread_name) + 1;
2143 	if (size == 1) {
2144 		return "";
2145 	}
2146 
2147 	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
2148 	    arena_get(TSDN_NULL, 0, true), true);
2149 	if (ret == NULL) {
2150 		return NULL;
2151 	}
2152 	memcpy(ret, thread_name, size);
2153 	return ret;
2154 }
2155 
2156 int
2157 prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
2158 	prof_tdata_t *tdata;
2159 	unsigned i;
2160 	char *s;
2161 
2162 	tdata = prof_tdata_get(tsd, true);
2163 	if (tdata == NULL) {
2164 		return EAGAIN;
2165 	}
2166 
2167 	/* Validate input. */
2168 	if (thread_name == NULL) {
2169 		return EFAULT;
2170 	}
2171 	for (i = 0; thread_name[i] != '\0'; i++) {
2172 		char c = thread_name[i];
2173 		if (!isgraph(c) && !isblank(c)) {
2174 			return EFAULT;
2175 		}
2176 	}
2177 
2178 	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2179 	if (s == NULL) {
2180 		return EAGAIN;
2181 	}
2182 
2183 	if (tdata->thread_name != NULL) {
2184 		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2185 		    true);
2186 		tdata->thread_name = NULL;
2187 	}
2188 	if (strlen(s) > 0) {
2189 		tdata->thread_name = s;
2190 	}
2191 	return 0;
2192 }
2193 
2194 bool
2195 prof_thread_active_get(tsd_t *tsd) {
2196 	prof_tdata_t *tdata;
2197 
2198 	tdata = prof_tdata_get(tsd, true);
2199 	if (tdata == NULL) {
2200 		return false;
2201 	}
2202 	return tdata->active;
2203 }
2204 
2205 bool
2206 prof_thread_active_set(tsd_t *tsd, bool active) {
2207 	prof_tdata_t *tdata;
2208 
2209 	tdata = prof_tdata_get(tsd, true);
2210 	if (tdata == NULL) {
2211 		return true;
2212 	}
2213 	tdata->active = active;
2214 	return false;
2215 }
2216 
2217 bool
2218 prof_thread_active_init_get(tsdn_t *tsdn) {
2219 	bool active_init;
2220 
2221 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2222 	active_init = prof_thread_active_init;
2223 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2224 	return active_init;
2225 }
2226 
2227 bool
2228 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
2229 	bool active_init_old;
2230 
2231 	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2232 	active_init_old = prof_thread_active_init;
2233 	prof_thread_active_init = active_init;
2234 	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2235 	return active_init_old;
2236 }
2237 
2238 bool
2239 prof_gdump_get(tsdn_t *tsdn) {
2240 	bool prof_gdump_current;
2241 
2242 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2243 	prof_gdump_current = prof_gdump_val;
2244 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2245 	return prof_gdump_current;
2246 }
2247 
2248 bool
2249 prof_gdump_set(tsdn_t *tsdn, bool gdump) {
2250 	bool prof_gdump_old;
2251 
2252 	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2253 	prof_gdump_old = prof_gdump_val;
2254 	prof_gdump_val = gdump;
2255 	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2256 	return prof_gdump_old;
2257 }
2258 
2259 void
2260 prof_boot0(void) {
2261 	cassert(config_prof);
2262 
2263 	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
2264 	    sizeof(PROF_PREFIX_DEFAULT));
2265 }
2266 
2267 void
2268 prof_boot1(void) {
2269 	cassert(config_prof);
2270 
2271 	/*
2272 	 * opt_prof must be in its final state before any arenas are
2273 	 * initialized, so this function must be executed early.
2274 	 */
2275 
2276 	if (opt_prof_leak && !opt_prof) {
2277 		/*
2278 		 * Enable opt_prof, but in such a way that profiles are never
2279 		 * automatically dumped.
2280 		 */
2281 		opt_prof = true;
2282 		opt_prof_gdump = false;
2283 	} else if (opt_prof) {
2284 		if (opt_lg_prof_interval >= 0) {
2285 			prof_interval = (((uint64_t)1U) <<
2286 			    opt_lg_prof_interval);
2287 		}
2288 	}
2289 }
2290 
2291 bool
2292 prof_boot2(tsd_t *tsd) {
2293 	cassert(config_prof);
2294 
2295 	if (opt_prof) {
2296 		unsigned i;
2297 
2298 		lg_prof_sample = opt_lg_prof_sample;
2299 
2300 		prof_active = opt_prof_active;
2301 		if (malloc_mutex_init(&prof_active_mtx, "prof_active",
2302 		    WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
2303 			return true;
2304 		}
2305 
2306 		prof_gdump_val = opt_prof_gdump;
2307 		if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
2308 		    WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
2309 			return true;
2310 		}
2311 
2312 		prof_thread_active_init = opt_prof_thread_active_init;
2313 		if (malloc_mutex_init(&prof_thread_active_init_mtx,
2314 		    "prof_thread_active_init",
2315 		    WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
2316 		    malloc_mutex_rank_exclusive)) {
2317 			return true;
2318 		}
2319 
2320 		if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
2321 		    prof_bt_keycomp)) {
2322 			return true;
2323 		}
2324 		if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
2325 		    WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
2326 			return true;
2327 		}
2328 
2329 		tdata_tree_new(&tdatas);
2330 		if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
2331 		    WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
2332 			return true;
2333 		}
2334 
2335 		next_thr_uid = 0;
2336 		if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
2337 		    WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
2338 			return true;
2339 		}
2340 
2341 		if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
2342 		    WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
2343 			return true;
2344 		}
2345 		if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
2346 		    WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
2347 			return true;
2348 		}
2349 
2350 		if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
2351 		    atexit(prof_fdump) != 0) {
2352 			malloc_write("<jemalloc>: Error in atexit()\n");
2353 			if (opt_abort) {
2354 				abort();
2355 			}
2356 		}
2357 
2358 		gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
2359 		    b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
2360 		    CACHELINE);
2361 		if (gctx_locks == NULL) {
2362 			return true;
2363 		}
2364 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
2365 			if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
2366 			    WITNESS_RANK_PROF_GCTX,
2367 			    malloc_mutex_rank_exclusive)) {
2368 				return true;
2369 			}
2370 		}
2371 
2372 		tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
2373 		    b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
2374 		    CACHELINE);
2375 		if (tdata_locks == NULL) {
2376 			return true;
2377 		}
2378 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
2379 			if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
2380 			    WITNESS_RANK_PROF_TDATA,
2381 			    malloc_mutex_rank_exclusive)) {
2382 				return true;
2383 			}
2384 		}
2385 	}
2386 
2387 #ifdef JEMALLOC_PROF_LIBGCC
2388 	/*
2389 	 * Cause the backtracing machinery to allocate its internal state
2390 	 * before enabling profiling.
2391 	 */
2392 	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
2393 #endif
2394 
2395 	prof_booted = true;
2396 
2397 	return false;
2398 }
2399 
2400 void
2401 prof_prefork0(tsdn_t *tsdn) {
2402 	if (config_prof && opt_prof) {
2403 		unsigned i;
2404 
2405 		malloc_mutex_prefork(tsdn, &prof_dump_mtx);
2406 		malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
2407 		malloc_mutex_prefork(tsdn, &tdatas_mtx);
2408 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
2409 			malloc_mutex_prefork(tsdn, &tdata_locks[i]);
2410 		}
2411 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
2412 			malloc_mutex_prefork(tsdn, &gctx_locks[i]);
2413 		}
2414 	}
2415 }
2416 
2417 void
2418 prof_prefork1(tsdn_t *tsdn) {
2419 	if (config_prof && opt_prof) {
2420 		malloc_mutex_prefork(tsdn, &prof_active_mtx);
2421 		malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
2422 		malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
2423 		malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
2424 		malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
2425 	}
2426 }
2427 
2428 void
2429 prof_postfork_parent(tsdn_t *tsdn) {
2430 	if (config_prof && opt_prof) {
2431 		unsigned i;
2432 
2433 		malloc_mutex_postfork_parent(tsdn,
2434 		    &prof_thread_active_init_mtx);
2435 		malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
2436 		malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
2437 		malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
2438 		malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
2439 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
2440 			malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
2441 		}
2442 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
2443 			malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
2444 		}
2445 		malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
2446 		malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
2447 		malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
2448 	}
2449 }
2450 
2451 void
2452 prof_postfork_child(tsdn_t *tsdn) {
2453 	if (config_prof && opt_prof) {
2454 		unsigned i;
2455 
2456 		malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
2457 		malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
2458 		malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
2459 		malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
2460 		malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
2461 		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
2462 			malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
2463 		}
2464 		for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
2465 			malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
2466 		}
2467 		malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
2468 		malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
2469 		malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
2470 	}
2471 }
2472 
2473 /******************************************************************************/
2474