1 #define JEMALLOC_PROF_C_ 2 #include "jemalloc/internal/jemalloc_preamble.h" 3 #include "jemalloc/internal/jemalloc_internal_includes.h" 4 5 #include "jemalloc/internal/assert.h" 6 #include "jemalloc/internal/ckh.h" 7 #include "jemalloc/internal/hash.h" 8 #include "jemalloc/internal/malloc_io.h" 9 #include "jemalloc/internal/mutex.h" 10 11 /******************************************************************************/ 12 13 #ifdef JEMALLOC_PROF_LIBUNWIND 14 #define UNW_LOCAL_ONLY 15 #include <libunwind.h> 16 #endif 17 18 #ifdef JEMALLOC_PROF_LIBGCC 19 /* 20 * We have a circular dependency -- jemalloc_internal.h tells us if we should 21 * use libgcc's unwinding functionality, but after we've included that, we've 22 * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 23 */ 24 #undef _Unwind_Backtrace 25 #include <unwind.h> 26 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) 27 #endif 28 29 /******************************************************************************/ 30 /* Data. */ 31 32 bool opt_prof = false; 33 bool opt_prof_active = true; 34 bool opt_prof_thread_active_init = true; 35 size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; 36 ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; 37 bool opt_prof_gdump = false; 38 bool opt_prof_final = false; 39 bool opt_prof_leak = false; 40 bool opt_prof_accum = false; 41 char opt_prof_prefix[ 42 /* Minimize memory bloat for non-prof builds. */ 43 #ifdef JEMALLOC_PROF 44 PATH_MAX + 45 #endif 46 1]; 47 48 /* 49 * Initialized as opt_prof_active, and accessed via 50 * prof_active_[gs]et{_unlocked,}(). 51 */ 52 bool prof_active; 53 static malloc_mutex_t prof_active_mtx; 54 55 /* 56 * Initialized as opt_prof_thread_active_init, and accessed via 57 * prof_thread_active_init_[gs]et(). 58 */ 59 static bool prof_thread_active_init; 60 static malloc_mutex_t prof_thread_active_init_mtx; 61 62 /* 63 * Initialized as opt_prof_gdump, and accessed via 64 * prof_gdump_[gs]et{_unlocked,}(). 65 */ 66 bool prof_gdump_val; 67 static malloc_mutex_t prof_gdump_mtx; 68 69 uint64_t prof_interval = 0; 70 71 size_t lg_prof_sample; 72 73 /* 74 * Table of mutexes that are shared among gctx's. These are leaf locks, so 75 * there is no problem with using them for more than one gctx at the same time. 76 * The primary motivation for this sharing though is that gctx's are ephemeral, 77 * and destroying mutexes causes complications for systems that allocate when 78 * creating/destroying mutexes. 79 */ 80 static malloc_mutex_t *gctx_locks; 81 static atomic_u_t cum_gctxs; /* Atomic counter. */ 82 83 /* 84 * Table of mutexes that are shared among tdata's. No operations require 85 * holding multiple tdata locks, so there is no problem with using them for more 86 * than one tdata at the same time, even though a gctx lock may be acquired 87 * while holding a tdata lock. 88 */ 89 static malloc_mutex_t *tdata_locks; 90 91 /* 92 * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data 93 * structure that knows about all backtraces currently captured. 94 */ 95 static ckh_t bt2gctx; 96 /* Non static to enable profiling. */ 97 malloc_mutex_t bt2gctx_mtx; 98 99 /* 100 * Tree of all extant prof_tdata_t structures, regardless of state, 101 * {attached,detached,expired}. 
102 */ 103 static prof_tdata_tree_t tdatas; 104 static malloc_mutex_t tdatas_mtx; 105 106 static uint64_t next_thr_uid; 107 static malloc_mutex_t next_thr_uid_mtx; 108 109 static malloc_mutex_t prof_dump_seq_mtx; 110 static uint64_t prof_dump_seq; 111 static uint64_t prof_dump_iseq; 112 static uint64_t prof_dump_mseq; 113 static uint64_t prof_dump_useq; 114 115 /* 116 * This buffer is rather large for stack allocation, so use a single buffer for 117 * all profile dumps. 118 */ 119 static malloc_mutex_t prof_dump_mtx; 120 static char prof_dump_buf[ 121 /* Minimize memory bloat for non-prof builds. */ 122 #ifdef JEMALLOC_PROF 123 PROF_DUMP_BUFSIZE 124 #else 125 1 126 #endif 127 ]; 128 static size_t prof_dump_buf_end; 129 static int prof_dump_fd; 130 131 /* Do not dump any profiles until bootstrapping is complete. */ 132 static bool prof_booted = false; 133 134 /******************************************************************************/ 135 /* 136 * Function prototypes for static functions that are referenced prior to 137 * definition. 138 */ 139 140 static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); 141 static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); 142 static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, 143 bool even_if_attached); 144 static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, 145 bool even_if_attached); 146 static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); 147 148 /******************************************************************************/ 149 /* Red-black trees. */ 150 151 static int 152 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { 153 uint64_t a_thr_uid = a->thr_uid; 154 uint64_t b_thr_uid = b->thr_uid; 155 int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); 156 if (ret == 0) { 157 uint64_t a_thr_discrim = a->thr_discrim; 158 uint64_t b_thr_discrim = b->thr_discrim; 159 ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < 160 b_thr_discrim); 161 if (ret == 0) { 162 uint64_t a_tctx_uid = a->tctx_uid; 163 uint64_t b_tctx_uid = b->tctx_uid; 164 ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < 165 b_tctx_uid); 166 } 167 } 168 return ret; 169 } 170 171 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, 172 tctx_link, prof_tctx_comp) 173 174 static int 175 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { 176 unsigned a_len = a->bt.len; 177 unsigned b_len = b->bt.len; 178 unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; 179 int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); 180 if (ret == 0) { 181 ret = (a_len > b_len) - (a_len < b_len); 182 } 183 return ret; 184 } 185 186 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, 187 prof_gctx_comp) 188 189 static int 190 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { 191 int ret; 192 uint64_t a_uid = a->thr_uid; 193 uint64_t b_uid = b->thr_uid; 194 195 ret = ((a_uid > b_uid) - (a_uid < b_uid)); 196 if (ret == 0) { 197 uint64_t a_discrim = a->thr_discrim; 198 uint64_t b_discrim = b->thr_discrim; 199 200 ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); 201 } 202 return ret; 203 } 204 205 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, 206 prof_tdata_comp) 207 208 /******************************************************************************/ 209 210 void 211 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { 212 prof_tdata_t *tdata; 213 214 cassert(config_prof); 215 216 if (updated) { 217 /* 218 * Compute a new sample threshold. This isn't very important in 219 * practice, because this function is rarely executed, so the 220 * potential for sample bias is minimal except in contrived 221 * programs. 222 */ 223 tdata = prof_tdata_get(tsd, true); 224 if (tdata != NULL) { 225 prof_sample_threshold_update(tdata); 226 } 227 } 228 229 if ((uintptr_t)tctx > (uintptr_t)1U) { 230 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); 231 tctx->prepared = false; 232 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { 233 prof_tctx_destroy(tsd, tctx); 234 } else { 235 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); 236 } 237 } 238 } 239 240 void 241 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, 242 prof_tctx_t *tctx) { 243 prof_tctx_set(tsdn, ptr, usize, NULL, tctx); 244 245 malloc_mutex_lock(tsdn, tctx->tdata->lock); 246 tctx->cnts.curobjs++; 247 tctx->cnts.curbytes += usize; 248 if (opt_prof_accum) { 249 tctx->cnts.accumobjs++; 250 tctx->cnts.accumbytes += usize; 251 } 252 tctx->prepared = false; 253 malloc_mutex_unlock(tsdn, tctx->tdata->lock); 254 } 255 256 void 257 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { 258 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); 259 assert(tctx->cnts.curobjs > 0); 260 assert(tctx->cnts.curbytes >= usize); 261 tctx->cnts.curobjs--; 262 tctx->cnts.curbytes -= usize; 263 264 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { 265 prof_tctx_destroy(tsd, tctx); 266 } else { 267 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); 268 } 269 } 270 271 void 272 bt_init(prof_bt_t *bt, void **vec) { 273 cassert(config_prof); 274 275 bt->vec = vec; 276 bt->len = 0; 277 } 278 279 static void 280 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { 281 cassert(config_prof); 282 assert(tdata == prof_tdata_get(tsd, false)); 283 284 if (tdata != NULL) { 285 assert(!tdata->enq); 286 tdata->enq = true; 287 } 288 289 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); 290 } 291 292 static void 293 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { 294 cassert(config_prof); 295 assert(tdata == prof_tdata_get(tsd, false)); 296 297 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); 298 299 if (tdata != NULL) { 300 bool idump, gdump; 301 302 assert(tdata->enq); 303 tdata->enq = false; 304 idump = tdata->enq_idump; 305 tdata->enq_idump = false; 306 gdump = tdata->enq_gdump; 307 tdata->enq_gdump = false; 308 309 if (idump) { 310 prof_idump(tsd_tsdn(tsd)); 311 } 312 if (gdump) { 313 
prof_gdump(tsd_tsdn(tsd)); 314 } 315 } 316 } 317 318 #ifdef JEMALLOC_PROF_LIBUNWIND 319 void 320 prof_backtrace(prof_bt_t *bt) { 321 int nframes; 322 323 cassert(config_prof); 324 assert(bt->len == 0); 325 assert(bt->vec != NULL); 326 327 nframes = unw_backtrace(bt->vec, PROF_BT_MAX); 328 if (nframes <= 0) { 329 return; 330 } 331 bt->len = nframes; 332 } 333 #elif (defined(JEMALLOC_PROF_LIBGCC)) 334 static _Unwind_Reason_Code 335 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { 336 cassert(config_prof); 337 338 return _URC_NO_REASON; 339 } 340 341 static _Unwind_Reason_Code 342 prof_unwind_callback(struct _Unwind_Context *context, void *arg) { 343 prof_unwind_data_t *data = (prof_unwind_data_t *)arg; 344 void *ip; 345 346 cassert(config_prof); 347 348 ip = (void *)_Unwind_GetIP(context); 349 if (ip == NULL) { 350 return _URC_END_OF_STACK; 351 } 352 data->bt->vec[data->bt->len] = ip; 353 data->bt->len++; 354 if (data->bt->len == data->max) { 355 return _URC_END_OF_STACK; 356 } 357 358 return _URC_NO_REASON; 359 } 360 361 void 362 prof_backtrace(prof_bt_t *bt) { 363 prof_unwind_data_t data = {bt, PROF_BT_MAX}; 364 365 cassert(config_prof); 366 367 _Unwind_Backtrace(prof_unwind_callback, &data); 368 } 369 #elif (defined(JEMALLOC_PROF_GCC)) 370 void 371 prof_backtrace(prof_bt_t *bt) { 372 #define BT_FRAME(i) \ 373 if ((i) < PROF_BT_MAX) { \ 374 void *p; \ 375 if (__builtin_frame_address(i) == 0) { \ 376 return; \ 377 } \ 378 p = __builtin_return_address(i); \ 379 if (p == NULL) { \ 380 return; \ 381 } \ 382 bt->vec[(i)] = p; \ 383 bt->len = (i) + 1; \ 384 } else { \ 385 return; \ 386 } 387 388 cassert(config_prof); 389 390 BT_FRAME(0) 391 BT_FRAME(1) 392 BT_FRAME(2) 393 BT_FRAME(3) 394 BT_FRAME(4) 395 BT_FRAME(5) 396 BT_FRAME(6) 397 BT_FRAME(7) 398 BT_FRAME(8) 399 BT_FRAME(9) 400 401 BT_FRAME(10) 402 BT_FRAME(11) 403 BT_FRAME(12) 404 BT_FRAME(13) 405 BT_FRAME(14) 406 BT_FRAME(15) 407 BT_FRAME(16) 408 BT_FRAME(17) 409 BT_FRAME(18) 410 BT_FRAME(19) 411 412 BT_FRAME(20) 413 BT_FRAME(21) 414 BT_FRAME(22) 415 BT_FRAME(23) 416 BT_FRAME(24) 417 BT_FRAME(25) 418 BT_FRAME(26) 419 BT_FRAME(27) 420 BT_FRAME(28) 421 BT_FRAME(29) 422 423 BT_FRAME(30) 424 BT_FRAME(31) 425 BT_FRAME(32) 426 BT_FRAME(33) 427 BT_FRAME(34) 428 BT_FRAME(35) 429 BT_FRAME(36) 430 BT_FRAME(37) 431 BT_FRAME(38) 432 BT_FRAME(39) 433 434 BT_FRAME(40) 435 BT_FRAME(41) 436 BT_FRAME(42) 437 BT_FRAME(43) 438 BT_FRAME(44) 439 BT_FRAME(45) 440 BT_FRAME(46) 441 BT_FRAME(47) 442 BT_FRAME(48) 443 BT_FRAME(49) 444 445 BT_FRAME(50) 446 BT_FRAME(51) 447 BT_FRAME(52) 448 BT_FRAME(53) 449 BT_FRAME(54) 450 BT_FRAME(55) 451 BT_FRAME(56) 452 BT_FRAME(57) 453 BT_FRAME(58) 454 BT_FRAME(59) 455 456 BT_FRAME(60) 457 BT_FRAME(61) 458 BT_FRAME(62) 459 BT_FRAME(63) 460 BT_FRAME(64) 461 BT_FRAME(65) 462 BT_FRAME(66) 463 BT_FRAME(67) 464 BT_FRAME(68) 465 BT_FRAME(69) 466 467 BT_FRAME(70) 468 BT_FRAME(71) 469 BT_FRAME(72) 470 BT_FRAME(73) 471 BT_FRAME(74) 472 BT_FRAME(75) 473 BT_FRAME(76) 474 BT_FRAME(77) 475 BT_FRAME(78) 476 BT_FRAME(79) 477 478 BT_FRAME(80) 479 BT_FRAME(81) 480 BT_FRAME(82) 481 BT_FRAME(83) 482 BT_FRAME(84) 483 BT_FRAME(85) 484 BT_FRAME(86) 485 BT_FRAME(87) 486 BT_FRAME(88) 487 BT_FRAME(89) 488 489 BT_FRAME(90) 490 BT_FRAME(91) 491 BT_FRAME(92) 492 BT_FRAME(93) 493 BT_FRAME(94) 494 BT_FRAME(95) 495 BT_FRAME(96) 496 BT_FRAME(97) 497 BT_FRAME(98) 498 BT_FRAME(99) 499 500 BT_FRAME(100) 501 BT_FRAME(101) 502 BT_FRAME(102) 503 BT_FRAME(103) 504 BT_FRAME(104) 505 BT_FRAME(105) 506 BT_FRAME(106) 507 BT_FRAME(107) 508 
BT_FRAME(108) 509 BT_FRAME(109) 510 511 BT_FRAME(110) 512 BT_FRAME(111) 513 BT_FRAME(112) 514 BT_FRAME(113) 515 BT_FRAME(114) 516 BT_FRAME(115) 517 BT_FRAME(116) 518 BT_FRAME(117) 519 BT_FRAME(118) 520 BT_FRAME(119) 521 522 BT_FRAME(120) 523 BT_FRAME(121) 524 BT_FRAME(122) 525 BT_FRAME(123) 526 BT_FRAME(124) 527 BT_FRAME(125) 528 BT_FRAME(126) 529 BT_FRAME(127) 530 #undef BT_FRAME 531 } 532 #else 533 void 534 prof_backtrace(prof_bt_t *bt) { 535 cassert(config_prof); 536 not_reached(); 537 } 538 #endif 539 540 static malloc_mutex_t * 541 prof_gctx_mutex_choose(void) { 542 unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); 543 544 return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; 545 } 546 547 static malloc_mutex_t * 548 prof_tdata_mutex_choose(uint64_t thr_uid) { 549 return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; 550 } 551 552 static prof_gctx_t * 553 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { 554 /* 555 * Create a single allocation that has space for vec of length bt->len. 556 */ 557 size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); 558 prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, 559 sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), 560 true); 561 if (gctx == NULL) { 562 return NULL; 563 } 564 gctx->lock = prof_gctx_mutex_choose(); 565 /* 566 * Set nlimbo to 1, in order to avoid a race condition with 567 * prof_tctx_destroy()/prof_gctx_try_destroy(). 568 */ 569 gctx->nlimbo = 1; 570 tctx_tree_new(&gctx->tctxs); 571 /* Duplicate bt. */ 572 memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); 573 gctx->bt.vec = gctx->vec; 574 gctx->bt.len = bt->len; 575 return gctx; 576 } 577 578 static void 579 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, 580 prof_tdata_t *tdata) { 581 cassert(config_prof); 582 583 /* 584 * Check that gctx is still unused by any thread cache before destroying 585 * it. prof_lookup() increments gctx->nlimbo in order to avoid a race 586 * condition with this function, as does prof_tctx_destroy() in order to 587 * avoid a race between the main body of prof_tctx_destroy() and entry 588 * into this function. 589 */ 590 prof_enter(tsd, tdata_self); 591 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 592 assert(gctx->nlimbo != 0); 593 if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { 594 /* Remove gctx from bt2gctx. */ 595 if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { 596 not_reached(); 597 } 598 prof_leave(tsd, tdata_self); 599 /* Destroy gctx. */ 600 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 601 idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); 602 } else { 603 /* 604 * Compensate for increment in prof_tctx_destroy() or 605 * prof_lookup(). 
606 */ 607 gctx->nlimbo--; 608 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 609 prof_leave(tsd, tdata_self); 610 } 611 } 612 613 static bool 614 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { 615 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); 616 617 if (opt_prof_accum) { 618 return false; 619 } 620 if (tctx->cnts.curobjs != 0) { 621 return false; 622 } 623 if (tctx->prepared) { 624 return false; 625 } 626 return true; 627 } 628 629 static bool 630 prof_gctx_should_destroy(prof_gctx_t *gctx) { 631 if (opt_prof_accum) { 632 return false; 633 } 634 if (!tctx_tree_empty(&gctx->tctxs)) { 635 return false; 636 } 637 if (gctx->nlimbo != 0) { 638 return false; 639 } 640 return true; 641 } 642 643 static void 644 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { 645 prof_tdata_t *tdata = tctx->tdata; 646 prof_gctx_t *gctx = tctx->gctx; 647 bool destroy_tdata, destroy_tctx, destroy_gctx; 648 649 malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); 650 651 assert(tctx->cnts.curobjs == 0); 652 assert(tctx->cnts.curbytes == 0); 653 assert(!opt_prof_accum); 654 assert(tctx->cnts.accumobjs == 0); 655 assert(tctx->cnts.accumbytes == 0); 656 657 ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); 658 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); 659 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 660 661 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 662 switch (tctx->state) { 663 case prof_tctx_state_nominal: 664 tctx_tree_remove(&gctx->tctxs, tctx); 665 destroy_tctx = true; 666 if (prof_gctx_should_destroy(gctx)) { 667 /* 668 * Increment gctx->nlimbo in order to keep another 669 * thread from winning the race to destroy gctx while 670 * this one has gctx->lock dropped. Without this, it 671 * would be possible for another thread to: 672 * 673 * 1) Sample an allocation associated with gctx. 674 * 2) Deallocate the sampled object. 675 * 3) Successfully prof_gctx_try_destroy(gctx). 676 * 677 * The result would be that gctx no longer exists by the 678 * time this thread accesses it in 679 * prof_gctx_try_destroy(). 680 */ 681 gctx->nlimbo++; 682 destroy_gctx = true; 683 } else { 684 destroy_gctx = false; 685 } 686 break; 687 case prof_tctx_state_dumping: 688 /* 689 * A dumping thread needs tctx to remain valid until dumping 690 * has finished. Change state such that the dumping thread will 691 * complete destruction during a late dump iteration phase. 692 */ 693 tctx->state = prof_tctx_state_purgatory; 694 destroy_tctx = false; 695 destroy_gctx = false; 696 break; 697 default: 698 not_reached(); 699 destroy_tctx = false; 700 destroy_gctx = false; 701 } 702 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 703 if (destroy_gctx) { 704 prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, 705 tdata); 706 } 707 708 malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); 709 710 if (destroy_tdata) { 711 prof_tdata_destroy(tsd, tdata, false); 712 } 713 714 if (destroy_tctx) { 715 idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); 716 } 717 } 718 719 static bool 720 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, 721 void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { 722 union { 723 prof_gctx_t *p; 724 void *v; 725 } gctx, tgctx; 726 union { 727 prof_bt_t *p; 728 void *v; 729 } btkey; 730 bool new_gctx; 731 732 prof_enter(tsd, tdata); 733 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { 734 /* bt has never been seen before. Insert it. 
*/ 735 prof_leave(tsd, tdata); 736 tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); 737 if (tgctx.v == NULL) { 738 return true; 739 } 740 prof_enter(tsd, tdata); 741 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { 742 gctx.p = tgctx.p; 743 btkey.p = &gctx.p->bt; 744 if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { 745 /* OOM. */ 746 prof_leave(tsd, tdata); 747 idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, 748 true, true); 749 return true; 750 } 751 new_gctx = true; 752 } else { 753 new_gctx = false; 754 } 755 } else { 756 tgctx.v = NULL; 757 new_gctx = false; 758 } 759 760 if (!new_gctx) { 761 /* 762 * Increment nlimbo, in order to avoid a race condition with 763 * prof_tctx_destroy()/prof_gctx_try_destroy(). 764 */ 765 malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); 766 gctx.p->nlimbo++; 767 malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); 768 new_gctx = false; 769 770 if (tgctx.v != NULL) { 771 /* Lost race to insert. */ 772 idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, 773 true); 774 } 775 } 776 prof_leave(tsd, tdata); 777 778 *p_btkey = btkey.v; 779 *p_gctx = gctx.p; 780 *p_new_gctx = new_gctx; 781 return false; 782 } 783 784 prof_tctx_t * 785 prof_lookup(tsd_t *tsd, prof_bt_t *bt) { 786 union { 787 prof_tctx_t *p; 788 void *v; 789 } ret; 790 prof_tdata_t *tdata; 791 bool not_found; 792 793 cassert(config_prof); 794 795 tdata = prof_tdata_get(tsd, false); 796 if (tdata == NULL) { 797 return NULL; 798 } 799 800 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 801 not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); 802 if (!not_found) { /* Note double negative! */ 803 ret.p->prepared = true; 804 } 805 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 806 if (not_found) { 807 void *btkey; 808 prof_gctx_t *gctx; 809 bool new_gctx, error; 810 811 /* 812 * This thread's cache lacks bt. Look for it in the global 813 * cache. 814 */ 815 if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, 816 &new_gctx)) { 817 return NULL; 818 } 819 820 /* Link a prof_tctx_t into gctx for this thread. */ 821 ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), 822 sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, 823 arena_ichoose(tsd, NULL), true); 824 if (ret.p == NULL) { 825 if (new_gctx) { 826 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 827 } 828 return NULL; 829 } 830 ret.p->tdata = tdata; 831 ret.p->thr_uid = tdata->thr_uid; 832 ret.p->thr_discrim = tdata->thr_discrim; 833 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); 834 ret.p->gctx = gctx; 835 ret.p->tctx_uid = tdata->tctx_uid_next++; 836 ret.p->prepared = true; 837 ret.p->state = prof_tctx_state_initializing; 838 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 839 error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); 840 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 841 if (error) { 842 if (new_gctx) { 843 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 844 } 845 idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); 846 return NULL; 847 } 848 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 849 ret.p->state = prof_tctx_state_nominal; 850 tctx_tree_insert(&gctx->tctxs, ret.p); 851 gctx->nlimbo--; 852 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 853 } 854 855 return ret.p; 856 } 857 858 /* 859 * The bodies of this function and prof_leakcheck() are compiled out unless heap 860 * profiling is enabled, so that it is possible to compile jemalloc with 861 * floating point support completely disabled. 
Avoiding floating point code is 862 * important on memory-constrained systems, but it also enables a workaround for 863 * versions of glibc that don't properly save/restore floating point registers 864 * during dynamic lazy symbol loading (which internally calls into whatever 865 * malloc implementation happens to be integrated into the application). Note 866 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast 867 * memory moves, so jemalloc must be compiled with such optimizations disabled 868 * (e.g. 869 * -mno-sse) in order for the workaround to be complete. 870 */ 871 void 872 prof_sample_threshold_update(prof_tdata_t *tdata) { 873 #ifdef JEMALLOC_PROF 874 uint64_t r; 875 double u; 876 877 if (!config_prof) { 878 return; 879 } 880 881 if (lg_prof_sample == 0) { 882 tdata->bytes_until_sample = 0; 883 return; 884 } 885 886 /* 887 * Compute sample interval as a geometrically distributed random 888 * variable with mean (2^lg_prof_sample). 889 * 890 * __ __ 891 * | log(u) | 1 892 * tdata->bytes_until_sample = | -------- |, where p = --------------- 893 * | log(1-p) | lg_prof_sample 894 * 2 895 * 896 * For more information on the math, see: 897 * 898 * Non-Uniform Random Variate Generation 899 * Luc Devroye 900 * Springer-Verlag, New York, 1986 901 * pp 500 902 * (http://luc.devroye.org/rnbookindex.html) 903 */ 904 r = prng_lg_range_u64(&tdata->prng_state, 53); 905 u = (double)r * (1.0/9007199254740992.0L); 906 tdata->bytes_until_sample = (uint64_t)(log(u) / 907 log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) 908 + (uint64_t)1U; 909 #endif 910 } 911 912 #ifdef JEMALLOC_JET 913 static prof_tdata_t * 914 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 915 void *arg) { 916 size_t *tdata_count = (size_t *)arg; 917 918 (*tdata_count)++; 919 920 return NULL; 921 } 922 923 size_t 924 prof_tdata_count(void) { 925 size_t tdata_count = 0; 926 tsdn_t *tsdn; 927 928 tsdn = tsdn_fetch(); 929 malloc_mutex_lock(tsdn, &tdatas_mtx); 930 tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, 931 (void *)&tdata_count); 932 malloc_mutex_unlock(tsdn, &tdatas_mtx); 933 934 return tdata_count; 935 } 936 937 size_t 938 prof_bt_count(void) { 939 size_t bt_count; 940 tsd_t *tsd; 941 prof_tdata_t *tdata; 942 943 tsd = tsd_fetch(); 944 tdata = prof_tdata_get(tsd, false); 945 if (tdata == NULL) { 946 return 0; 947 } 948 949 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); 950 bt_count = ckh_count(&bt2gctx); 951 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); 952 953 return bt_count; 954 } 955 #endif 956 957 static int 958 prof_dump_open_impl(bool propagate_err, const char *filename) { 959 int fd; 960 961 fd = creat(filename, 0644); 962 if (fd == -1 && !propagate_err) { 963 malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", 964 filename); 965 if (opt_abort) { 966 abort(); 967 } 968 } 969 970 return fd; 971 } 972 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; 973 974 static bool 975 prof_dump_flush(bool propagate_err) { 976 bool ret = false; 977 ssize_t err; 978 979 cassert(config_prof); 980 981 err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); 982 if (err == -1) { 983 if (!propagate_err) { 984 malloc_write("<jemalloc>: write() failed during heap " 985 "profile flush\n"); 986 if (opt_abort) { 987 abort(); 988 } 989 } 990 ret = true; 991 } 992 prof_dump_buf_end = 0; 993 994 return ret; 995 } 996 997 static bool 998 prof_dump_close(bool propagate_err) { 999 bool ret; 1000 1001 assert(prof_dump_fd != -1); 1002 ret 
= prof_dump_flush(propagate_err); 1003 close(prof_dump_fd); 1004 prof_dump_fd = -1; 1005 1006 return ret; 1007 } 1008 1009 static bool 1010 prof_dump_write(bool propagate_err, const char *s) { 1011 size_t i, slen, n; 1012 1013 cassert(config_prof); 1014 1015 i = 0; 1016 slen = strlen(s); 1017 while (i < slen) { 1018 /* Flush the buffer if it is full. */ 1019 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 1020 if (prof_dump_flush(propagate_err) && propagate_err) { 1021 return true; 1022 } 1023 } 1024 1025 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { 1026 /* Finish writing. */ 1027 n = slen - i; 1028 } else { 1029 /* Write as much of s as will fit. */ 1030 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; 1031 } 1032 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); 1033 prof_dump_buf_end += n; 1034 i += n; 1035 } 1036 1037 return false; 1038 } 1039 1040 JEMALLOC_FORMAT_PRINTF(2, 3) 1041 static bool 1042 prof_dump_printf(bool propagate_err, const char *format, ...) { 1043 bool ret; 1044 va_list ap; 1045 char buf[PROF_PRINTF_BUFSIZE]; 1046 1047 va_start(ap, format); 1048 malloc_vsnprintf(buf, sizeof(buf), format, ap); 1049 va_end(ap); 1050 ret = prof_dump_write(propagate_err, buf); 1051 1052 return ret; 1053 } 1054 1055 static void 1056 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { 1057 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); 1058 1059 malloc_mutex_lock(tsdn, tctx->gctx->lock); 1060 1061 switch (tctx->state) { 1062 case prof_tctx_state_initializing: 1063 malloc_mutex_unlock(tsdn, tctx->gctx->lock); 1064 return; 1065 case prof_tctx_state_nominal: 1066 tctx->state = prof_tctx_state_dumping; 1067 malloc_mutex_unlock(tsdn, tctx->gctx->lock); 1068 1069 memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); 1070 1071 tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; 1072 tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; 1073 if (opt_prof_accum) { 1074 tdata->cnt_summed.accumobjs += 1075 tctx->dump_cnts.accumobjs; 1076 tdata->cnt_summed.accumbytes += 1077 tctx->dump_cnts.accumbytes; 1078 } 1079 break; 1080 case prof_tctx_state_dumping: 1081 case prof_tctx_state_purgatory: 1082 not_reached(); 1083 } 1084 } 1085 1086 static void 1087 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { 1088 malloc_mutex_assert_owner(tsdn, gctx->lock); 1089 1090 gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; 1091 gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; 1092 if (opt_prof_accum) { 1093 gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; 1094 gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; 1095 } 1096 } 1097 1098 static prof_tctx_t * 1099 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { 1100 tsdn_t *tsdn = (tsdn_t *)arg; 1101 1102 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); 1103 1104 switch (tctx->state) { 1105 case prof_tctx_state_nominal: 1106 /* New since dumping started; ignore. 
*/ 1107 break; 1108 case prof_tctx_state_dumping: 1109 case prof_tctx_state_purgatory: 1110 prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); 1111 break; 1112 default: 1113 not_reached(); 1114 } 1115 1116 return NULL; 1117 } 1118 1119 struct prof_tctx_dump_iter_arg_s { 1120 tsdn_t *tsdn; 1121 bool propagate_err; 1122 }; 1123 1124 static prof_tctx_t * 1125 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { 1126 struct prof_tctx_dump_iter_arg_s *arg = 1127 (struct prof_tctx_dump_iter_arg_s *)opaque; 1128 1129 malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); 1130 1131 switch (tctx->state) { 1132 case prof_tctx_state_initializing: 1133 case prof_tctx_state_nominal: 1134 /* Not captured by this dump. */ 1135 break; 1136 case prof_tctx_state_dumping: 1137 case prof_tctx_state_purgatory: 1138 if (prof_dump_printf(arg->propagate_err, 1139 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " 1140 "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, 1141 tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, 1142 tctx->dump_cnts.accumbytes)) { 1143 return tctx; 1144 } 1145 break; 1146 default: 1147 not_reached(); 1148 } 1149 return NULL; 1150 } 1151 1152 static prof_tctx_t * 1153 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { 1154 tsdn_t *tsdn = (tsdn_t *)arg; 1155 prof_tctx_t *ret; 1156 1157 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); 1158 1159 switch (tctx->state) { 1160 case prof_tctx_state_nominal: 1161 /* New since dumping started; ignore. */ 1162 break; 1163 case prof_tctx_state_dumping: 1164 tctx->state = prof_tctx_state_nominal; 1165 break; 1166 case prof_tctx_state_purgatory: 1167 ret = tctx; 1168 goto label_return; 1169 default: 1170 not_reached(); 1171 } 1172 1173 ret = NULL; 1174 label_return: 1175 return ret; 1176 } 1177 1178 static void 1179 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { 1180 cassert(config_prof); 1181 1182 malloc_mutex_lock(tsdn, gctx->lock); 1183 1184 /* 1185 * Increment nlimbo so that gctx won't go away before dump. 1186 * Additionally, link gctx into the dump list so that it is included in 1187 * prof_dump()'s second pass. 1188 */ 1189 gctx->nlimbo++; 1190 gctx_tree_insert(gctxs, gctx); 1191 1192 memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); 1193 1194 malloc_mutex_unlock(tsdn, gctx->lock); 1195 } 1196 1197 struct prof_gctx_merge_iter_arg_s { 1198 tsdn_t *tsdn; 1199 size_t leak_ngctx; 1200 }; 1201 1202 static prof_gctx_t * 1203 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { 1204 struct prof_gctx_merge_iter_arg_s *arg = 1205 (struct prof_gctx_merge_iter_arg_s *)opaque; 1206 1207 malloc_mutex_lock(arg->tsdn, gctx->lock); 1208 tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, 1209 (void *)arg->tsdn); 1210 if (gctx->cnt_summed.curobjs != 0) { 1211 arg->leak_ngctx++; 1212 } 1213 malloc_mutex_unlock(arg->tsdn, gctx->lock); 1214 1215 return NULL; 1216 } 1217 1218 static void 1219 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { 1220 prof_tdata_t *tdata = prof_tdata_get(tsd, false); 1221 prof_gctx_t *gctx; 1222 1223 /* 1224 * Standard tree iteration won't work here, because as soon as we 1225 * decrement gctx->nlimbo and unlock gctx, another thread can 1226 * concurrently destroy it, which will corrupt the tree. Therefore, 1227 * tear down the tree one node at a time during iteration. 
1228 */ 1229 while ((gctx = gctx_tree_first(gctxs)) != NULL) { 1230 gctx_tree_remove(gctxs, gctx); 1231 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 1232 { 1233 prof_tctx_t *next; 1234 1235 next = NULL; 1236 do { 1237 prof_tctx_t *to_destroy = 1238 tctx_tree_iter(&gctx->tctxs, next, 1239 prof_tctx_finish_iter, 1240 (void *)tsd_tsdn(tsd)); 1241 if (to_destroy != NULL) { 1242 next = tctx_tree_next(&gctx->tctxs, 1243 to_destroy); 1244 tctx_tree_remove(&gctx->tctxs, 1245 to_destroy); 1246 idalloctm(tsd_tsdn(tsd), to_destroy, 1247 NULL, NULL, true, true); 1248 } else { 1249 next = NULL; 1250 } 1251 } while (next != NULL); 1252 } 1253 gctx->nlimbo--; 1254 if (prof_gctx_should_destroy(gctx)) { 1255 gctx->nlimbo++; 1256 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 1257 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 1258 } else { 1259 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 1260 } 1261 } 1262 } 1263 1264 struct prof_tdata_merge_iter_arg_s { 1265 tsdn_t *tsdn; 1266 prof_cnt_t cnt_all; 1267 }; 1268 1269 static prof_tdata_t * 1270 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 1271 void *opaque) { 1272 struct prof_tdata_merge_iter_arg_s *arg = 1273 (struct prof_tdata_merge_iter_arg_s *)opaque; 1274 1275 malloc_mutex_lock(arg->tsdn, tdata->lock); 1276 if (!tdata->expired) { 1277 size_t tabind; 1278 union { 1279 prof_tctx_t *p; 1280 void *v; 1281 } tctx; 1282 1283 tdata->dumping = true; 1284 memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); 1285 for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, 1286 &tctx.v);) { 1287 prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); 1288 } 1289 1290 arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; 1291 arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; 1292 if (opt_prof_accum) { 1293 arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; 1294 arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; 1295 } 1296 } else { 1297 tdata->dumping = false; 1298 } 1299 malloc_mutex_unlock(arg->tsdn, tdata->lock); 1300 1301 return NULL; 1302 } 1303 1304 static prof_tdata_t * 1305 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 1306 void *arg) { 1307 bool propagate_err = *(bool *)arg; 1308 1309 if (!tdata->dumping) { 1310 return NULL; 1311 } 1312 1313 if (prof_dump_printf(propagate_err, 1314 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", 1315 tdata->thr_uid, tdata->cnt_summed.curobjs, 1316 tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, 1317 tdata->cnt_summed.accumbytes, 1318 (tdata->thread_name != NULL) ? " " : "", 1319 (tdata->thread_name != NULL) ? 
tdata->thread_name : "")) { 1320 return tdata; 1321 } 1322 return NULL; 1323 } 1324 1325 static bool 1326 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, 1327 const prof_cnt_t *cnt_all) { 1328 bool ret; 1329 1330 if (prof_dump_printf(propagate_err, 1331 "heap_v2/%"FMTu64"\n" 1332 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", 1333 ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, 1334 cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { 1335 return true; 1336 } 1337 1338 malloc_mutex_lock(tsdn, &tdatas_mtx); 1339 ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, 1340 (void *)&propagate_err) != NULL); 1341 malloc_mutex_unlock(tsdn, &tdatas_mtx); 1342 return ret; 1343 } 1344 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; 1345 1346 static bool 1347 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, 1348 const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { 1349 bool ret; 1350 unsigned i; 1351 struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; 1352 1353 cassert(config_prof); 1354 malloc_mutex_assert_owner(tsdn, gctx->lock); 1355 1356 /* Avoid dumping such gctx's that have no useful data. */ 1357 if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || 1358 (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { 1359 assert(gctx->cnt_summed.curobjs == 0); 1360 assert(gctx->cnt_summed.curbytes == 0); 1361 assert(gctx->cnt_summed.accumobjs == 0); 1362 assert(gctx->cnt_summed.accumbytes == 0); 1363 ret = false; 1364 goto label_return; 1365 } 1366 1367 if (prof_dump_printf(propagate_err, "@")) { 1368 ret = true; 1369 goto label_return; 1370 } 1371 for (i = 0; i < bt->len; i++) { 1372 if (prof_dump_printf(propagate_err, " %#"FMTxPTR, 1373 (uintptr_t)bt->vec[i])) { 1374 ret = true; 1375 goto label_return; 1376 } 1377 } 1378 1379 if (prof_dump_printf(propagate_err, 1380 "\n" 1381 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", 1382 gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, 1383 gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { 1384 ret = true; 1385 goto label_return; 1386 } 1387 1388 prof_tctx_dump_iter_arg.tsdn = tsdn; 1389 prof_tctx_dump_iter_arg.propagate_err = propagate_err; 1390 if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, 1391 (void *)&prof_tctx_dump_iter_arg) != NULL) { 1392 ret = true; 1393 goto label_return; 1394 } 1395 1396 ret = false; 1397 label_return: 1398 return ret; 1399 } 1400 1401 #ifndef _WIN32 1402 JEMALLOC_FORMAT_PRINTF(1, 2) 1403 static int 1404 prof_open_maps(const char *format, ...) 
{ 1405 int mfd; 1406 va_list ap; 1407 char filename[PATH_MAX + 1]; 1408 1409 va_start(ap, format); 1410 malloc_vsnprintf(filename, sizeof(filename), format, ap); 1411 va_end(ap); 1412 1413 #if defined(O_CLOEXEC) 1414 mfd = open(filename, O_RDONLY | O_CLOEXEC); 1415 #else 1416 mfd = open(filename, O_RDONLY); 1417 if (mfd != -1) { 1418 fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); 1419 } 1420 #endif 1421 1422 return mfd; 1423 } 1424 #endif 1425 1426 static int 1427 prof_getpid(void) { 1428 #ifdef _WIN32 1429 return GetCurrentProcessId(); 1430 #else 1431 return getpid(); 1432 #endif 1433 } 1434 1435 static bool 1436 prof_dump_maps(bool propagate_err) { 1437 bool ret; 1438 int mfd; 1439 1440 cassert(config_prof); 1441 #ifdef __FreeBSD__ 1442 mfd = prof_open_maps("/proc/curproc/map"); 1443 #elif defined(_WIN32) 1444 mfd = -1; // Not implemented 1445 #else 1446 { 1447 int pid = prof_getpid(); 1448 1449 mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); 1450 if (mfd == -1) { 1451 mfd = prof_open_maps("/proc/%d/maps", pid); 1452 } 1453 } 1454 #endif 1455 if (mfd != -1) { 1456 ssize_t nread; 1457 1458 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && 1459 propagate_err) { 1460 ret = true; 1461 goto label_return; 1462 } 1463 nread = 0; 1464 do { 1465 prof_dump_buf_end += nread; 1466 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 1467 /* Make space in prof_dump_buf before read(). */ 1468 if (prof_dump_flush(propagate_err) && 1469 propagate_err) { 1470 ret = true; 1471 goto label_return; 1472 } 1473 } 1474 nread = malloc_read_fd(mfd, 1475 &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE 1476 - prof_dump_buf_end); 1477 } while (nread > 0); 1478 } else { 1479 ret = true; 1480 goto label_return; 1481 } 1482 1483 ret = false; 1484 label_return: 1485 if (mfd != -1) { 1486 close(mfd); 1487 } 1488 return ret; 1489 } 1490 1491 /* 1492 * See prof_sample_threshold_update() comment for why the body of this function 1493 * is conditionally compiled. 1494 */ 1495 static void 1496 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, 1497 const char *filename) { 1498 #ifdef JEMALLOC_PROF 1499 /* 1500 * Scaling is equivalent AdjustSamples() in jeprof, but the result may 1501 * differ slightly from what jeprof reports, because here we scale the 1502 * summary values, whereas jeprof scales each context individually and 1503 * reports the sums of the scaled values. 1504 */ 1505 if (cnt_all->curbytes != 0) { 1506 double sample_period = (double)((uint64_t)1 << lg_prof_sample); 1507 double ratio = (((double)cnt_all->curbytes) / 1508 (double)cnt_all->curobjs) / sample_period; 1509 double scale_factor = 1.0 / (1.0 - exp(-ratio)); 1510 uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) 1511 * scale_factor); 1512 uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * 1513 scale_factor); 1514 1515 malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64 1516 " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", 1517 curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != 1518 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); 1519 malloc_printf( 1520 "<jemalloc>: Run jeprof on \"%s\" for leak detail\n", 1521 filename); 1522 } 1523 #endif 1524 } 1525 1526 struct prof_gctx_dump_iter_arg_s { 1527 tsdn_t *tsdn; 1528 bool propagate_err; 1529 }; 1530 1531 static prof_gctx_t * 1532 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { 1533 prof_gctx_t *ret; 1534 struct prof_gctx_dump_iter_arg_s *arg = 1535 (struct prof_gctx_dump_iter_arg_s *)opaque; 1536 1537 malloc_mutex_lock(arg->tsdn, gctx->lock); 1538 1539 if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, 1540 gctxs)) { 1541 ret = gctx; 1542 goto label_return; 1543 } 1544 1545 ret = NULL; 1546 label_return: 1547 malloc_mutex_unlock(arg->tsdn, gctx->lock); 1548 return ret; 1549 } 1550 1551 static void 1552 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, 1553 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, 1554 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, 1555 prof_gctx_tree_t *gctxs) { 1556 size_t tabind; 1557 union { 1558 prof_gctx_t *p; 1559 void *v; 1560 } gctx; 1561 1562 prof_enter(tsd, tdata); 1563 1564 /* 1565 * Put gctx's in limbo and clear their counters in preparation for 1566 * summing. 1567 */ 1568 gctx_tree_new(gctxs); 1569 for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { 1570 prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); 1571 } 1572 1573 /* 1574 * Iterate over tdatas, and for the non-expired ones snapshot their tctx 1575 * stats and merge them into the associated gctx's. 1576 */ 1577 prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); 1578 memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); 1579 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1580 tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, 1581 (void *)prof_tdata_merge_iter_arg); 1582 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1583 1584 /* Merge tctx stats into gctx's. */ 1585 prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); 1586 prof_gctx_merge_iter_arg->leak_ngctx = 0; 1587 gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, 1588 (void *)prof_gctx_merge_iter_arg); 1589 1590 prof_leave(tsd, tdata); 1591 } 1592 1593 static bool 1594 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, 1595 bool leakcheck, prof_tdata_t *tdata, 1596 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, 1597 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, 1598 struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, 1599 prof_gctx_tree_t *gctxs) { 1600 /* Create dump file. */ 1601 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { 1602 return true; 1603 } 1604 1605 /* Dump profile header. */ 1606 if (prof_dump_header(tsd_tsdn(tsd), propagate_err, 1607 &prof_tdata_merge_iter_arg->cnt_all)) { 1608 goto label_write_error; 1609 } 1610 1611 /* Dump per gctx profile stats. */ 1612 prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); 1613 prof_gctx_dump_iter_arg->propagate_err = propagate_err; 1614 if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, 1615 (void *)prof_gctx_dump_iter_arg) != NULL) { 1616 goto label_write_error; 1617 } 1618 1619 /* Dump /proc/<pid>/maps if possible. 
*/ 1620 if (prof_dump_maps(propagate_err)) { 1621 goto label_write_error; 1622 } 1623 1624 if (prof_dump_close(propagate_err)) { 1625 return true; 1626 } 1627 1628 return false; 1629 label_write_error: 1630 prof_dump_close(propagate_err); 1631 return true; 1632 } 1633 1634 static bool 1635 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, 1636 bool leakcheck) { 1637 cassert(config_prof); 1638 assert(tsd_reentrancy_level_get(tsd) == 0); 1639 1640 prof_tdata_t * tdata = prof_tdata_get(tsd, true); 1641 if (tdata == NULL) { 1642 return true; 1643 } 1644 1645 pre_reentrancy(tsd, NULL); 1646 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); 1647 1648 prof_gctx_tree_t gctxs; 1649 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; 1650 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; 1651 struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; 1652 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, 1653 &prof_gctx_merge_iter_arg, &gctxs); 1654 bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, 1655 &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, 1656 &prof_gctx_dump_iter_arg, &gctxs); 1657 prof_gctx_finish(tsd, &gctxs); 1658 1659 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); 1660 post_reentrancy(tsd); 1661 1662 if (err) { 1663 return true; 1664 } 1665 1666 if (leakcheck) { 1667 prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, 1668 prof_gctx_merge_iter_arg.leak_ngctx, filename); 1669 } 1670 return false; 1671 } 1672 1673 #ifdef JEMALLOC_JET 1674 void 1675 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, 1676 uint64_t *accumbytes) { 1677 tsd_t *tsd; 1678 prof_tdata_t *tdata; 1679 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; 1680 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; 1681 prof_gctx_tree_t gctxs; 1682 1683 tsd = tsd_fetch(); 1684 tdata = prof_tdata_get(tsd, false); 1685 if (tdata == NULL) { 1686 if (curobjs != NULL) { 1687 *curobjs = 0; 1688 } 1689 if (curbytes != NULL) { 1690 *curbytes = 0; 1691 } 1692 if (accumobjs != NULL) { 1693 *accumobjs = 0; 1694 } 1695 if (accumbytes != NULL) { 1696 *accumbytes = 0; 1697 } 1698 return; 1699 } 1700 1701 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, 1702 &prof_gctx_merge_iter_arg, &gctxs); 1703 prof_gctx_finish(tsd, &gctxs); 1704 1705 if (curobjs != NULL) { 1706 *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; 1707 } 1708 if (curbytes != NULL) { 1709 *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; 1710 } 1711 if (accumobjs != NULL) { 1712 *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; 1713 } 1714 if (accumbytes != NULL) { 1715 *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; 1716 } 1717 } 1718 #endif 1719 1720 #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) 1721 #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) 1722 static void 1723 prof_dump_filename(char *filename, char v, uint64_t vseq) { 1724 cassert(config_prof); 1725 1726 if (vseq != VSEQ_INVALID) { 1727 /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ 1728 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1729 "%s.%d.%"FMTu64".%c%"FMTu64".heap", 1730 opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); 1731 } else { 1732 /* "<prefix>.<pid>.<seq>.<v>.heap" */ 1733 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1734 "%s.%d.%"FMTu64".%c.heap", 1735 opt_prof_prefix, prof_getpid(), prof_dump_seq, v); 1736 } 1737 prof_dump_seq++; 1738 } 1739 1740 static void 1741 prof_fdump(void) { 1742 tsd_t *tsd; 1743 char 
filename[DUMP_FILENAME_BUFSIZE]; 1744 1745 cassert(config_prof); 1746 assert(opt_prof_final); 1747 assert(opt_prof_prefix[0] != '\0'); 1748 1749 if (!prof_booted) { 1750 return; 1751 } 1752 tsd = tsd_fetch(); 1753 assert(tsd_reentrancy_level_get(tsd) == 0); 1754 1755 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1756 prof_dump_filename(filename, 'f', VSEQ_INVALID); 1757 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1758 prof_dump(tsd, false, filename, opt_prof_leak); 1759 } 1760 1761 bool 1762 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { 1763 cassert(config_prof); 1764 1765 #ifndef JEMALLOC_ATOMIC_U64 1766 if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", 1767 WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { 1768 return true; 1769 } 1770 prof_accum->accumbytes = 0; 1771 #else 1772 atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); 1773 #endif 1774 return false; 1775 } 1776 1777 void 1778 prof_idump(tsdn_t *tsdn) { 1779 tsd_t *tsd; 1780 prof_tdata_t *tdata; 1781 1782 cassert(config_prof); 1783 1784 if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { 1785 return; 1786 } 1787 tsd = tsdn_tsd(tsdn); 1788 if (tsd_reentrancy_level_get(tsd) > 0) { 1789 return; 1790 } 1791 1792 tdata = prof_tdata_get(tsd, false); 1793 if (tdata == NULL) { 1794 return; 1795 } 1796 if (tdata->enq) { 1797 tdata->enq_idump = true; 1798 return; 1799 } 1800 1801 if (opt_prof_prefix[0] != '\0') { 1802 char filename[PATH_MAX + 1]; 1803 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1804 prof_dump_filename(filename, 'i', prof_dump_iseq); 1805 prof_dump_iseq++; 1806 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1807 prof_dump(tsd, false, filename, false); 1808 } 1809 } 1810 1811 bool 1812 prof_mdump(tsd_t *tsd, const char *filename) { 1813 cassert(config_prof); 1814 assert(tsd_reentrancy_level_get(tsd) == 0); 1815 1816 if (!opt_prof || !prof_booted) { 1817 return true; 1818 } 1819 char filename_buf[DUMP_FILENAME_BUFSIZE]; 1820 if (filename == NULL) { 1821 /* No filename specified, so automatically generate one. 
*/ 1822 if (opt_prof_prefix[0] == '\0') { 1823 return true; 1824 } 1825 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1826 prof_dump_filename(filename_buf, 'm', prof_dump_mseq); 1827 prof_dump_mseq++; 1828 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1829 filename = filename_buf; 1830 } 1831 return prof_dump(tsd, true, filename, false); 1832 } 1833 1834 void 1835 prof_gdump(tsdn_t *tsdn) { 1836 tsd_t *tsd; 1837 prof_tdata_t *tdata; 1838 1839 cassert(config_prof); 1840 1841 if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { 1842 return; 1843 } 1844 tsd = tsdn_tsd(tsdn); 1845 if (tsd_reentrancy_level_get(tsd) > 0) { 1846 return; 1847 } 1848 1849 tdata = prof_tdata_get(tsd, false); 1850 if (tdata == NULL) { 1851 return; 1852 } 1853 if (tdata->enq) { 1854 tdata->enq_gdump = true; 1855 return; 1856 } 1857 1858 if (opt_prof_prefix[0] != '\0') { 1859 char filename[DUMP_FILENAME_BUFSIZE]; 1860 malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); 1861 prof_dump_filename(filename, 'u', prof_dump_useq); 1862 prof_dump_useq++; 1863 malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); 1864 prof_dump(tsd, false, filename, false); 1865 } 1866 } 1867 1868 static void 1869 prof_bt_hash(const void *key, size_t r_hash[2]) { 1870 prof_bt_t *bt = (prof_bt_t *)key; 1871 1872 cassert(config_prof); 1873 1874 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); 1875 } 1876 1877 static bool 1878 prof_bt_keycomp(const void *k1, const void *k2) { 1879 const prof_bt_t *bt1 = (prof_bt_t *)k1; 1880 const prof_bt_t *bt2 = (prof_bt_t *)k2; 1881 1882 cassert(config_prof); 1883 1884 if (bt1->len != bt2->len) { 1885 return false; 1886 } 1887 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); 1888 } 1889 1890 static uint64_t 1891 prof_thr_uid_alloc(tsdn_t *tsdn) { 1892 uint64_t thr_uid; 1893 1894 malloc_mutex_lock(tsdn, &next_thr_uid_mtx); 1895 thr_uid = next_thr_uid; 1896 next_thr_uid++; 1897 malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); 1898 1899 return thr_uid; 1900 } 1901 1902 static prof_tdata_t * 1903 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, 1904 char *thread_name, bool active) { 1905 prof_tdata_t *tdata; 1906 1907 cassert(config_prof); 1908 1909 /* Initialize an empty cache for this thread. 
*/ 1910 tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), 1911 sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, 1912 arena_get(TSDN_NULL, 0, true), true); 1913 if (tdata == NULL) { 1914 return NULL; 1915 } 1916 1917 tdata->lock = prof_tdata_mutex_choose(thr_uid); 1918 tdata->thr_uid = thr_uid; 1919 tdata->thr_discrim = thr_discrim; 1920 tdata->thread_name = thread_name; 1921 tdata->attached = true; 1922 tdata->expired = false; 1923 tdata->tctx_uid_next = 0; 1924 1925 if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, 1926 prof_bt_keycomp)) { 1927 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); 1928 return NULL; 1929 } 1930 1931 tdata->prng_state = (uint64_t)(uintptr_t)tdata; 1932 prof_sample_threshold_update(tdata); 1933 1934 tdata->enq = false; 1935 tdata->enq_idump = false; 1936 tdata->enq_gdump = false; 1937 1938 tdata->dumping = false; 1939 tdata->active = active; 1940 1941 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1942 tdata_tree_insert(&tdatas, tdata); 1943 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1944 1945 return tdata; 1946 } 1947 1948 prof_tdata_t * 1949 prof_tdata_init(tsd_t *tsd) { 1950 return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, 1951 NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); 1952 } 1953 1954 static bool 1955 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { 1956 if (tdata->attached && !even_if_attached) { 1957 return false; 1958 } 1959 if (ckh_count(&tdata->bt2tctx) != 0) { 1960 return false; 1961 } 1962 return true; 1963 } 1964 1965 static bool 1966 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, 1967 bool even_if_attached) { 1968 malloc_mutex_assert_owner(tsdn, tdata->lock); 1969 1970 return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); 1971 } 1972 1973 static void 1974 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, 1975 bool even_if_attached) { 1976 malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); 1977 1978 tdata_tree_remove(&tdatas, tdata); 1979 1980 assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); 1981 1982 if (tdata->thread_name != NULL) { 1983 idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, 1984 true); 1985 } 1986 ckh_delete(tsd, &tdata->bt2tctx); 1987 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); 1988 } 1989 1990 static void 1991 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { 1992 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1993 prof_tdata_destroy_locked(tsd, tdata, even_if_attached); 1994 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1995 } 1996 1997 static void 1998 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { 1999 bool destroy_tdata; 2000 2001 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 2002 if (tdata->attached) { 2003 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, 2004 true); 2005 /* 2006 * Only detach if !destroy_tdata, because detaching would allow 2007 * another thread to win the race to destroy tdata. 
2008 */ 2009 if (!destroy_tdata) { 2010 tdata->attached = false; 2011 } 2012 tsd_prof_tdata_set(tsd, NULL); 2013 } else { 2014 destroy_tdata = false; 2015 } 2016 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 2017 if (destroy_tdata) { 2018 prof_tdata_destroy(tsd, tdata, true); 2019 } 2020 } 2021 2022 prof_tdata_t * 2023 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { 2024 uint64_t thr_uid = tdata->thr_uid; 2025 uint64_t thr_discrim = tdata->thr_discrim + 1; 2026 char *thread_name = (tdata->thread_name != NULL) ? 2027 prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; 2028 bool active = tdata->active; 2029 2030 prof_tdata_detach(tsd, tdata); 2031 return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, 2032 active); 2033 } 2034 2035 static bool 2036 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { 2037 bool destroy_tdata; 2038 2039 malloc_mutex_lock(tsdn, tdata->lock); 2040 if (!tdata->expired) { 2041 tdata->expired = true; 2042 destroy_tdata = tdata->attached ? false : 2043 prof_tdata_should_destroy(tsdn, tdata, false); 2044 } else { 2045 destroy_tdata = false; 2046 } 2047 malloc_mutex_unlock(tsdn, tdata->lock); 2048 2049 return destroy_tdata; 2050 } 2051 2052 static prof_tdata_t * 2053 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 2054 void *arg) { 2055 tsdn_t *tsdn = (tsdn_t *)arg; 2056 2057 return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); 2058 } 2059 2060 void 2061 prof_reset(tsd_t *tsd, size_t lg_sample) { 2062 prof_tdata_t *next; 2063 2064 assert(lg_sample < (sizeof(uint64_t) << 3)); 2065 2066 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); 2067 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 2068 2069 lg_prof_sample = lg_sample; 2070 2071 next = NULL; 2072 do { 2073 prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, 2074 prof_tdata_reset_iter, (void *)tsd); 2075 if (to_destroy != NULL) { 2076 next = tdata_tree_next(&tdatas, to_destroy); 2077 prof_tdata_destroy_locked(tsd, to_destroy, false); 2078 } else { 2079 next = NULL; 2080 } 2081 } while (next != NULL); 2082 2083 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 2084 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); 2085 } 2086 2087 void 2088 prof_tdata_cleanup(tsd_t *tsd) { 2089 prof_tdata_t *tdata; 2090 2091 if (!config_prof) { 2092 return; 2093 } 2094 2095 tdata = tsd_prof_tdata_get(tsd); 2096 if (tdata != NULL) { 2097 prof_tdata_detach(tsd, tdata); 2098 } 2099 } 2100 2101 bool 2102 prof_active_get(tsdn_t *tsdn) { 2103 bool prof_active_current; 2104 2105 malloc_mutex_lock(tsdn, &prof_active_mtx); 2106 prof_active_current = prof_active; 2107 malloc_mutex_unlock(tsdn, &prof_active_mtx); 2108 return prof_active_current; 2109 } 2110 2111 bool 2112 prof_active_set(tsdn_t *tsdn, bool active) { 2113 bool prof_active_old; 2114 2115 malloc_mutex_lock(tsdn, &prof_active_mtx); 2116 prof_active_old = prof_active; 2117 prof_active = active; 2118 malloc_mutex_unlock(tsdn, &prof_active_mtx); 2119 return prof_active_old; 2120 } 2121 2122 const char * 2123 prof_thread_name_get(tsd_t *tsd) { 2124 prof_tdata_t *tdata; 2125 2126 tdata = prof_tdata_get(tsd, true); 2127 if (tdata == NULL) { 2128 return ""; 2129 } 2130 return (tdata->thread_name != NULL ? 
const char *
prof_thread_name_get(tsd_t *tsd) {
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return "";
	}
	return (tdata->thread_name != NULL ? tdata->thread_name : "");
}

static char *
prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
	char *ret;
	size_t size;

	if (thread_name == NULL) {
		return NULL;
	}

	size = strlen(thread_name) + 1;
	if (size == 1) {
		return "";
	}

	ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
	    arena_get(TSDN_NULL, 0, true), true);
	if (ret == NULL) {
		return NULL;
	}
	memcpy(ret, thread_name, size);
	return ret;
}

int
prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
	prof_tdata_t *tdata;
	unsigned i;
	char *s;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return EAGAIN;
	}

	/* Validate input. */
	if (thread_name == NULL) {
		return EFAULT;
	}
	for (i = 0; thread_name[i] != '\0'; i++) {
		char c = thread_name[i];
		if (!isgraph(c) && !isblank(c)) {
			return EFAULT;
		}
	}

	s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
	if (s == NULL) {
		return EAGAIN;
	}

	if (tdata->thread_name != NULL) {
		idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
		    true);
		tdata->thread_name = NULL;
	}
	if (strlen(s) > 0) {
		tdata->thread_name = s;
	}
	return 0;
}

bool
prof_thread_active_get(tsd_t *tsd) {
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return false;
	}
	return tdata->active;
}

bool
prof_thread_active_set(tsd_t *tsd, bool active) {
	prof_tdata_t *tdata;

	tdata = prof_tdata_get(tsd, true);
	if (tdata == NULL) {
		return true;
	}
	tdata->active = active;
	return false;
}

bool
prof_thread_active_init_get(tsdn_t *tsdn) {
	bool active_init;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init = prof_thread_active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init;
}

bool
prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
	bool active_init_old;

	malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
	active_init_old = prof_thread_active_init;
	prof_thread_active_init = active_init;
	malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
	return active_init_old;
}

bool
prof_gdump_get(tsdn_t *tsdn) {
	bool prof_gdump_current;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_current = prof_gdump_val;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_current;
}

bool
prof_gdump_set(tsdn_t *tsdn, bool gdump) {
	bool prof_gdump_old;

	malloc_mutex_lock(tsdn, &prof_gdump_mtx);
	prof_gdump_old = prof_gdump_val;
	prof_gdump_val = gdump;
	malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
	return prof_gdump_old;
}

void
prof_boot0(void) {
	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void) {
	cassert(config_prof);

	/*
	 * opt_prof must be in its final state before any arenas are
	 * initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && !opt_prof) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}
}
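
/*
 * Illustrative sketch (comment only, not compiled): the opt_prof_* values
 * consumed by prof_boot1() and prof_boot2() are normally set before the first
 * allocation via the standard option mechanisms, e.g.
 *
 *	MALLOC_CONF="prof:true,prof_leak:true,lg_prof_sample:19" ./app
 *
 * or a compile-time default supplied by the application:
 *
 *	const char *malloc_conf = "prof:true,lg_prof_interval:30";
 *
 * The option names map onto opt_prof, opt_prof_leak, opt_lg_prof_sample, and
 * opt_lg_prof_interval; the parsing itself lives in jemalloc.c and is an
 * assumption of this example.
 */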
2274 */ 2275 2276 if (opt_prof_leak && !opt_prof) { 2277 /* 2278 * Enable opt_prof, but in such a way that profiles are never 2279 * automatically dumped. 2280 */ 2281 opt_prof = true; 2282 opt_prof_gdump = false; 2283 } else if (opt_prof) { 2284 if (opt_lg_prof_interval >= 0) { 2285 prof_interval = (((uint64_t)1U) << 2286 opt_lg_prof_interval); 2287 } 2288 } 2289 } 2290 2291 bool 2292 prof_boot2(tsd_t *tsd) { 2293 cassert(config_prof); 2294 2295 if (opt_prof) { 2296 unsigned i; 2297 2298 lg_prof_sample = opt_lg_prof_sample; 2299 2300 prof_active = opt_prof_active; 2301 if (malloc_mutex_init(&prof_active_mtx, "prof_active", 2302 WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { 2303 return true; 2304 } 2305 2306 prof_gdump_val = opt_prof_gdump; 2307 if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", 2308 WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { 2309 return true; 2310 } 2311 2312 prof_thread_active_init = opt_prof_thread_active_init; 2313 if (malloc_mutex_init(&prof_thread_active_init_mtx, 2314 "prof_thread_active_init", 2315 WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, 2316 malloc_mutex_rank_exclusive)) { 2317 return true; 2318 } 2319 2320 if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, 2321 prof_bt_keycomp)) { 2322 return true; 2323 } 2324 if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", 2325 WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { 2326 return true; 2327 } 2328 2329 tdata_tree_new(&tdatas); 2330 if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", 2331 WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { 2332 return true; 2333 } 2334 2335 next_thr_uid = 0; 2336 if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", 2337 WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { 2338 return true; 2339 } 2340 2341 if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", 2342 WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) { 2343 return true; 2344 } 2345 if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", 2346 WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { 2347 return true; 2348 } 2349 2350 if (opt_prof_final && opt_prof_prefix[0] != '\0' && 2351 atexit(prof_fdump) != 0) { 2352 malloc_write("<jemalloc>: Error in atexit()\n"); 2353 if (opt_abort) { 2354 abort(); 2355 } 2356 } 2357 2358 gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), 2359 b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), 2360 CACHELINE); 2361 if (gctx_locks == NULL) { 2362 return true; 2363 } 2364 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2365 if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", 2366 WITNESS_RANK_PROF_GCTX, 2367 malloc_mutex_rank_exclusive)) { 2368 return true; 2369 } 2370 } 2371 2372 tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), 2373 b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), 2374 CACHELINE); 2375 if (tdata_locks == NULL) { 2376 return true; 2377 } 2378 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2379 if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", 2380 WITNESS_RANK_PROF_TDATA, 2381 malloc_mutex_rank_exclusive)) { 2382 return true; 2383 } 2384 } 2385 } 2386 2387 #ifdef JEMALLOC_PROF_LIBGCC 2388 /* 2389 * Cause the backtracing machinery to allocate its internal state 2390 * before enabling profiling. 
2391 */ 2392 _Unwind_Backtrace(prof_unwind_init_callback, NULL); 2393 #endif 2394 2395 prof_booted = true; 2396 2397 return false; 2398 } 2399 2400 void 2401 prof_prefork0(tsdn_t *tsdn) { 2402 if (config_prof && opt_prof) { 2403 unsigned i; 2404 2405 malloc_mutex_prefork(tsdn, &prof_dump_mtx); 2406 malloc_mutex_prefork(tsdn, &bt2gctx_mtx); 2407 malloc_mutex_prefork(tsdn, &tdatas_mtx); 2408 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2409 malloc_mutex_prefork(tsdn, &tdata_locks[i]); 2410 } 2411 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2412 malloc_mutex_prefork(tsdn, &gctx_locks[i]); 2413 } 2414 } 2415 } 2416 2417 void 2418 prof_prefork1(tsdn_t *tsdn) { 2419 if (config_prof && opt_prof) { 2420 malloc_mutex_prefork(tsdn, &prof_active_mtx); 2421 malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); 2422 malloc_mutex_prefork(tsdn, &prof_gdump_mtx); 2423 malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); 2424 malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); 2425 } 2426 } 2427 2428 void 2429 prof_postfork_parent(tsdn_t *tsdn) { 2430 if (config_prof && opt_prof) { 2431 unsigned i; 2432 2433 malloc_mutex_postfork_parent(tsdn, 2434 &prof_thread_active_init_mtx); 2435 malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); 2436 malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); 2437 malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); 2438 malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); 2439 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2440 malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); 2441 } 2442 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2443 malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); 2444 } 2445 malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); 2446 malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); 2447 malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); 2448 } 2449 } 2450 2451 void 2452 prof_postfork_child(tsdn_t *tsdn) { 2453 if (config_prof && opt_prof) { 2454 unsigned i; 2455 2456 malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); 2457 malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); 2458 malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); 2459 malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); 2460 malloc_mutex_postfork_child(tsdn, &prof_active_mtx); 2461 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2462 malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); 2463 } 2464 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2465 malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); 2466 } 2467 malloc_mutex_postfork_child(tsdn, &tdatas_mtx); 2468 malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); 2469 malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); 2470 } 2471 } 2472 2473 /******************************************************************************/ 2474