1 #define JEMALLOC_PROF_C_ 2 #include "jemalloc/internal/jemalloc_preamble.h" 3 #include "jemalloc/internal/jemalloc_internal_includes.h" 4 5 #include "jemalloc/internal/assert.h" 6 #include "jemalloc/internal/ckh.h" 7 #include "jemalloc/internal/hash.h" 8 #include "jemalloc/internal/malloc_io.h" 9 #include "jemalloc/internal/mutex.h" 10 11 /******************************************************************************/ 12 13 #ifdef JEMALLOC_PROF_LIBUNWIND 14 #define UNW_LOCAL_ONLY 15 #include <libunwind.h> 16 #endif 17 18 #ifdef JEMALLOC_PROF_LIBGCC 19 /* 20 * We have a circular dependency -- jemalloc_internal.h tells us if we should 21 * use libgcc's unwinding functionality, but after we've included that, we've 22 * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 23 */ 24 #undef _Unwind_Backtrace 25 #include <unwind.h> 26 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) 27 #endif 28 29 /******************************************************************************/ 30 /* Data. */ 31 32 bool opt_prof = false; 33 bool opt_prof_active = true; 34 bool opt_prof_thread_active_init = true; 35 size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; 36 ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; 37 bool opt_prof_gdump = false; 38 bool opt_prof_final = false; 39 bool opt_prof_leak = false; 40 bool opt_prof_accum = false; 41 char opt_prof_prefix[ 42 /* Minimize memory bloat for non-prof builds. */ 43 #ifdef JEMALLOC_PROF 44 PATH_MAX + 45 #endif 46 1]; 47 48 /* 49 * Initialized as opt_prof_active, and accessed via 50 * prof_active_[gs]et{_unlocked,}(). 51 */ 52 bool prof_active; 53 static malloc_mutex_t prof_active_mtx; 54 55 /* 56 * Initialized as opt_prof_thread_active_init, and accessed via 57 * prof_thread_active_init_[gs]et(). 58 */ 59 static bool prof_thread_active_init; 60 static malloc_mutex_t prof_thread_active_init_mtx; 61 62 /* 63 * Initialized as opt_prof_gdump, and accessed via 64 * prof_gdump_[gs]et{_unlocked,}(). 65 */ 66 bool prof_gdump_val; 67 static malloc_mutex_t prof_gdump_mtx; 68 69 uint64_t prof_interval = 0; 70 71 size_t lg_prof_sample; 72 73 /* 74 * Table of mutexes that are shared among gctx's. These are leaf locks, so 75 * there is no problem with using them for more than one gctx at the same time. 76 * The primary motivation for this sharing though is that gctx's are ephemeral, 77 * and destroying mutexes causes complications for systems that allocate when 78 * creating/destroying mutexes. 79 */ 80 static malloc_mutex_t *gctx_locks; 81 static atomic_u_t cum_gctxs; /* Atomic counter. */ 82 83 /* 84 * Table of mutexes that are shared among tdata's. No operations require 85 * holding multiple tdata locks, so there is no problem with using them for more 86 * than one tdata at the same time, even though a gctx lock may be acquired 87 * while holding a tdata lock. 88 */ 89 static malloc_mutex_t *tdata_locks; 90 91 /* 92 * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data 93 * structure that knows about all backtraces currently captured. 94 */ 95 static ckh_t bt2gctx; 96 /* Non static to enable profiling. */ 97 malloc_mutex_t bt2gctx_mtx; 98 99 /* 100 * Tree of all extant prof_tdata_t structures, regardless of state, 101 * {attached,detached,expired}. 102 */ 103 static prof_tdata_tree_t tdatas; 104 static malloc_mutex_t tdatas_mtx; 105 106 static uint64_t next_thr_uid; 107 static malloc_mutex_t next_thr_uid_mtx; 108 109 static malloc_mutex_t prof_dump_seq_mtx; 110 static uint64_t prof_dump_seq; 111 static uint64_t prof_dump_iseq; 112 static uint64_t prof_dump_mseq; 113 static uint64_t prof_dump_useq; 114 115 /* 116 * This buffer is rather large for stack allocation, so use a single buffer for 117 * all profile dumps. 118 */ 119 static malloc_mutex_t prof_dump_mtx; 120 static char prof_dump_buf[ 121 /* Minimize memory bloat for non-prof builds. */ 122 #ifdef JEMALLOC_PROF 123 PROF_DUMP_BUFSIZE 124 #else 125 1 126 #endif 127 ]; 128 static size_t prof_dump_buf_end; 129 static int prof_dump_fd; 130 131 /* Do not dump any profiles until bootstrapping is complete. */ 132 static bool prof_booted = false; 133 134 /******************************************************************************/ 135 /* 136 * Function prototypes for static functions that are referenced prior to 137 * definition. 138 */ 139 140 static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); 141 static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); 142 static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, 143 bool even_if_attached); 144 static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, 145 bool even_if_attached); 146 static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); 147 148 /******************************************************************************/ 149 /* Red-black trees. */ 150 151 static int 152 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { 153 uint64_t a_thr_uid = a->thr_uid; 154 uint64_t b_thr_uid = b->thr_uid; 155 int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); 156 if (ret == 0) { 157 uint64_t a_thr_discrim = a->thr_discrim; 158 uint64_t b_thr_discrim = b->thr_discrim; 159 ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < 160 b_thr_discrim); 161 if (ret == 0) { 162 uint64_t a_tctx_uid = a->tctx_uid; 163 uint64_t b_tctx_uid = b->tctx_uid; 164 ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < 165 b_tctx_uid); 166 } 167 } 168 return ret; 169 } 170 171 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, 172 tctx_link, prof_tctx_comp) 173 174 static int 175 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { 176 unsigned a_len = a->bt.len; 177 unsigned b_len = b->bt.len; 178 unsigned comp_len = (a_len < b_len) ? a_len : b_len; 179 int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); 180 if (ret == 0) { 181 ret = (a_len > b_len) - (a_len < b_len); 182 } 183 return ret; 184 } 185 186 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, 187 prof_gctx_comp) 188 189 static int 190 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { 191 int ret; 192 uint64_t a_uid = a->thr_uid; 193 uint64_t b_uid = b->thr_uid; 194 195 ret = ((a_uid > b_uid) - (a_uid < b_uid)); 196 if (ret == 0) { 197 uint64_t a_discrim = a->thr_discrim; 198 uint64_t b_discrim = b->thr_discrim; 199 200 ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); 201 } 202 return ret; 203 } 204 205 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, 206 prof_tdata_comp) 207 208 /******************************************************************************/ 209 210 void 211 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { 212 prof_tdata_t *tdata; 213 214 cassert(config_prof); 215 216 if (updated) { 217 /* 218 * Compute a new sample threshold. This isn't very important in 219 * practice, because this function is rarely executed, so the 220 * potential for sample bias is minimal except in contrived 221 * programs. 222 */ 223 tdata = prof_tdata_get(tsd, true); 224 if (tdata != NULL) { 225 prof_sample_threshold_update(tdata); 226 } 227 } 228 229 if ((uintptr_t)tctx > (uintptr_t)1U) { 230 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); 231 tctx->prepared = false; 232 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { 233 prof_tctx_destroy(tsd, tctx); 234 } else { 235 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); 236 } 237 } 238 } 239 240 void 241 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, 242 prof_tctx_t *tctx) { 243 prof_tctx_set(tsdn, ptr, usize, NULL, tctx); 244 245 malloc_mutex_lock(tsdn, tctx->tdata->lock); 246 tctx->cnts.curobjs++; 247 tctx->cnts.curbytes += usize; 248 if (opt_prof_accum) { 249 tctx->cnts.accumobjs++; 250 tctx->cnts.accumbytes += usize; 251 } 252 tctx->prepared = false; 253 malloc_mutex_unlock(tsdn, tctx->tdata->lock); 254 } 255 256 void 257 prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { 258 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); 259 assert(tctx->cnts.curobjs > 0); 260 assert(tctx->cnts.curbytes >= usize); 261 tctx->cnts.curobjs--; 262 tctx->cnts.curbytes -= usize; 263 264 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { 265 prof_tctx_destroy(tsd, tctx); 266 } else { 267 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); 268 } 269 } 270 271 void 272 bt_init(prof_bt_t *bt, void **vec) { 273 cassert(config_prof); 274 275 bt->vec = vec; 276 bt->len = 0; 277 } 278 279 static void 280 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { 281 cassert(config_prof); 282 assert(tdata == prof_tdata_get(tsd, false)); 283 284 if (tdata != NULL) { 285 assert(!tdata->enq); 286 tdata->enq = true; 287 } 288 289 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); 290 } 291 292 static void 293 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { 294 cassert(config_prof); 295 assert(tdata == prof_tdata_get(tsd, false)); 296 297 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); 298 299 if (tdata != NULL) { 300 bool idump, gdump; 301 302 assert(tdata->enq); 303 tdata->enq = false; 304 idump = tdata->enq_idump; 305 tdata->enq_idump = false; 306 gdump = tdata->enq_gdump; 307 tdata->enq_gdump = false; 308 309 if (idump) { 310 prof_idump(tsd_tsdn(tsd)); 311 } 312 if (gdump) { 313 prof_gdump(tsd_tsdn(tsd)); 314 } 315 } 316 } 317 318 #ifdef JEMALLOC_PROF_LIBUNWIND 319 void 320 prof_backtrace(prof_bt_t *bt) { 321 int nframes; 322 323 cassert(config_prof); 324 assert(bt->len == 0); 325 assert(bt->vec != NULL); 326 327 nframes = unw_backtrace(bt->vec, PROF_BT_MAX); 328 if (nframes <= 0) { 329 return; 330 } 331 bt->len = nframes; 332 } 333 #elif (defined(JEMALLOC_PROF_LIBGCC)) 334 static _Unwind_Reason_Code 335 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { 336 cassert(config_prof); 337 338 return _URC_NO_REASON; 339 } 340 341 static _Unwind_Reason_Code 342 prof_unwind_callback(struct _Unwind_Context *context, void *arg) { 343 prof_unwind_data_t *data = (prof_unwind_data_t *)arg; 344 void *ip; 345 346 cassert(config_prof); 347 348 ip = (void *)_Unwind_GetIP(context); 349 if (ip == NULL) { 350 return _URC_END_OF_STACK; 351 } 352 data->bt->vec[data->bt->len] = ip; 353 data->bt->len++; 354 if (data->bt->len == data->max) { 355 return _URC_END_OF_STACK; 356 } 357 358 return _URC_NO_REASON; 359 } 360 361 void 362 prof_backtrace(prof_bt_t *bt) { 363 prof_unwind_data_t data = {bt, PROF_BT_MAX}; 364 365 cassert(config_prof); 366 367 _Unwind_Backtrace(prof_unwind_callback, &data); 368 } 369 #elif (defined(JEMALLOC_PROF_GCC)) 370 void 371 prof_backtrace(prof_bt_t *bt) { 372 #define BT_FRAME(i) \ 373 if ((i) < PROF_BT_MAX) { \ 374 void *p; \ 375 if (__builtin_frame_address(i) == 0) { \ 376 return; \ 377 } \ 378 p = __builtin_return_address(i); \ 379 if (p == NULL) { \ 380 return; \ 381 } \ 382 bt->vec[(i)] = p; \ 383 bt->len = (i) + 1; \ 384 } else { \ 385 return; \ 386 } 387 388 cassert(config_prof); 389 390 BT_FRAME(0) 391 BT_FRAME(1) 392 BT_FRAME(2) 393 BT_FRAME(3) 394 BT_FRAME(4) 395 BT_FRAME(5) 396 BT_FRAME(6) 397 BT_FRAME(7) 398 BT_FRAME(8) 399 BT_FRAME(9) 400 401 BT_FRAME(10) 402 BT_FRAME(11) 403 BT_FRAME(12) 404 BT_FRAME(13) 405 BT_FRAME(14) 406 BT_FRAME(15) 407 BT_FRAME(16) 408 BT_FRAME(17) 409 BT_FRAME(18) 410 BT_FRAME(19) 411 412 BT_FRAME(20) 413 BT_FRAME(21) 414 BT_FRAME(22) 415 BT_FRAME(23) 416 BT_FRAME(24) 417 BT_FRAME(25) 418 BT_FRAME(26) 419 BT_FRAME(27) 420 BT_FRAME(28) 421 BT_FRAME(29) 422 423 BT_FRAME(30) 424 BT_FRAME(31) 425 BT_FRAME(32) 426 BT_FRAME(33) 427 BT_FRAME(34) 428 BT_FRAME(35) 429 BT_FRAME(36) 430 BT_FRAME(37) 431 BT_FRAME(38) 432 BT_FRAME(39) 433 434 BT_FRAME(40) 435 BT_FRAME(41) 436 BT_FRAME(42) 437 BT_FRAME(43) 438 BT_FRAME(44) 439 BT_FRAME(45) 440 BT_FRAME(46) 441 BT_FRAME(47) 442 BT_FRAME(48) 443 BT_FRAME(49) 444 445 BT_FRAME(50) 446 BT_FRAME(51) 447 BT_FRAME(52) 448 BT_FRAME(53) 449 BT_FRAME(54) 450 BT_FRAME(55) 451 BT_FRAME(56) 452 BT_FRAME(57) 453 BT_FRAME(58) 454 BT_FRAME(59) 455 456 BT_FRAME(60) 457 BT_FRAME(61) 458 BT_FRAME(62) 459 BT_FRAME(63) 460 BT_FRAME(64) 461 BT_FRAME(65) 462 BT_FRAME(66) 463 BT_FRAME(67) 464 BT_FRAME(68) 465 BT_FRAME(69) 466 467 BT_FRAME(70) 468 BT_FRAME(71) 469 BT_FRAME(72) 470 BT_FRAME(73) 471 BT_FRAME(74) 472 BT_FRAME(75) 473 BT_FRAME(76) 474 BT_FRAME(77) 475 BT_FRAME(78) 476 BT_FRAME(79) 477 478 BT_FRAME(80) 479 BT_FRAME(81) 480 BT_FRAME(82) 481 BT_FRAME(83) 482 BT_FRAME(84) 483 BT_FRAME(85) 484 BT_FRAME(86) 485 BT_FRAME(87) 486 BT_FRAME(88) 487 BT_FRAME(89) 488 489 BT_FRAME(90) 490 BT_FRAME(91) 491 BT_FRAME(92) 492 BT_FRAME(93) 493 BT_FRAME(94) 494 BT_FRAME(95) 495 BT_FRAME(96) 496 BT_FRAME(97) 497 BT_FRAME(98) 498 BT_FRAME(99) 499 500 BT_FRAME(100) 501 BT_FRAME(101) 502 BT_FRAME(102) 503 BT_FRAME(103) 504 BT_FRAME(104) 505 BT_FRAME(105) 506 BT_FRAME(106) 507 BT_FRAME(107) 508 BT_FRAME(108) 509 BT_FRAME(109) 510 511 BT_FRAME(110) 512 BT_FRAME(111) 513 BT_FRAME(112) 514 BT_FRAME(113) 515 BT_FRAME(114) 516 BT_FRAME(115) 517 BT_FRAME(116) 518 BT_FRAME(117) 519 BT_FRAME(118) 520 BT_FRAME(119) 521 522 BT_FRAME(120) 523 BT_FRAME(121) 524 BT_FRAME(122) 525 BT_FRAME(123) 526 BT_FRAME(124) 527 BT_FRAME(125) 528 BT_FRAME(126) 529 BT_FRAME(127) 530 #undef BT_FRAME 531 } 532 #else 533 void 534 prof_backtrace(prof_bt_t *bt) { 535 cassert(config_prof); 536 not_reached(); 537 } 538 #endif 539 540 static malloc_mutex_t * 541 prof_gctx_mutex_choose(void) { 542 unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); 543 544 return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; 545 } 546 547 static malloc_mutex_t * 548 prof_tdata_mutex_choose(uint64_t thr_uid) { 549 return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; 550 } 551 552 static prof_gctx_t * 553 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { 554 /* 555 * Create a single allocation that has space for vec of length bt->len. 556 */ 557 size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); 558 prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, 559 sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), 560 true); 561 if (gctx == NULL) { 562 return NULL; 563 } 564 gctx->lock = prof_gctx_mutex_choose(); 565 /* 566 * Set nlimbo to 1, in order to avoid a race condition with 567 * prof_tctx_destroy()/prof_gctx_try_destroy(). 568 */ 569 gctx->nlimbo = 1; 570 tctx_tree_new(&gctx->tctxs); 571 /* Duplicate bt. */ 572 memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); 573 gctx->bt.vec = gctx->vec; 574 gctx->bt.len = bt->len; 575 return gctx; 576 } 577 578 static void 579 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, 580 prof_tdata_t *tdata) { 581 cassert(config_prof); 582 583 /* 584 * Check that gctx is still unused by any thread cache before destroying 585 * it. prof_lookup() increments gctx->nlimbo in order to avoid a race 586 * condition with this function, as does prof_tctx_destroy() in order to 587 * avoid a race between the main body of prof_tctx_destroy() and entry 588 * into this function. 589 */ 590 prof_enter(tsd, tdata_self); 591 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 592 assert(gctx->nlimbo != 0); 593 if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { 594 /* Remove gctx from bt2gctx. */ 595 if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { 596 not_reached(); 597 } 598 prof_leave(tsd, tdata_self); 599 /* Destroy gctx. */ 600 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 601 idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); 602 } else { 603 /* 604 * Compensate for increment in prof_tctx_destroy() or 605 * prof_lookup(). 606 */ 607 gctx->nlimbo--; 608 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 609 prof_leave(tsd, tdata_self); 610 } 611 } 612 613 static bool 614 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { 615 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); 616 617 if (opt_prof_accum) { 618 return false; 619 } 620 if (tctx->cnts.curobjs != 0) { 621 return false; 622 } 623 if (tctx->prepared) { 624 return false; 625 } 626 return true; 627 } 628 629 static bool 630 prof_gctx_should_destroy(prof_gctx_t *gctx) { 631 if (opt_prof_accum) { 632 return false; 633 } 634 if (!tctx_tree_empty(&gctx->tctxs)) { 635 return false; 636 } 637 if (gctx->nlimbo != 0) { 638 return false; 639 } 640 return true; 641 } 642 643 static void 644 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { 645 prof_tdata_t *tdata = tctx->tdata; 646 prof_gctx_t *gctx = tctx->gctx; 647 bool destroy_tdata, destroy_tctx, destroy_gctx; 648 649 malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); 650 651 assert(tctx->cnts.curobjs == 0); 652 assert(tctx->cnts.curbytes == 0); 653 assert(!opt_prof_accum); 654 assert(tctx->cnts.accumobjs == 0); 655 assert(tctx->cnts.accumbytes == 0); 656 657 ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); 658 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); 659 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 660 661 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 662 switch (tctx->state) { 663 case prof_tctx_state_nominal: 664 tctx_tree_remove(&gctx->tctxs, tctx); 665 destroy_tctx = true; 666 if (prof_gctx_should_destroy(gctx)) { 667 /* 668 * Increment gctx->nlimbo in order to keep another 669 * thread from winning the race to destroy gctx while 670 * this one has gctx->lock dropped. Without this, it 671 * would be possible for another thread to: 672 * 673 * 1) Sample an allocation associated with gctx. 674 * 2) Deallocate the sampled object. 675 * 3) Successfully prof_gctx_try_destroy(gctx). 676 * 677 * The result would be that gctx no longer exists by the 678 * time this thread accesses it in 679 * prof_gctx_try_destroy(). 680 */ 681 gctx->nlimbo++; 682 destroy_gctx = true; 683 } else { 684 destroy_gctx = false; 685 } 686 break; 687 case prof_tctx_state_dumping: 688 /* 689 * A dumping thread needs tctx to remain valid until dumping 690 * has finished. Change state such that the dumping thread will 691 * complete destruction during a late dump iteration phase. 692 */ 693 tctx->state = prof_tctx_state_purgatory; 694 destroy_tctx = false; 695 destroy_gctx = false; 696 break; 697 default: 698 not_reached(); 699 destroy_tctx = false; 700 destroy_gctx = false; 701 } 702 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 703 if (destroy_gctx) { 704 prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, 705 tdata); 706 } 707 708 malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); 709 710 if (destroy_tdata) { 711 prof_tdata_destroy(tsd, tdata, false); 712 } 713 714 if (destroy_tctx) { 715 idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); 716 } 717 } 718 719 static bool 720 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, 721 void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { 722 union { 723 prof_gctx_t *p; 724 void *v; 725 } gctx, tgctx; 726 union { 727 prof_bt_t *p; 728 void *v; 729 } btkey; 730 bool new_gctx; 731 732 prof_enter(tsd, tdata); 733 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { 734 /* bt has never been seen before. Insert it. */ 735 prof_leave(tsd, tdata); 736 tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); 737 if (tgctx.v == NULL) { 738 return true; 739 } 740 prof_enter(tsd, tdata); 741 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { 742 gctx.p = tgctx.p; 743 btkey.p = &gctx.p->bt; 744 if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { 745 /* OOM. */ 746 prof_leave(tsd, tdata); 747 idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, 748 true, true); 749 return true; 750 } 751 new_gctx = true; 752 } else { 753 new_gctx = false; 754 } 755 } else { 756 tgctx.v = NULL; 757 new_gctx = false; 758 } 759 760 if (!new_gctx) { 761 /* 762 * Increment nlimbo, in order to avoid a race condition with 763 * prof_tctx_destroy()/prof_gctx_try_destroy(). 764 */ 765 malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); 766 gctx.p->nlimbo++; 767 malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); 768 new_gctx = false; 769 770 if (tgctx.v != NULL) { 771 /* Lost race to insert. */ 772 idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, 773 true); 774 } 775 } 776 prof_leave(tsd, tdata); 777 778 *p_btkey = btkey.v; 779 *p_gctx = gctx.p; 780 *p_new_gctx = new_gctx; 781 return false; 782 } 783 784 prof_tctx_t * 785 prof_lookup(tsd_t *tsd, prof_bt_t *bt) { 786 union { 787 prof_tctx_t *p; 788 void *v; 789 } ret; 790 prof_tdata_t *tdata; 791 bool not_found; 792 793 cassert(config_prof); 794 795 tdata = prof_tdata_get(tsd, false); 796 if (tdata == NULL) { 797 return NULL; 798 } 799 800 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 801 not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); 802 if (!not_found) { /* Note double negative! */ 803 ret.p->prepared = true; 804 } 805 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 806 if (not_found) { 807 void *btkey; 808 prof_gctx_t *gctx; 809 bool new_gctx, error; 810 811 /* 812 * This thread's cache lacks bt. Look for it in the global 813 * cache. 814 */ 815 if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, 816 &new_gctx)) { 817 return NULL; 818 } 819 820 /* Link a prof_tctx_t into gctx for this thread. */ 821 ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), 822 sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, 823 arena_ichoose(tsd, NULL), true); 824 if (ret.p == NULL) { 825 if (new_gctx) { 826 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 827 } 828 return NULL; 829 } 830 ret.p->tdata = tdata; 831 ret.p->thr_uid = tdata->thr_uid; 832 ret.p->thr_discrim = tdata->thr_discrim; 833 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); 834 ret.p->gctx = gctx; 835 ret.p->tctx_uid = tdata->tctx_uid_next++; 836 ret.p->prepared = true; 837 ret.p->state = prof_tctx_state_initializing; 838 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 839 error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); 840 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 841 if (error) { 842 if (new_gctx) { 843 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 844 } 845 idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); 846 return NULL; 847 } 848 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 849 ret.p->state = prof_tctx_state_nominal; 850 tctx_tree_insert(&gctx->tctxs, ret.p); 851 gctx->nlimbo--; 852 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 853 } 854 855 return ret.p; 856 } 857 858 /* 859 * The bodies of this function and prof_leakcheck() are compiled out unless heap 860 * profiling is enabled, so that it is possible to compile jemalloc with 861 * floating point support completely disabled. Avoiding floating point code is 862 * important on memory-constrained systems, but it also enables a workaround for 863 * versions of glibc that don't properly save/restore floating point registers 864 * during dynamic lazy symbol loading (which internally calls into whatever 865 * malloc implementation happens to be integrated into the application). Note 866 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast 867 * memory moves, so jemalloc must be compiled with such optimizations disabled 868 * (e.g. 869 * -mno-sse) in order for the workaround to be complete. 870 */ 871 void 872 prof_sample_threshold_update(prof_tdata_t *tdata) { 873 #ifdef JEMALLOC_PROF 874 uint64_t r; 875 double u; 876 877 if (!config_prof) { 878 return; 879 } 880 881 if (lg_prof_sample == 0) { 882 tdata->bytes_until_sample = 0; 883 return; 884 } 885 886 /* 887 * Compute sample interval as a geometrically distributed random 888 * variable with mean (2^lg_prof_sample). 889 * 890 * __ __ 891 * | log(u) | 1 892 * tdata->bytes_until_sample = | -------- |, where p = --------------- 893 * | log(1-p) | lg_prof_sample 894 * 2 895 * 896 * For more information on the math, see: 897 * 898 * Non-Uniform Random Variate Generation 899 * Luc Devroye 900 * Springer-Verlag, New York, 1986 901 * pp 500 902 * (http://luc.devroye.org/rnbookindex.html) 903 */ 904 r = prng_lg_range_u64(&tdata->prng_state, 53); 905 u = (double)r * (1.0/9007199254740992.0L); 906 tdata->bytes_until_sample = (uint64_t)(log(u) / 907 log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) 908 + (uint64_t)1U; 909 #endif 910 } 911 912 #ifdef JEMALLOC_JET 913 static prof_tdata_t * 914 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 915 void *arg) { 916 size_t *tdata_count = (size_t *)arg; 917 918 (*tdata_count)++; 919 920 return NULL; 921 } 922 923 size_t 924 prof_tdata_count(void) { 925 size_t tdata_count = 0; 926 tsdn_t *tsdn; 927 928 tsdn = tsdn_fetch(); 929 malloc_mutex_lock(tsdn, &tdatas_mtx); 930 tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, 931 (void *)&tdata_count); 932 malloc_mutex_unlock(tsdn, &tdatas_mtx); 933 934 return tdata_count; 935 } 936 937 size_t 938 prof_bt_count(void) { 939 size_t bt_count; 940 tsd_t *tsd; 941 prof_tdata_t *tdata; 942 943 tsd = tsd_fetch(); 944 tdata = prof_tdata_get(tsd, false); 945 if (tdata == NULL) { 946 return 0; 947 } 948 949 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); 950 bt_count = ckh_count(&bt2gctx); 951 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); 952 953 return bt_count; 954 } 955 #endif 956 957 static int 958 prof_dump_open_impl(bool propagate_err, const char *filename) { 959 int fd; 960 961 fd = creat(filename, 0644); 962 if (fd == -1 && !propagate_err) { 963 malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n", 964 filename); 965 if (opt_abort) { 966 abort(); 967 } 968 } 969 970 return fd; 971 } 972 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; 973 974 static bool 975 prof_dump_flush(bool propagate_err) { 976 bool ret = false; 977 ssize_t err; 978 979 cassert(config_prof); 980 981 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); 982 if (err == -1) { 983 if (!propagate_err) { 984 malloc_write("<jemalloc>: write() failed during heap " 985 "profile flush\n"); 986 if (opt_abort) { 987 abort(); 988 } 989 } 990 ret = true; 991 } 992 prof_dump_buf_end = 0; 993 994 return ret; 995 } 996 997 static bool 998 prof_dump_close(bool propagate_err) { 999 bool ret; 1000 1001 assert(prof_dump_fd != -1); 1002 ret = prof_dump_flush(propagate_err); 1003 close(prof_dump_fd); 1004 prof_dump_fd = -1; 1005 1006 return ret; 1007 } 1008 1009 static bool 1010 prof_dump_write(bool propagate_err, const char *s) { 1011 size_t i, slen, n; 1012 1013 cassert(config_prof); 1014 1015 i = 0; 1016 slen = strlen(s); 1017 while (i < slen) { 1018 /* Flush the buffer if it is full. */ 1019 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 1020 if (prof_dump_flush(propagate_err) && propagate_err) { 1021 return true; 1022 } 1023 } 1024 1025 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { 1026 /* Finish writing. */ 1027 n = slen - i; 1028 } else { 1029 /* Write as much of s as will fit. */ 1030 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; 1031 } 1032 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); 1033 prof_dump_buf_end += n; 1034 i += n; 1035 } 1036 1037 return false; 1038 } 1039 1040 JEMALLOC_FORMAT_PRINTF(2, 3) 1041 static bool 1042 prof_dump_printf(bool propagate_err, const char *format, ...) { 1043 bool ret; 1044 va_list ap; 1045 char buf[PROF_PRINTF_BUFSIZE]; 1046 1047 va_start(ap, format); 1048 malloc_vsnprintf(buf, sizeof(buf), format, ap); 1049 va_end(ap); 1050 ret = prof_dump_write(propagate_err, buf); 1051 1052 return ret; 1053 } 1054 1055 static void 1056 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { 1057 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); 1058 1059 malloc_mutex_lock(tsdn, tctx->gctx->lock); 1060 1061 switch (tctx->state) { 1062 case prof_tctx_state_initializing: 1063 malloc_mutex_unlock(tsdn, tctx->gctx->lock); 1064 return; 1065 case prof_tctx_state_nominal: 1066 tctx->state = prof_tctx_state_dumping; 1067 malloc_mutex_unlock(tsdn, tctx->gctx->lock); 1068 1069 memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); 1070 1071 tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; 1072 tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; 1073 if (opt_prof_accum) { 1074 tdata->cnt_summed.accumobjs += 1075 tctx->dump_cnts.accumobjs; 1076 tdata->cnt_summed.accumbytes += 1077 tctx->dump_cnts.accumbytes; 1078 } 1079 break; 1080 case prof_tctx_state_dumping: 1081 case prof_tctx_state_purgatory: 1082 not_reached(); 1083 } 1084 } 1085 1086 static void 1087 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { 1088 malloc_mutex_assert_owner(tsdn, gctx->lock); 1089 1090 gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; 1091 gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; 1092 if (opt_prof_accum) { 1093 gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; 1094 gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; 1095 } 1096 } 1097 1098 static prof_tctx_t * 1099 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { 1100 tsdn_t *tsdn = (tsdn_t *)arg; 1101 1102 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); 1103 1104 switch (tctx->state) { 1105 case prof_tctx_state_nominal: 1106 /* New since dumping started; ignore. */ 1107 break; 1108 case prof_tctx_state_dumping: 1109 case prof_tctx_state_purgatory: 1110 prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); 1111 break; 1112 default: 1113 not_reached(); 1114 } 1115 1116 return NULL; 1117 } 1118 1119 struct prof_tctx_dump_iter_arg_s { 1120 tsdn_t *tsdn; 1121 bool propagate_err; 1122 }; 1123 1124 static prof_tctx_t * 1125 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { 1126 struct prof_tctx_dump_iter_arg_s *arg = 1127 (struct prof_tctx_dump_iter_arg_s *)opaque; 1128 1129 malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); 1130 1131 switch (tctx->state) { 1132 case prof_tctx_state_initializing: 1133 case prof_tctx_state_nominal: 1134 /* Not captured by this dump. */ 1135 break; 1136 case prof_tctx_state_dumping: 1137 case prof_tctx_state_purgatory: 1138 if (prof_dump_printf(arg->propagate_err, 1139 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " 1140 "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, 1141 tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, 1142 tctx->dump_cnts.accumbytes)) { 1143 return tctx; 1144 } 1145 break; 1146 default: 1147 not_reached(); 1148 } 1149 return NULL; 1150 } 1151 1152 static prof_tctx_t * 1153 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { 1154 tsdn_t *tsdn = (tsdn_t *)arg; 1155 prof_tctx_t *ret; 1156 1157 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); 1158 1159 switch (tctx->state) { 1160 case prof_tctx_state_nominal: 1161 /* New since dumping started; ignore. */ 1162 break; 1163 case prof_tctx_state_dumping: 1164 tctx->state = prof_tctx_state_nominal; 1165 break; 1166 case prof_tctx_state_purgatory: 1167 ret = tctx; 1168 goto label_return; 1169 default: 1170 not_reached(); 1171 } 1172 1173 ret = NULL; 1174 label_return: 1175 return ret; 1176 } 1177 1178 static void 1179 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { 1180 cassert(config_prof); 1181 1182 malloc_mutex_lock(tsdn, gctx->lock); 1183 1184 /* 1185 * Increment nlimbo so that gctx won't go away before dump. 1186 * Additionally, link gctx into the dump list so that it is included in 1187 * prof_dump()'s second pass. 1188 */ 1189 gctx->nlimbo++; 1190 gctx_tree_insert(gctxs, gctx); 1191 1192 memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); 1193 1194 malloc_mutex_unlock(tsdn, gctx->lock); 1195 } 1196 1197 struct prof_gctx_merge_iter_arg_s { 1198 tsdn_t *tsdn; 1199 size_t leak_ngctx; 1200 }; 1201 1202 static prof_gctx_t * 1203 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { 1204 struct prof_gctx_merge_iter_arg_s *arg = 1205 (struct prof_gctx_merge_iter_arg_s *)opaque; 1206 1207 malloc_mutex_lock(arg->tsdn, gctx->lock); 1208 tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, 1209 (void *)arg->tsdn); 1210 if (gctx->cnt_summed.curobjs != 0) { 1211 arg->leak_ngctx++; 1212 } 1213 malloc_mutex_unlock(arg->tsdn, gctx->lock); 1214 1215 return NULL; 1216 } 1217 1218 static void 1219 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { 1220 prof_tdata_t *tdata = prof_tdata_get(tsd, false); 1221 prof_gctx_t *gctx; 1222 1223 /* 1224 * Standard tree iteration won't work here, because as soon as we 1225 * decrement gctx->nlimbo and unlock gctx, another thread can 1226 * concurrently destroy it, which will corrupt the tree. Therefore, 1227 * tear down the tree one node at a time during iteration. 1228 */ 1229 while ((gctx = gctx_tree_first(gctxs)) != NULL) { 1230 gctx_tree_remove(gctxs, gctx); 1231 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); 1232 { 1233 prof_tctx_t *next; 1234 1235 next = NULL; 1236 do { 1237 prof_tctx_t *to_destroy = 1238 tctx_tree_iter(&gctx->tctxs, next, 1239 prof_tctx_finish_iter, 1240 (void *)tsd_tsdn(tsd)); 1241 if (to_destroy != NULL) { 1242 next = tctx_tree_next(&gctx->tctxs, 1243 to_destroy); 1244 tctx_tree_remove(&gctx->tctxs, 1245 to_destroy); 1246 idalloctm(tsd_tsdn(tsd), to_destroy, 1247 NULL, NULL, true, true); 1248 } else { 1249 next = NULL; 1250 } 1251 } while (next != NULL); 1252 } 1253 gctx->nlimbo--; 1254 if (prof_gctx_should_destroy(gctx)) { 1255 gctx->nlimbo++; 1256 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 1257 prof_gctx_try_destroy(tsd, tdata, gctx, tdata); 1258 } else { 1259 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); 1260 } 1261 } 1262 } 1263 1264 struct prof_tdata_merge_iter_arg_s { 1265 tsdn_t *tsdn; 1266 prof_cnt_t cnt_all; 1267 }; 1268 1269 static prof_tdata_t * 1270 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 1271 void *opaque) { 1272 struct prof_tdata_merge_iter_arg_s *arg = 1273 (struct prof_tdata_merge_iter_arg_s *)opaque; 1274 1275 malloc_mutex_lock(arg->tsdn, tdata->lock); 1276 if (!tdata->expired) { 1277 size_t tabind; 1278 union { 1279 prof_tctx_t *p; 1280 void *v; 1281 } tctx; 1282 1283 tdata->dumping = true; 1284 memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); 1285 for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, 1286 &tctx.v);) { 1287 prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); 1288 } 1289 1290 arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; 1291 arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; 1292 if (opt_prof_accum) { 1293 arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; 1294 arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; 1295 } 1296 } else { 1297 tdata->dumping = false; 1298 } 1299 malloc_mutex_unlock(arg->tsdn, tdata->lock); 1300 1301 return NULL; 1302 } 1303 1304 static prof_tdata_t * 1305 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 1306 void *arg) { 1307 bool propagate_err = *(bool *)arg; 1308 1309 if (!tdata->dumping) { 1310 return NULL; 1311 } 1312 1313 if (prof_dump_printf(propagate_err, 1314 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", 1315 tdata->thr_uid, tdata->cnt_summed.curobjs, 1316 tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, 1317 tdata->cnt_summed.accumbytes, 1318 (tdata->thread_name != NULL) ? " " : "", 1319 (tdata->thread_name != NULL) ? tdata->thread_name : "")) { 1320 return tdata; 1321 } 1322 return NULL; 1323 } 1324 1325 static bool 1326 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, 1327 const prof_cnt_t *cnt_all) { 1328 bool ret; 1329 1330 if (prof_dump_printf(propagate_err, 1331 "heap_v2/%"FMTu64"\n" 1332 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", 1333 ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, 1334 cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { 1335 return true; 1336 } 1337 1338 malloc_mutex_lock(tsdn, &tdatas_mtx); 1339 ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, 1340 (void *)&propagate_err) != NULL); 1341 malloc_mutex_unlock(tsdn, &tdatas_mtx); 1342 return ret; 1343 } 1344 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; 1345 1346 static bool 1347 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, 1348 const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { 1349 bool ret; 1350 unsigned i; 1351 struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; 1352 1353 cassert(config_prof); 1354 malloc_mutex_assert_owner(tsdn, gctx->lock); 1355 1356 /* Avoid dumping such gctx's that have no useful data. */ 1357 if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || 1358 (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { 1359 assert(gctx->cnt_summed.curobjs == 0); 1360 assert(gctx->cnt_summed.curbytes == 0); 1361 assert(gctx->cnt_summed.accumobjs == 0); 1362 assert(gctx->cnt_summed.accumbytes == 0); 1363 ret = false; 1364 goto label_return; 1365 } 1366 1367 if (prof_dump_printf(propagate_err, "@")) { 1368 ret = true; 1369 goto label_return; 1370 } 1371 for (i = 0; i < bt->len; i++) { 1372 if (prof_dump_printf(propagate_err, " %#"FMTxPTR, 1373 (uintptr_t)bt->vec[i])) { 1374 ret = true; 1375 goto label_return; 1376 } 1377 } 1378 1379 if (prof_dump_printf(propagate_err, 1380 "\n" 1381 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", 1382 gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, 1383 gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { 1384 ret = true; 1385 goto label_return; 1386 } 1387 1388 prof_tctx_dump_iter_arg.tsdn = tsdn; 1389 prof_tctx_dump_iter_arg.propagate_err = propagate_err; 1390 if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, 1391 (void *)&prof_tctx_dump_iter_arg) != NULL) { 1392 ret = true; 1393 goto label_return; 1394 } 1395 1396 ret = false; 1397 label_return: 1398 return ret; 1399 } 1400 1401 #ifndef _WIN32 1402 JEMALLOC_FORMAT_PRINTF(1, 2) 1403 static int 1404 prof_open_maps(const char *format, ...) { 1405 int mfd; 1406 va_list ap; 1407 char filename[PATH_MAX + 1]; 1408 1409 va_start(ap, format); 1410 malloc_vsnprintf(filename, sizeof(filename), format, ap); 1411 va_end(ap); 1412 mfd = open(filename, O_RDONLY | O_CLOEXEC); 1413 1414 return mfd; 1415 } 1416 #endif 1417 1418 static int 1419 prof_getpid(void) { 1420 #ifdef _WIN32 1421 return GetCurrentProcessId(); 1422 #else 1423 return getpid(); 1424 #endif 1425 } 1426 1427 static bool 1428 prof_dump_maps(bool propagate_err) { 1429 bool ret; 1430 int mfd; 1431 1432 cassert(config_prof); 1433 #ifdef __FreeBSD__ 1434 mfd = prof_open_maps("/proc/curproc/map"); 1435 #elif defined(_WIN32) 1436 mfd = -1; // Not implemented 1437 #else 1438 { 1439 int pid = prof_getpid(); 1440 1441 mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); 1442 if (mfd == -1) { 1443 mfd = prof_open_maps("/proc/%d/maps", pid); 1444 } 1445 } 1446 #endif 1447 if (mfd != -1) { 1448 ssize_t nread; 1449 1450 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && 1451 propagate_err) { 1452 ret = true; 1453 goto label_return; 1454 } 1455 nread = 0; 1456 do { 1457 prof_dump_buf_end += nread; 1458 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 1459 /* Make space in prof_dump_buf before read(). */ 1460 if (prof_dump_flush(propagate_err) && 1461 propagate_err) { 1462 ret = true; 1463 goto label_return; 1464 } 1465 } 1466 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], 1467 PROF_DUMP_BUFSIZE - prof_dump_buf_end); 1468 } while (nread > 0); 1469 } else { 1470 ret = true; 1471 goto label_return; 1472 } 1473 1474 ret = false; 1475 label_return: 1476 if (mfd != -1) { 1477 close(mfd); 1478 } 1479 return ret; 1480 } 1481 1482 /* 1483 * See prof_sample_threshold_update() comment for why the body of this function 1484 * is conditionally compiled. 1485 */ 1486 static void 1487 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, 1488 const char *filename) { 1489 #ifdef JEMALLOC_PROF 1490 /* 1491 * Scaling is equivalent AdjustSamples() in jeprof, but the result may 1492 * differ slightly from what jeprof reports, because here we scale the 1493 * summary values, whereas jeprof scales each context individually and 1494 * reports the sums of the scaled values. 1495 */ 1496 if (cnt_all->curbytes != 0) { 1497 double sample_period = (double)((uint64_t)1 << lg_prof_sample); 1498 double ratio = (((double)cnt_all->curbytes) / 1499 (double)cnt_all->curobjs) / sample_period; 1500 double scale_factor = 1.0 / (1.0 - exp(-ratio)); 1501 uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) 1502 * scale_factor); 1503 uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * 1504 scale_factor); 1505 1506 malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64 1507 " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", 1508 curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != 1509 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); 1510 malloc_printf( 1511 "<jemalloc>: Run jeprof on \"%s\" for leak detail\n", 1512 filename); 1513 } 1514 #endif 1515 } 1516 1517 struct prof_gctx_dump_iter_arg_s { 1518 tsdn_t *tsdn; 1519 bool propagate_err; 1520 }; 1521 1522 static prof_gctx_t * 1523 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { 1524 prof_gctx_t *ret; 1525 struct prof_gctx_dump_iter_arg_s *arg = 1526 (struct prof_gctx_dump_iter_arg_s *)opaque; 1527 1528 malloc_mutex_lock(arg->tsdn, gctx->lock); 1529 1530 if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, 1531 gctxs)) { 1532 ret = gctx; 1533 goto label_return; 1534 } 1535 1536 ret = NULL; 1537 label_return: 1538 malloc_mutex_unlock(arg->tsdn, gctx->lock); 1539 return ret; 1540 } 1541 1542 static void 1543 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, 1544 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, 1545 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, 1546 prof_gctx_tree_t *gctxs) { 1547 size_t tabind; 1548 union { 1549 prof_gctx_t *p; 1550 void *v; 1551 } gctx; 1552 1553 prof_enter(tsd, tdata); 1554 1555 /* 1556 * Put gctx's in limbo and clear their counters in preparation for 1557 * summing. 1558 */ 1559 gctx_tree_new(gctxs); 1560 for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { 1561 prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); 1562 } 1563 1564 /* 1565 * Iterate over tdatas, and for the non-expired ones snapshot their tctx 1566 * stats and merge them into the associated gctx's. 1567 */ 1568 prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); 1569 memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); 1570 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1571 tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, 1572 (void *)prof_tdata_merge_iter_arg); 1573 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1574 1575 /* Merge tctx stats into gctx's. */ 1576 prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); 1577 prof_gctx_merge_iter_arg->leak_ngctx = 0; 1578 gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, 1579 (void *)prof_gctx_merge_iter_arg); 1580 1581 prof_leave(tsd, tdata); 1582 } 1583 1584 static bool 1585 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, 1586 bool leakcheck, prof_tdata_t *tdata, 1587 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, 1588 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, 1589 struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, 1590 prof_gctx_tree_t *gctxs) { 1591 /* Create dump file. */ 1592 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { 1593 return true; 1594 } 1595 1596 /* Dump profile header. */ 1597 if (prof_dump_header(tsd_tsdn(tsd), propagate_err, 1598 &prof_tdata_merge_iter_arg->cnt_all)) { 1599 goto label_write_error; 1600 } 1601 1602 /* Dump per gctx profile stats. */ 1603 prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); 1604 prof_gctx_dump_iter_arg->propagate_err = propagate_err; 1605 if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, 1606 (void *)prof_gctx_dump_iter_arg) != NULL) { 1607 goto label_write_error; 1608 } 1609 1610 /* Dump /proc/<pid>/maps if possible. */ 1611 if (prof_dump_maps(propagate_err)) { 1612 goto label_write_error; 1613 } 1614 1615 if (prof_dump_close(propagate_err)) { 1616 return true; 1617 } 1618 1619 return false; 1620 label_write_error: 1621 prof_dump_close(propagate_err); 1622 return true; 1623 } 1624 1625 static bool 1626 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, 1627 bool leakcheck) { 1628 cassert(config_prof); 1629 assert(tsd_reentrancy_level_get(tsd) == 0); 1630 1631 prof_tdata_t * tdata = prof_tdata_get(tsd, true); 1632 if (tdata == NULL) { 1633 return true; 1634 } 1635 1636 pre_reentrancy(tsd, NULL); 1637 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); 1638 1639 prof_gctx_tree_t gctxs; 1640 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; 1641 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; 1642 struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; 1643 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, 1644 &prof_gctx_merge_iter_arg, &gctxs); 1645 bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, 1646 &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, 1647 &prof_gctx_dump_iter_arg, &gctxs); 1648 prof_gctx_finish(tsd, &gctxs); 1649 1650 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); 1651 post_reentrancy(tsd); 1652 1653 if (err) { 1654 return true; 1655 } 1656 1657 if (leakcheck) { 1658 prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, 1659 prof_gctx_merge_iter_arg.leak_ngctx, filename); 1660 } 1661 return false; 1662 } 1663 1664 #ifdef JEMALLOC_JET 1665 void 1666 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, 1667 uint64_t *accumbytes) { 1668 tsd_t *tsd; 1669 prof_tdata_t *tdata; 1670 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; 1671 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; 1672 prof_gctx_tree_t gctxs; 1673 1674 tsd = tsd_fetch(); 1675 tdata = prof_tdata_get(tsd, false); 1676 if (tdata == NULL) { 1677 if (curobjs != NULL) { 1678 *curobjs = 0; 1679 } 1680 if (curbytes != NULL) { 1681 *curbytes = 0; 1682 } 1683 if (accumobjs != NULL) { 1684 *accumobjs = 0; 1685 } 1686 if (accumbytes != NULL) { 1687 *accumbytes = 0; 1688 } 1689 return; 1690 } 1691 1692 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, 1693 &prof_gctx_merge_iter_arg, &gctxs); 1694 prof_gctx_finish(tsd, &gctxs); 1695 1696 if (curobjs != NULL) { 1697 *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; 1698 } 1699 if (curbytes != NULL) { 1700 *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; 1701 } 1702 if (accumobjs != NULL) { 1703 *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; 1704 } 1705 if (accumbytes != NULL) { 1706 *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; 1707 } 1708 } 1709 #endif 1710 1711 #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) 1712 #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) 1713 static void 1714 prof_dump_filename(char *filename, char v, uint64_t vseq) { 1715 cassert(config_prof); 1716 1717 if (vseq != VSEQ_INVALID) { 1718 /* "<prefix>.<pid>.<seq>.v<vseq>.heap" */ 1719 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1720 "%s.%d.%"FMTu64".%c%"FMTu64".heap", 1721 opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); 1722 } else { 1723 /* "<prefix>.<pid>.<seq>.<v>.heap" */ 1724 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, 1725 "%s.%d.%"FMTu64".%c.heap", 1726 opt_prof_prefix, prof_getpid(), prof_dump_seq, v); 1727 } 1728 prof_dump_seq++; 1729 } 1730 1731 static void 1732 prof_fdump(void) { 1733 tsd_t *tsd; 1734 char filename[DUMP_FILENAME_BUFSIZE]; 1735 1736 cassert(config_prof); 1737 assert(opt_prof_final); 1738 assert(opt_prof_prefix[0] != '\0'); 1739 1740 if (!prof_booted) { 1741 return; 1742 } 1743 tsd = tsd_fetch(); 1744 assert(tsd_reentrancy_level_get(tsd) == 0); 1745 1746 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1747 prof_dump_filename(filename, 'f', VSEQ_INVALID); 1748 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1749 prof_dump(tsd, false, filename, opt_prof_leak); 1750 } 1751 1752 bool 1753 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { 1754 cassert(config_prof); 1755 1756 #ifndef JEMALLOC_ATOMIC_U64 1757 if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", 1758 WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { 1759 return true; 1760 } 1761 prof_accum->accumbytes = 0; 1762 #else 1763 atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); 1764 #endif 1765 return false; 1766 } 1767 1768 void 1769 prof_idump(tsdn_t *tsdn) { 1770 tsd_t *tsd; 1771 prof_tdata_t *tdata; 1772 1773 cassert(config_prof); 1774 1775 if (!prof_booted || tsdn_null(tsdn)) { 1776 return; 1777 } 1778 tsd = tsdn_tsd(tsdn); 1779 if (tsd_reentrancy_level_get(tsd) > 0) { 1780 return; 1781 } 1782 1783 tdata = prof_tdata_get(tsd, false); 1784 if (tdata == NULL) { 1785 return; 1786 } 1787 if (tdata->enq) { 1788 tdata->enq_idump = true; 1789 return; 1790 } 1791 1792 if (opt_prof_prefix[0] != '\0') { 1793 char filename[PATH_MAX + 1]; 1794 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1795 prof_dump_filename(filename, 'i', prof_dump_iseq); 1796 prof_dump_iseq++; 1797 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1798 prof_dump(tsd, false, filename, false); 1799 } 1800 } 1801 1802 bool 1803 prof_mdump(tsd_t *tsd, const char *filename) { 1804 cassert(config_prof); 1805 assert(tsd_reentrancy_level_get(tsd) == 0); 1806 1807 if (!opt_prof || !prof_booted) { 1808 return true; 1809 } 1810 char filename_buf[DUMP_FILENAME_BUFSIZE]; 1811 if (filename == NULL) { 1812 /* No filename specified, so automatically generate one. */ 1813 if (opt_prof_prefix[0] == '\0') { 1814 return true; 1815 } 1816 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1817 prof_dump_filename(filename_buf, 'm', prof_dump_mseq); 1818 prof_dump_mseq++; 1819 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); 1820 filename = filename_buf; 1821 } 1822 return prof_dump(tsd, true, filename, false); 1823 } 1824 1825 void 1826 prof_gdump(tsdn_t *tsdn) { 1827 tsd_t *tsd; 1828 prof_tdata_t *tdata; 1829 1830 cassert(config_prof); 1831 1832 if (!prof_booted || tsdn_null(tsdn)) { 1833 return; 1834 } 1835 tsd = tsdn_tsd(tsdn); 1836 if (tsd_reentrancy_level_get(tsd) > 0) { 1837 return; 1838 } 1839 1840 tdata = prof_tdata_get(tsd, false); 1841 if (tdata == NULL) { 1842 return; 1843 } 1844 if (tdata->enq) { 1845 tdata->enq_gdump = true; 1846 return; 1847 } 1848 1849 if (opt_prof_prefix[0] != '\0') { 1850 char filename[DUMP_FILENAME_BUFSIZE]; 1851 malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); 1852 prof_dump_filename(filename, 'u', prof_dump_useq); 1853 prof_dump_useq++; 1854 malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); 1855 prof_dump(tsd, false, filename, false); 1856 } 1857 } 1858 1859 static void 1860 prof_bt_hash(const void *key, size_t r_hash[2]) { 1861 prof_bt_t *bt = (prof_bt_t *)key; 1862 1863 cassert(config_prof); 1864 1865 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); 1866 } 1867 1868 static bool 1869 prof_bt_keycomp(const void *k1, const void *k2) { 1870 const prof_bt_t *bt1 = (prof_bt_t *)k1; 1871 const prof_bt_t *bt2 = (prof_bt_t *)k2; 1872 1873 cassert(config_prof); 1874 1875 if (bt1->len != bt2->len) { 1876 return false; 1877 } 1878 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); 1879 } 1880 1881 static uint64_t 1882 prof_thr_uid_alloc(tsdn_t *tsdn) { 1883 uint64_t thr_uid; 1884 1885 malloc_mutex_lock(tsdn, &next_thr_uid_mtx); 1886 thr_uid = next_thr_uid; 1887 next_thr_uid++; 1888 malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); 1889 1890 return thr_uid; 1891 } 1892 1893 static prof_tdata_t * 1894 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, 1895 char *thread_name, bool active) { 1896 prof_tdata_t *tdata; 1897 1898 cassert(config_prof); 1899 1900 /* Initialize an empty cache for this thread. */ 1901 tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), 1902 sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, 1903 arena_get(TSDN_NULL, 0, true), true); 1904 if (tdata == NULL) { 1905 return NULL; 1906 } 1907 1908 tdata->lock = prof_tdata_mutex_choose(thr_uid); 1909 tdata->thr_uid = thr_uid; 1910 tdata->thr_discrim = thr_discrim; 1911 tdata->thread_name = thread_name; 1912 tdata->attached = true; 1913 tdata->expired = false; 1914 tdata->tctx_uid_next = 0; 1915 1916 if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, 1917 prof_bt_keycomp)) { 1918 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); 1919 return NULL; 1920 } 1921 1922 tdata->prng_state = (uint64_t)(uintptr_t)tdata; 1923 prof_sample_threshold_update(tdata); 1924 1925 tdata->enq = false; 1926 tdata->enq_idump = false; 1927 tdata->enq_gdump = false; 1928 1929 tdata->dumping = false; 1930 tdata->active = active; 1931 1932 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1933 tdata_tree_insert(&tdatas, tdata); 1934 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1935 1936 return tdata; 1937 } 1938 1939 prof_tdata_t * 1940 prof_tdata_init(tsd_t *tsd) { 1941 return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, 1942 NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); 1943 } 1944 1945 static bool 1946 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { 1947 if (tdata->attached && !even_if_attached) { 1948 return false; 1949 } 1950 if (ckh_count(&tdata->bt2tctx) != 0) { 1951 return false; 1952 } 1953 return true; 1954 } 1955 1956 static bool 1957 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, 1958 bool even_if_attached) { 1959 malloc_mutex_assert_owner(tsdn, tdata->lock); 1960 1961 return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); 1962 } 1963 1964 static void 1965 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, 1966 bool even_if_attached) { 1967 malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); 1968 1969 tdata_tree_remove(&tdatas, tdata); 1970 1971 assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); 1972 1973 if (tdata->thread_name != NULL) { 1974 idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, 1975 true); 1976 } 1977 ckh_delete(tsd, &tdata->bt2tctx); 1978 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); 1979 } 1980 1981 static void 1982 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { 1983 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 1984 prof_tdata_destroy_locked(tsd, tdata, even_if_attached); 1985 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 1986 } 1987 1988 static void 1989 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { 1990 bool destroy_tdata; 1991 1992 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); 1993 if (tdata->attached) { 1994 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, 1995 true); 1996 /* 1997 * Only detach if !destroy_tdata, because detaching would allow 1998 * another thread to win the race to destroy tdata. 1999 */ 2000 if (!destroy_tdata) { 2001 tdata->attached = false; 2002 } 2003 tsd_prof_tdata_set(tsd, NULL); 2004 } else { 2005 destroy_tdata = false; 2006 } 2007 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); 2008 if (destroy_tdata) { 2009 prof_tdata_destroy(tsd, tdata, true); 2010 } 2011 } 2012 2013 prof_tdata_t * 2014 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { 2015 uint64_t thr_uid = tdata->thr_uid; 2016 uint64_t thr_discrim = tdata->thr_discrim + 1; 2017 char *thread_name = (tdata->thread_name != NULL) ? 2018 prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; 2019 bool active = tdata->active; 2020 2021 prof_tdata_detach(tsd, tdata); 2022 return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, 2023 active); 2024 } 2025 2026 static bool 2027 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { 2028 bool destroy_tdata; 2029 2030 malloc_mutex_lock(tsdn, tdata->lock); 2031 if (!tdata->expired) { 2032 tdata->expired = true; 2033 destroy_tdata = tdata->attached ? false : 2034 prof_tdata_should_destroy(tsdn, tdata, false); 2035 } else { 2036 destroy_tdata = false; 2037 } 2038 malloc_mutex_unlock(tsdn, tdata->lock); 2039 2040 return destroy_tdata; 2041 } 2042 2043 static prof_tdata_t * 2044 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 2045 void *arg) { 2046 tsdn_t *tsdn = (tsdn_t *)arg; 2047 2048 return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); 2049 } 2050 2051 void 2052 prof_reset(tsd_t *tsd, size_t lg_sample) { 2053 prof_tdata_t *next; 2054 2055 assert(lg_sample < (sizeof(uint64_t) << 3)); 2056 2057 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); 2058 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 2059 2060 lg_prof_sample = lg_sample; 2061 2062 next = NULL; 2063 do { 2064 prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, 2065 prof_tdata_reset_iter, (void *)tsd); 2066 if (to_destroy != NULL) { 2067 next = tdata_tree_next(&tdatas, to_destroy); 2068 prof_tdata_destroy_locked(tsd, to_destroy, false); 2069 } else { 2070 next = NULL; 2071 } 2072 } while (next != NULL); 2073 2074 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); 2075 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); 2076 } 2077 2078 void 2079 prof_tdata_cleanup(tsd_t *tsd) { 2080 prof_tdata_t *tdata; 2081 2082 if (!config_prof) { 2083 return; 2084 } 2085 2086 tdata = tsd_prof_tdata_get(tsd); 2087 if (tdata != NULL) { 2088 prof_tdata_detach(tsd, tdata); 2089 } 2090 } 2091 2092 bool 2093 prof_active_get(tsdn_t *tsdn) { 2094 bool prof_active_current; 2095 2096 malloc_mutex_lock(tsdn, &prof_active_mtx); 2097 prof_active_current = prof_active; 2098 malloc_mutex_unlock(tsdn, &prof_active_mtx); 2099 return prof_active_current; 2100 } 2101 2102 bool 2103 prof_active_set(tsdn_t *tsdn, bool active) { 2104 bool prof_active_old; 2105 2106 malloc_mutex_lock(tsdn, &prof_active_mtx); 2107 prof_active_old = prof_active; 2108 prof_active = active; 2109 malloc_mutex_unlock(tsdn, &prof_active_mtx); 2110 return prof_active_old; 2111 } 2112 2113 const char * 2114 prof_thread_name_get(tsd_t *tsd) { 2115 prof_tdata_t *tdata; 2116 2117 tdata = prof_tdata_get(tsd, true); 2118 if (tdata == NULL) { 2119 return ""; 2120 } 2121 return (tdata->thread_name != NULL ? tdata->thread_name : ""); 2122 } 2123 2124 static char * 2125 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { 2126 char *ret; 2127 size_t size; 2128 2129 if (thread_name == NULL) { 2130 return NULL; 2131 } 2132 2133 size = strlen(thread_name) + 1; 2134 if (size == 1) { 2135 return ""; 2136 } 2137 2138 ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, 2139 arena_get(TSDN_NULL, 0, true), true); 2140 if (ret == NULL) { 2141 return NULL; 2142 } 2143 memcpy(ret, thread_name, size); 2144 return ret; 2145 } 2146 2147 int 2148 prof_thread_name_set(tsd_t *tsd, const char *thread_name) { 2149 prof_tdata_t *tdata; 2150 unsigned i; 2151 char *s; 2152 2153 tdata = prof_tdata_get(tsd, true); 2154 if (tdata == NULL) { 2155 return EAGAIN; 2156 } 2157 2158 /* Validate input. */ 2159 if (thread_name == NULL) { 2160 return EFAULT; 2161 } 2162 for (i = 0; thread_name[i] != '\0'; i++) { 2163 char c = thread_name[i]; 2164 if (!isgraph(c) && !isblank(c)) { 2165 return EFAULT; 2166 } 2167 } 2168 2169 s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); 2170 if (s == NULL) { 2171 return EAGAIN; 2172 } 2173 2174 if (tdata->thread_name != NULL) { 2175 idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, 2176 true); 2177 tdata->thread_name = NULL; 2178 } 2179 if (strlen(s) > 0) { 2180 tdata->thread_name = s; 2181 } 2182 return 0; 2183 } 2184 2185 bool 2186 prof_thread_active_get(tsd_t *tsd) { 2187 prof_tdata_t *tdata; 2188 2189 tdata = prof_tdata_get(tsd, true); 2190 if (tdata == NULL) { 2191 return false; 2192 } 2193 return tdata->active; 2194 } 2195 2196 bool 2197 prof_thread_active_set(tsd_t *tsd, bool active) { 2198 prof_tdata_t *tdata; 2199 2200 tdata = prof_tdata_get(tsd, true); 2201 if (tdata == NULL) { 2202 return true; 2203 } 2204 tdata->active = active; 2205 return false; 2206 } 2207 2208 bool 2209 prof_thread_active_init_get(tsdn_t *tsdn) { 2210 bool active_init; 2211 2212 malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); 2213 active_init = prof_thread_active_init; 2214 malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); 2215 return active_init; 2216 } 2217 2218 bool 2219 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { 2220 bool active_init_old; 2221 2222 malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); 2223 active_init_old = prof_thread_active_init; 2224 prof_thread_active_init = active_init; 2225 malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); 2226 return active_init_old; 2227 } 2228 2229 bool 2230 prof_gdump_get(tsdn_t *tsdn) { 2231 bool prof_gdump_current; 2232 2233 malloc_mutex_lock(tsdn, &prof_gdump_mtx); 2234 prof_gdump_current = prof_gdump_val; 2235 malloc_mutex_unlock(tsdn, &prof_gdump_mtx); 2236 return prof_gdump_current; 2237 } 2238 2239 bool 2240 prof_gdump_set(tsdn_t *tsdn, bool gdump) { 2241 bool prof_gdump_old; 2242 2243 malloc_mutex_lock(tsdn, &prof_gdump_mtx); 2244 prof_gdump_old = prof_gdump_val; 2245 prof_gdump_val = gdump; 2246 malloc_mutex_unlock(tsdn, &prof_gdump_mtx); 2247 return prof_gdump_old; 2248 } 2249 2250 void 2251 prof_boot0(void) { 2252 cassert(config_prof); 2253 2254 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, 2255 sizeof(PROF_PREFIX_DEFAULT)); 2256 } 2257 2258 void 2259 prof_boot1(void) { 2260 cassert(config_prof); 2261 2262 /* 2263 * opt_prof must be in its final state before any arenas are 2264 * initialized, so this function must be executed early. 2265 */ 2266 2267 if (opt_prof_leak && !opt_prof) { 2268 /* 2269 * Enable opt_prof, but in such a way that profiles are never 2270 * automatically dumped. 2271 */ 2272 opt_prof = true; 2273 opt_prof_gdump = false; 2274 } else if (opt_prof) { 2275 if (opt_lg_prof_interval >= 0) { 2276 prof_interval = (((uint64_t)1U) << 2277 opt_lg_prof_interval); 2278 } 2279 } 2280 } 2281 2282 bool 2283 prof_boot2(tsd_t *tsd) { 2284 cassert(config_prof); 2285 2286 if (opt_prof) { 2287 unsigned i; 2288 2289 lg_prof_sample = opt_lg_prof_sample; 2290 2291 prof_active = opt_prof_active; 2292 if (malloc_mutex_init(&prof_active_mtx, "prof_active", 2293 WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { 2294 return true; 2295 } 2296 2297 prof_gdump_val = opt_prof_gdump; 2298 if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", 2299 WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { 2300 return true; 2301 } 2302 2303 prof_thread_active_init = opt_prof_thread_active_init; 2304 if (malloc_mutex_init(&prof_thread_active_init_mtx, 2305 "prof_thread_active_init", 2306 WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, 2307 malloc_mutex_rank_exclusive)) { 2308 return true; 2309 } 2310 2311 if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, 2312 prof_bt_keycomp)) { 2313 return true; 2314 } 2315 if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", 2316 WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { 2317 return true; 2318 } 2319 2320 tdata_tree_new(&tdatas); 2321 if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", 2322 WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { 2323 return true; 2324 } 2325 2326 next_thr_uid = 0; 2327 if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", 2328 WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { 2329 return true; 2330 } 2331 2332 if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", 2333 WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) { 2334 return true; 2335 } 2336 if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", 2337 WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { 2338 return true; 2339 } 2340 2341 if (opt_prof_final && opt_prof_prefix[0] != '\0' && 2342 atexit(prof_fdump) != 0) { 2343 malloc_write("<jemalloc>: Error in atexit()\n"); 2344 if (opt_abort) { 2345 abort(); 2346 } 2347 } 2348 2349 gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), 2350 b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), 2351 CACHELINE); 2352 if (gctx_locks == NULL) { 2353 return true; 2354 } 2355 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2356 if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", 2357 WITNESS_RANK_PROF_GCTX, 2358 malloc_mutex_rank_exclusive)) { 2359 return true; 2360 } 2361 } 2362 2363 tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), 2364 b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), 2365 CACHELINE); 2366 if (tdata_locks == NULL) { 2367 return true; 2368 } 2369 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2370 if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", 2371 WITNESS_RANK_PROF_TDATA, 2372 malloc_mutex_rank_exclusive)) { 2373 return true; 2374 } 2375 } 2376 } 2377 2378 #ifdef JEMALLOC_PROF_LIBGCC 2379 /* 2380 * Cause the backtracing machinery to allocate its internal state 2381 * before enabling profiling. 2382 */ 2383 _Unwind_Backtrace(prof_unwind_init_callback, NULL); 2384 #endif 2385 2386 prof_booted = true; 2387 2388 return false; 2389 } 2390 2391 void 2392 prof_prefork0(tsdn_t *tsdn) { 2393 if (config_prof && opt_prof) { 2394 unsigned i; 2395 2396 malloc_mutex_prefork(tsdn, &prof_dump_mtx); 2397 malloc_mutex_prefork(tsdn, &bt2gctx_mtx); 2398 malloc_mutex_prefork(tsdn, &tdatas_mtx); 2399 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2400 malloc_mutex_prefork(tsdn, &tdata_locks[i]); 2401 } 2402 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2403 malloc_mutex_prefork(tsdn, &gctx_locks[i]); 2404 } 2405 } 2406 } 2407 2408 void 2409 prof_prefork1(tsdn_t *tsdn) { 2410 if (config_prof && opt_prof) { 2411 malloc_mutex_prefork(tsdn, &prof_active_mtx); 2412 malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); 2413 malloc_mutex_prefork(tsdn, &prof_gdump_mtx); 2414 malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); 2415 malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); 2416 } 2417 } 2418 2419 void 2420 prof_postfork_parent(tsdn_t *tsdn) { 2421 if (config_prof && opt_prof) { 2422 unsigned i; 2423 2424 malloc_mutex_postfork_parent(tsdn, 2425 &prof_thread_active_init_mtx); 2426 malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); 2427 malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); 2428 malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); 2429 malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); 2430 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2431 malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); 2432 } 2433 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2434 malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); 2435 } 2436 malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); 2437 malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); 2438 malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); 2439 } 2440 } 2441 2442 void 2443 prof_postfork_child(tsdn_t *tsdn) { 2444 if (config_prof && opt_prof) { 2445 unsigned i; 2446 2447 malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); 2448 malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); 2449 malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); 2450 malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); 2451 malloc_mutex_postfork_child(tsdn, &prof_active_mtx); 2452 for (i = 0; i < PROF_NCTX_LOCKS; i++) { 2453 malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); 2454 } 2455 for (i = 0; i < PROF_NTDATA_LOCKS; i++) { 2456 malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); 2457 } 2458 malloc_mutex_postfork_child(tsdn, &tdatas_mtx); 2459 malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); 2460 malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); 2461 } 2462 } 2463 2464 /******************************************************************************/ 2465