#define	JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define	UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool	opt_prof = false;
bool	opt_prof_active = true;
size_t	opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t	opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool	opt_prof_gdump = false;
bool	opt_prof_final = true;
bool	opt_prof_leak = false;
bool	opt_prof_accum = false;
char	opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

uint64_t	prof_interval = 0;
bool		prof_promote;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.
 */
static malloc_mutex_t	prof_dump_mtx;
static char		prof_dump_buf[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PROF_DUMP_BUFSIZE
#else
    1
#endif
];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get stored
	 * in the backtrace caches are copied from stack-allocated temporary
	 * variables, so size is known at creation time.  Making this a
	 * contiguous object improves cache locality.
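	 *
	 * For example (illustrative only), with len == 3 the object is laid
	 * out as:
	 *
	 *   [prof_bt_t, padded to QUANTUM_CEILING(sizeof(prof_bt_t))]
	 *   [vec[0]][vec[1]][vec[2]]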
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}

static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

	cassert(config_prof);

	assert(prof_tdata->enq == false);
	prof_tdata->enq = true;

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	assert(prof_tdata->enq);
	prof_tdata->enq = false;
	idump = prof_tdata->enq_idump;
	prof_tdata->enq_idump = false;
	gdump = prof_tdata->enq_gdump;
	prof_tdata->enq_gdump = false;

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	unsigned i;
	int err;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);

	/* Throw away (nignore+1) stack frames, if that many exist. */
	for (i = 0; i < nignore + 1; i++) {
		err = unw_step(&cursor);
		if (err <= 0)
			return;
	}

	/*
	 * Iterate over stack frames until there are no more, or until no space
	 * remains in bt.
	 */
	for (i = 0; i < PROF_BT_MAX; i++) {
		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
		bt->len++;
		err = unw_step(&cursor);
		if (err <= 0)
			break;
	}
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	cassert(config_prof);

	if (data->nignore > 0)
		data->nignore--;
	else {
		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
		data->bt->len++;
		if (data->bt->len == data->max)
			return (_URC_END_OF_STACK);
	}

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define	BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	assert(nignore <= 3);

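	/*
	 * __builtin_frame_address() and __builtin_return_address() require
	 * compile-time constant arguments, so the frame walk below is
	 * manually unrolled rather than written as a loop.
	 */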
	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	not_reached();
}
#endif

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

static void
prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
{

	ctx->bt = bt;
	ctx->lock = prof_ctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_ctx_merge()/prof_ctx_destroy().
	 */
	ctx->nlimbo = 1;
	ql_elm_new(ctx, dump_link);
	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
	ql_new(&ctx->cnts_ql);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
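	 * In effect, nlimbo counts threads that still intend to access ctx
	 * after dropping its lock; ctx is not freed while such a reference is
	 * outstanding.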
	 */
	prof_tdata = prof_tdata_get(false);
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			not_reached();
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}

static bool
prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
    prof_ctx_t **p_ctx, bool *p_new_ctx)
{
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	union {
		prof_bt_t	*p;
		void		*v;
	} btkey;
	bool new_ctx;

	prof_enter(prof_tdata);
	if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
		/* bt has never been seen before.  Insert it. */
		ctx.v = imalloc(sizeof(prof_ctx_t));
		if (ctx.v == NULL) {
			prof_leave(prof_tdata);
			return (true);
		}
		btkey.p = bt_dup(bt);
		if (btkey.v == NULL) {
			prof_leave(prof_tdata);
			idalloc(ctx.v);
			return (true);
		}
		prof_ctx_init(ctx.p, btkey.p);
		if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
			/* OOM. */
			prof_leave(prof_tdata);
			idalloc(btkey.v);
			idalloc(ctx.v);
			return (true);
		}
		new_ctx = true;
	} else {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_ctx_merge()/prof_ctx_destroy().
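		 * The extra reference guarantees that a concurrent
		 * prof_ctx_destroy() cannot free ctx before the caller links a
		 * counter into it in prof_lookup().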
		 */
		malloc_mutex_lock(ctx.p->lock);
		ctx.p->nlimbo++;
		malloc_mutex_unlock(ctx.p->lock);
		new_ctx = false;
	}
	prof_leave(prof_tdata);

	*p_btkey = btkey.v;
	*p_ctx = ctx.p;
	*p_new_ctx = new_ctx;
	return (false);
}

prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		void *btkey;
		prof_ctx_t *ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
			return (NULL);

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				not_reached();
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx->lock);
		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
	} else {
		/* Move ret to the front of the LRU. */
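		/*
		 * Head entries are the most recently used; when bt2cnt reaches
		 * PROF_TCMAX above, the counter that gets flushed is the one
		 * at the tail.
		 */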
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}

#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
{
	size_t bt_count;
	prof_tdata_t *prof_tdata;

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (0);

	prof_enter(prof_tdata);
	bt_count = ckh_count(&bt2ctx);
	prof_leave(prof_tdata);

	return (bt_count);
}
#endif

#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open_impl)
#endif
static int
prof_dump_open(bool propagate_err, const char *filename)
{
	int fd;

	fd = creat(filename, 0644);
	if (fd == -1 && propagate_err == false) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
		    filename);
		if (opt_abort)
			abort();
	}

	return (fd);
}
#ifdef JEMALLOC_JET
#undef prof_dump_open
#define	prof_dump_open JEMALLOC_N(prof_dump_open)
prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
#endif

static bool
prof_dump_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_dump_close(bool propagate_err)
{
	bool ret;

	assert(prof_dump_fd != -1);
	ret = prof_dump_flush(propagate_err);
	close(prof_dump_fd);
	prof_dump_fd = -1;

	return (ret);
}

static bool
prof_dump_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_dump_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_dump_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_dump_write(propagate_err, buf);

	return (ret);
}

static void
prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
    prof_ctx_list_t *ctx_ql)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	/*
	 * Increment nlimbo so that ctx won't go away before dump.
	 * Additionally, link ctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
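	 * (The second pass, in prof_dump(), writes each queued ctx via
	 * prof_dump_ctx(), which in turn drops this nlimbo reference.)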
	 */
	ctx->nlimbo++;
	ql_tail_insert(ctx_ql, ctx, dump_link);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
{

	if (opt_lg_prof_sample == 0) {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes))
			return (true);
	} else {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes,
		    ((uint64_t)1U << opt_lg_prof_sample)))
			return (true);
	}

	return (false);
}

static void
prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	ctx->nlimbo--;
	ql_remove(ctx_ql, ctx, dump_link);
}

static void
prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	malloc_mutex_lock(ctx->lock);
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
    prof_ctx_list_t *ctx_ql)
{
	bool ret;
	unsigned i;

	cassert(config_prof);

	/*
	 * Current statistics can sum to 0 as a result of unmerged per thread
	 * statistics.  Additionally, interval- and growth-triggered dumps can
	 * occur between the time a ctx is created and when its statistics are
	 * filled in.  Avoid dumping any ctx that is an artifact of either
	 * implementation detail.
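	 *
	 * Each ctx that does get dumped produces one record of the form
	 * (illustrative):
	 *
	 *   <curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @ 0x... 0x...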
	 */
	malloc_mutex_lock(ctx->lock);
	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
		assert(ctx->cnt_summed.curobjs == 0);
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		ret = false;
		goto label_return;
	}

	if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
	    " [%"PRIu64": %"PRIu64"] @",
	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
		ret = true;
		goto label_return;
	}

	for (i = 0; i < bt->len; i++) {
		if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
		    (uintptr_t)bt->vec[i])) {
			ret = true;
			goto label_return;
		}
	}

	if (prof_dump_write(propagate_err, "\n")) {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
	return (ret);
}

static bool
prof_dump_maps(bool propagate_err)
{
	bool ret;
	int mfd;
	char filename[PATH_MAX + 1];

	cassert(config_prof);
#ifdef __FreeBSD__
	malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
#else
	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
	    (int)getpid());
#endif
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		ssize_t nread;

		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			ret = true;
			goto label_return;
		}
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_dump_flush(propagate_err) &&
				    propagate_err) {
					ret = true;
					goto label_return;
				}
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
	} else {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	if (mfd != -1)
		close(mfd);
	return (ret);
}

static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
    const char *filename)
{

	if (cnt_all->curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}
}

static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
	prof_tdata_t *prof_tdata;
	prof_cnt_t cnt_all;
	size_t tabind;
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	size_t leak_nctx;
	prof_ctx_list_t ctx_ql;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	malloc_mutex_lock(&prof_dump_mtx);

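	/*
	 * A dump proceeds in three stages: (1) snapshot every ctx and queue it
	 * on ctx_ql, (2) write the header and one record per queued ctx, and
	 * (3) append the process's memory map ("MAPPED_LIBRARIES") so that
	 * pprof can map the recorded return addresses back to symbols.
	 */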
	/* Merge per thread profile stats, and sum them in cnt_all. */
	memset(&cnt_all, 0, sizeof(prof_cnt_t));
	leak_nctx = 0;
	ql_new(&ctx_ql);
	prof_enter(prof_tdata);
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
	prof_leave(prof_tdata);

	/* Create dump file. */
	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
		goto label_open_close_error;

	/* Dump profile header. */
	if (prof_dump_header(propagate_err, &cnt_all))
		goto label_write_error;

	/* Dump per ctx profile stats. */
	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
		if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
			goto label_write_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_write_error;

	if (prof_dump_close(propagate_err))
		goto label_open_close_error;

	malloc_mutex_unlock(&prof_dump_mtx);

	if (leakcheck)
		prof_leakcheck(&cnt_all, leak_nctx, filename);

	return (false);
label_write_error:
	prof_dump_close(propagate_err);
label_open_close_error:
	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
	malloc_mutex_unlock(&prof_dump_mtx);
	return (true);
}

#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
#define	VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

	cassert(config_prof);

	if (vseq != VSEQ_INVALID) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', VSEQ_INVALID);
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

void
prof_idump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/*
		 * No filename specified, so automatically generate one.
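		 * The generated name has the form
		 * "<prefix>.<pid>.<seq>.m<mseq>.heap"; see
		 * prof_dump_filename().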
		 */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

static void
prof_bt_hash(const void *key, size_t r_hash[2])
{
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = 0;
	prof_tdata->threshold = 0;
	prof_tdata->accum = 0;

	prof_tdata->enq = false;
	prof_tdata->enq_idump = false;
	prof_tdata->enq_gdump = false;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
		/*
		 * Another destructor deallocated memory after this destructor
		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
		 * in order to receive another callback.
		 */
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
		 * wouldn't cause re-creation of the prof_tdata.  This time, do
		 * nothing, so that the destructor will not be called again.
		 */
	} else if (prof_tdata != NULL) {
		/*
		 * Delete the hash table.  All of its contents can still be
		 * iterated over via the LRU.
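		 * Merging the counters below (rather than simply freeing them)
		 * keeps this thread's totals visible to later dumps, including
		 * the final dump made when opt_prof_final is enabled.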
		 */
		ckh_delete(&prof_tdata->bt2cnt);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
		 */
		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
			prof_ctx_merge(cnt->ctx, cnt);
			idalloc(cnt);
		}
		idalloc(prof_tdata->vec);
		idalloc(prof_tdata);
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	}
}

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof and prof_promote must be in their final state before any
	 * arenas are initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}

	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);
		if (malloc_mutex_init(&prof_dump_mtx))
			return (true);

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
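	 * (The unwinder lazily allocates state on first use; warming it up
	 * here avoids doing that allocation from inside a later backtrace.)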
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

void
prof_prefork(void)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_prefork(&bt2ctx_mtx);
		malloc_mutex_prefork(&prof_dump_seq_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_prefork(&ctx_locks[i]);
	}
}

void
prof_postfork_parent(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(&ctx_locks[i]);
		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(&bt2ctx_mtx);
	}
}

void
prof_postfork_child(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(&ctx_locks[i]);
		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
		malloc_mutex_postfork_child(&bt2ctx_mtx);
	}
}

/******************************************************************************/