#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = true;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PATH_MAX +
#endif
    1];

uint64_t	prof_interval = 0;
bool		prof_promote;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.
 */
static malloc_mutex_t	prof_dump_mtx;
static char		prof_dump_buf[
    /* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
    PROF_DUMP_BUFSIZE
#else
    1
#endif
];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get
	 * stored in the backtrace caches are copied from stack-allocated
	 * temporary variables, so size is known at creation time.  Making this
	 * a contiguous object improves cache locality.
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}

static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

	cassert(config_prof);

	assert(prof_tdata->enq == false);
	prof_tdata->enq = true;

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	assert(prof_tdata->enq);
	prof_tdata->enq = false;
	idump = prof_tdata->enq_idump;
	prof_tdata->enq_idump = false;
	gdump = prof_tdata->enq_gdump;
	prof_tdata->enq_gdump = false;

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	unsigned i;
	int err;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);

	/* Throw away (nignore+1) stack frames, if that many exist. */
	for (i = 0; i < nignore + 1; i++) {
		err = unw_step(&cursor);
		if (err <= 0)
			return;
	}

	/*
	 * Iterate over stack frames until there are no more, or until no space
	 * remains in bt.
	 */
	for (i = 0; i < PROF_BT_MAX; i++) {
		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
		bt->len++;
		err = unw_step(&cursor);
		if (err <= 0)
			break;
	}
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	cassert(config_prof);

	if (data->nignore > 0)
		data->nignore--;
	else {
		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
		data->bt->len++;
		if (data->bt->len == data->max)
			return (_URC_END_OF_STACK);
	}

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
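/*
 * Note on the fallback that follows: __builtin_frame_address() and
 * __builtin_return_address() require constant arguments, so the stack walk
 * cannot be written as a loop.  It is instead unrolled via BT_FRAME for
 * PROF_BT_MAX frames, plus a few extra frames to compensate for the up to
 * three ignored frames (see the assertion on nignore below).
 */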
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	assert(nignore <= 3);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	not_reached();
}
#endif

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

static void
prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
{

	ctx->bt = bt;
	ctx->lock = prof_ctx_mutex_choose();
	/*
	 * Set nlimbo to 1, in order to avoid a race condition with
	 * prof_ctx_merge()/prof_ctx_destroy().
	 */
	ctx->nlimbo = 1;
	ql_elm_new(ctx, dump_link);
	memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
	ql_new(&ctx->cnts_ql);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
	 */
	prof_tdata = prof_tdata_get(false);
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			not_reached();
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}

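/*
 * Map bt to its global ctx via the bt2ctx cache, inserting a new ctx if this
 * is the first time the backtrace has been seen.  On success the returned ctx
 * has its nlimbo count elevated (by prof_ctx_init() for a new ctx, explicitly
 * for an existing one) so that it cannot be destroyed before the caller links
 * a counter into it.  Returns true on allocation failure.
 */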
static bool
prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
    prof_ctx_t **p_ctx, bool *p_new_ctx)
{
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	union {
		prof_bt_t	*p;
		void		*v;
	} btkey;
	bool new_ctx;

	prof_enter(prof_tdata);
	if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
		/* bt has never been seen before.  Insert it. */
		ctx.v = imalloc(sizeof(prof_ctx_t));
		if (ctx.v == NULL) {
			prof_leave(prof_tdata);
			return (true);
		}
		btkey.p = bt_dup(bt);
		if (btkey.v == NULL) {
			prof_leave(prof_tdata);
			idalloc(ctx.v);
			return (true);
		}
		prof_ctx_init(ctx.p, btkey.p);
		if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
			/* OOM. */
			prof_leave(prof_tdata);
			idalloc(btkey.v);
			idalloc(ctx.v);
			return (true);
		}
		new_ctx = true;
	} else {
		/*
		 * Increment nlimbo, in order to avoid a race condition with
		 * prof_ctx_merge()/prof_ctx_destroy().
		 */
		malloc_mutex_lock(ctx.p->lock);
		ctx.p->nlimbo++;
		malloc_mutex_unlock(ctx.p->lock);
		new_ctx = false;
	}
	prof_leave(prof_tdata);

	*p_btkey = btkey.v;
	*p_ctx = ctx.p;
	*p_new_ctx = new_ctx;
	return (false);
}

prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		void *btkey;
		prof_ctx_t *ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
			return (NULL);

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				not_reached();
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx->lock);
		ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}

#ifdef JEMALLOC_JET
size_t
prof_bt_count(void)
{
	size_t bt_count;
	prof_tdata_t *prof_tdata;

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (0);

	prof_enter(prof_tdata);
	bt_count = ckh_count(&bt2ctx);
	prof_leave(prof_tdata);

	return (bt_count);
}
#endif

#ifdef JEMALLOC_JET
#undef prof_dump_open
#define prof_dump_open JEMALLOC_N(prof_dump_open_impl)
#endif
static int
prof_dump_open(bool propagate_err, const char *filename)
{
	int fd;

	fd = creat(filename, 0644);
	if (fd == -1 && propagate_err == false) {
		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
		    filename);
		if (opt_abort)
			abort();
	}

	return (fd);
}
#ifdef JEMALLOC_JET
#undef prof_dump_open
#define prof_dump_open JEMALLOC_N(prof_dump_open)
prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
#endif

static bool
prof_dump_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_dump_close(bool propagate_err)
{
	bool ret;

	assert(prof_dump_fd != -1);
	ret = prof_dump_flush(propagate_err);
	close(prof_dump_fd);
	prof_dump_fd = -1;

	return (ret);
}

static bool
prof_dump_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_dump_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_dump_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_dump_write(propagate_err, buf);

	return (ret);
}

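/*
 * Snapshot ctx for the dump pass that follows.  The epoch loop below retries
 * until it observes an even thr_cnt->epoch value that is unchanged across the
 * memcpy(), i.e. a consistent (seqlock-style) read of each thread's counters
 * that does not block the threads updating them.
 */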
static void
prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
    prof_ctx_list_t *ctx_ql)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	/*
	 * Increment nlimbo so that ctx won't go away before dump.
	 * Additionally, link ctx into the dump list so that it is included in
	 * prof_dump()'s second pass.
	 */
	ctx->nlimbo++;
	ql_tail_insert(ctx_ql, ctx, dump_link);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
{

	if (opt_lg_prof_sample == 0) {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes))
			return (true);
	} else {
		if (prof_dump_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
		    cnt_all->curobjs, cnt_all->curbytes,
		    cnt_all->accumobjs, cnt_all->accumbytes,
		    ((uint64_t)1U << opt_lg_prof_sample)))
			return (true);
	}

	return (false);
}

static void
prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	ctx->nlimbo--;
	ql_remove(ctx_ql, ctx, dump_link);
}

static void
prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
{

	malloc_mutex_lock(ctx->lock);
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
}

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
    prof_ctx_list_t *ctx_ql)
{
	bool ret;
	unsigned i;

	cassert(config_prof);

	/*
	 * Current statistics can sum to 0 as a result of unmerged per thread
	 * statistics.  Additionally, interval- and growth-triggered dumps can
	 * occur between the time a ctx is created and when its statistics are
	 * filled in.  Avoid dumping any ctx that is an artifact of either
	 * implementation detail.
	 */
	malloc_mutex_lock(ctx->lock);
	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
		assert(ctx->cnt_summed.curobjs == 0);
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		ret = false;
		goto label_return;
	}

	if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
	    " [%"PRIu64": %"PRIu64"] @",
	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
		ret = true;
		goto label_return;
	}

	for (i = 0; i < bt->len; i++) {
		if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
		    (uintptr_t)bt->vec[i])) {
			ret = true;
			goto label_return;
		}
	}

	if (prof_dump_write(propagate_err, "\n")) {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
	malloc_mutex_unlock(ctx->lock);
	return (ret);
}

static bool
prof_dump_maps(bool propagate_err)
{
	bool ret;
	int mfd;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
	    (int)getpid());
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		ssize_t nread;

		if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			ret = true;
			goto label_return;
		}
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_dump_flush(propagate_err) &&
				    propagate_err) {
					ret = true;
					goto label_return;
				}
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
	} else {
		ret = true;
		goto label_return;
	}

	ret = false;
label_return:
	if (mfd != -1)
		close(mfd);
	return (ret);
}

static void
prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
    const char *filename)
{

	if (cnt_all->curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
		    cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}
}

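/*
 * Write a complete heap profile to filename.  The dump proceeds in two
 * passes: first, with bt2ctx_mtx held via prof_enter(), every ctx is
 * snapshotted and linked into ctx_ql (prof_dump_ctx_prep()); second, holding
 * only prof_dump_mtx, the header, one line per ctx, and /proc/<pid>/maps are
 * written out.  On failure, any remaining ctx_ql entries are unwound via
 * prof_dump_ctx_cleanup() so that nlimbo counts are restored.
 */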
static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
	prof_tdata_t *prof_tdata;
	prof_cnt_t cnt_all;
	size_t tabind;
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	size_t leak_nctx;
	prof_ctx_list_t ctx_ql;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);

	malloc_mutex_lock(&prof_dump_mtx);

	/* Merge per thread profile stats, and sum them in cnt_all. */
	memset(&cnt_all, 0, sizeof(prof_cnt_t));
	leak_nctx = 0;
	ql_new(&ctx_ql);
	prof_enter(prof_tdata);
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
		prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
	prof_leave(prof_tdata);

	/* Create dump file. */
	if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
		goto label_open_close_error;

	/* Dump profile header. */
	if (prof_dump_header(propagate_err, &cnt_all))
		goto label_write_error;

	/* Dump per ctx profile stats. */
	while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
		if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
			goto label_write_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_write_error;

	if (prof_dump_close(propagate_err))
		goto label_open_close_error;

	malloc_mutex_unlock(&prof_dump_mtx);

	if (leakcheck)
		prof_leakcheck(&cnt_all, leak_nctx, filename);

	return (false);
label_write_error:
	prof_dump_close(propagate_err);
label_open_close_error:
	while ((ctx.p = ql_first(&ctx_ql)) != NULL)
		prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
	malloc_mutex_unlock(&prof_dump_mtx);
	return (true);
}

#define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
#define VSEQ_INVALID		UINT64_C(0xffffffffffffffff)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

	cassert(config_prof);

	if (vseq != VSEQ_INVALID) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', VSEQ_INVALID);
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

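/*
 * Interval- and growth-triggered dumps: if the calling thread currently holds
 * bt2ctx_mtx (prof_tdata->enq was set by prof_enter()), the dump is deferred
 * via the enq_idump/enq_gdump flags and performed later by prof_leave(),
 * since prof_dump() itself must call prof_enter().
 */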
void
prof_idump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

static void
prof_bt_hash(const void *key, size_t r_hash[2])
{
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = 0;
	prof_tdata->threshold = 0;
	prof_tdata->accum = 0;

	prof_tdata->enq = false;
	prof_tdata->enq_idump = false;
	prof_tdata->enq_gdump = false;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
		/*
		 * Another destructor deallocated memory after this destructor
		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
		 * in order to receive another callback.
		 */
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
		 * wouldn't cause re-creation of the prof_tdata.  This time, do
		 * nothing, so that the destructor will not be called again.
		 */
	} else if (prof_tdata != NULL) {
		/*
		 * Delete the hash table.  All of its contents can still be
		 * iterated over via the LRU.
		 */
		ckh_delete(&prof_tdata->bt2cnt);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
		 */
		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
			prof_ctx_merge(cnt->ctx, cnt);
			idalloc(cnt);
		}
		idalloc(prof_tdata->vec);
		idalloc(prof_tdata);
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	}
}

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof and prof_promote must be in their final state before any
	 * arenas are initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}

	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);
		if (malloc_mutex_init(&prof_dump_mtx))
			return (true);

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

void
prof_prefork(void)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_prefork(&bt2ctx_mtx);
		malloc_mutex_prefork(&prof_dump_seq_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_prefork(&ctx_locks[i]);
	}
}

void
prof_postfork_parent(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(&ctx_locks[i]);
		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(&bt2ctx_mtx);
	}
}

void
prof_postfork_child(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(&ctx_locks[i]);
		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
		malloc_mutex_postfork_child(&bt2ctx_mtx);
	}
}

/******************************************************************************/