#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool	opt_prof = false;
bool	opt_prof_active = true;
size_t	opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t	opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool	opt_prof_gdump = false;
bool	opt_prof_final = true;
bool	opt_prof_leak = false;
bool	opt_prof_accum = false;
char	opt_prof_prefix[PATH_MAX + 1];

uint64_t	prof_interval;
bool		prof_promote;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char		prof_dump_buf[PROF_DUMP_BUFSIZE];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

static malloc_mutex_t	enq_mtx;
static bool		enq;
static bool		enq_idump;
static bool		enq_gdump;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static prof_bt_t	*bt_dup(prof_bt_t *bt);
static void	bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code	prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code	prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
static bool	prof_flush(bool propagate_err);
static bool	prof_write(bool propagate_err, const char *s);
static bool	prof_printf(bool propagate_err, const char *format, ...)
    JEMALLOC_ATTR(format(printf, 2, 3));
static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void	prof_ctx_destroy(prof_ctx_t *ctx);
static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
    prof_bt_t *bt);
static bool	prof_dump_maps(bool propagate_err);
static bool	prof_dump(bool propagate_err, const char *filename,
    bool leakcheck);
static void	prof_dump_filename(char *filename, char v, int64_t vseq);
static void	prof_fdump(void);
static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool	prof_bt_keycomp(const void *k1, const void *k2);
static malloc_mutex_t	*prof_ctx_mutex_choose(void);

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get
	 * stored in the backtrace caches are copied from stack-allocated
	 * temporary variables, so size is known at creation time.  Making this
	 * a contiguous object improves cache locality.
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}

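/*
 * prof_enter()/prof_leave() bracket all accesses to the global bt2ctx table.
 * While bt2ctx_mtx is held, prof_idump()/prof_gdump() requests are deferred
 * via enq_idump/enq_gdump, and prof_leave() services them once the mutex has
 * been dropped.
 */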
static inline void
prof_enter(void)
{

	cassert(config_prof);

	malloc_mutex_lock(&enq_mtx);
	enq = true;
	malloc_mutex_unlock(&enq_mtx);

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(void)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	malloc_mutex_lock(&enq_mtx);
	enq = false;
	idump = enq_idump;
	enq_idump = false;
	gdump = enq_gdump;
	enq_gdump = false;
	malloc_mutex_unlock(&enq_mtx);

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}

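/*
 * Three alternative backtrace implementations follow, selected by the build
 * configuration: libunwind, libgcc's _Unwind_Backtrace(), or GCC's
 * __builtin_{frame,return}_address() intrinsics.  The final fallback is a
 * stub that must never be called.
 */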
#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	unsigned i;
	int err;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);

	/* Throw away (nignore+1) stack frames, if that many exist. */
	for (i = 0; i < nignore + 1; i++) {
		err = unw_step(&cursor);
		if (err <= 0)
			return;
	}

	/*
	 * Iterate over stack frames until there are no more, or until no space
	 * remains in bt.
	 */
	for (i = 0; i < PROF_BT_MAX; i++) {
		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
		bt->len++;
		err = unw_step(&cursor);
		if (err <= 0)
			break;
	}
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	cassert(config_prof);

	if (data->nignore > 0)
		data->nignore--;
	else {
		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
		data->bt->len++;
		if (data->bt->len == data->max)
			return (_URC_END_OF_STACK);
	}

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	assert(nignore <= 3);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	assert(false);
}
#endif

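/*
 * Look up the per-thread counter object for bt, creating the thread cache
 * entry and/or the global prof_ctx_t as needed.  The per-thread bt2cnt cache
 * is bounded at PROF_TCMAX entries; when full, the least recently used entry
 * is merged into its ctx and recycled.
 */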
prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = *prof_tdata_tsd_get();
	if (prof_tdata == NULL) {
		prof_tdata = prof_tdata_init();
		if (prof_tdata == NULL)
			return (NULL);
	}

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		union {
			prof_bt_t	*p;
			void		*v;
		} btkey;
		union {
			prof_ctx_t	*p;
			void		*v;
		} ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		prof_enter();
		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
			/* bt has never been seen before.  Insert it. */
			ctx.v = imalloc(sizeof(prof_ctx_t));
			if (ctx.v == NULL) {
				prof_leave();
				return (NULL);
			}
			btkey.p = bt_dup(bt);
			if (btkey.v == NULL) {
				prof_leave();
				idalloc(ctx.v);
				return (NULL);
			}
			ctx.p->bt = btkey.p;
			ctx.p->lock = prof_ctx_mutex_choose();
			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
			ql_new(&ctx.p->cnts_ql);
			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
				/* OOM. */
				prof_leave();
				idalloc(btkey.v);
				idalloc(ctx.v);
				return (NULL);
			}
			/*
			 * Artificially raise curobjs, in order to avoid a race
			 * condition with prof_ctx_merge()/prof_ctx_destroy().
			 *
			 * No locking is necessary for ctx here because no other
			 * threads have had the opportunity to fetch it from
			 * bt2ctx yet.
			 */
			ctx.p->cnt_merged.curobjs++;
			new_ctx = true;
		} else {
			/*
			 * Artificially raise curobjs, in order to avoid a race
			 * condition with prof_ctx_merge()/prof_ctx_destroy().
			 */
			malloc_mutex_lock(ctx.p->lock);
			ctx.p->cnt_merged.curobjs++;
			malloc_mutex_unlock(ctx.p->lock);
			new_ctx = false;
		}
		prof_leave();

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				assert(false);
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx.p);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx.p;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx.p);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx.p->lock);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
		ctx.p->cnt_merged.curobjs--;
		malloc_mutex_unlock(ctx.p->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}

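/*
 * Dump output is staged in prof_dump_buf and written to prof_dump_fd in
 * PROF_DUMP_BUFSIZE chunks.  When propagate_err is true, I/O errors are
 * reported to the caller rather than handled via malloc_write()/abort().
 */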
static bool
prof_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_write(propagate_err, buf);

	return (ret);
}

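/*
 * Sum ctx's merged stats plus all attached per-thread counters into
 * ctx->cnt_summed and cnt_all.  Each thread's counters are read using the
 * epoch field as a seqlock: the snapshot is retried until the epoch is even
 * and unchanged across the copy.
 */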
static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() artificially raises ctx->cnt_merged.curobjs in
	 * order to avoid a race condition with this function, as does
	 * prof_ctx_merge() in order to avoid a race between the main body of
	 * prof_ctx_merge() and entry into this function.
	 */
	prof_enter();
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			assert(false);
		prof_leave();
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->cnt_merged.curobjs--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave();
	}
}

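/*
 * Merge cnt's stats into its ctx and unlink cnt from the ctx's counter list;
 * destroy the ctx if it thereby becomes unused.
 */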
static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0) {
		/*
		 * Artificially raise ctx->cnt_merged.curobjs in order to keep
		 * another thread from winning the race to destroy ctx while
		 * this one has ctx->lock dropped.  Without this, it would be
		 * possible for another thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->cnt_merged.curobjs++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
{
	unsigned i;

	cassert(config_prof);

	if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) {
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		return (false);
	}

	if (prof_printf(propagate_err, "%"PRId64": %"PRId64
	    " [%"PRIu64": %"PRIu64"] @",
	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
		return (true);

	for (i = 0; i < bt->len; i++) {
		if (prof_printf(propagate_err, " %#"PRIxPTR,
		    (uintptr_t)bt->vec[i]))
			return (true);
	}

	if (prof_write(propagate_err, "\n"))
		return (true);

	return (false);
}

static bool
prof_dump_maps(bool propagate_err)
{
	int mfd;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
	    (int)getpid());
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		ssize_t nread;

		if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err)
			return (true);
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_flush(propagate_err) && propagate_err)
					return (true);
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
		close(mfd);
	} else
		return (true);

	return (false);
}

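/*
 * Write a complete heap profile to filename: a header line, one line per ctx
 * (summed counts followed by the backtrace), and the contents of
 * /proc/<pid>/maps.  Returns true on error.
 */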
static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
	prof_cnt_t cnt_all;
	size_t tabind;
	union {
		prof_bt_t	*p;
		void		*v;
	} bt;
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	size_t leak_nctx;

	cassert(config_prof);

	prof_enter();
	prof_dump_fd = creat(filename, 0644);
	if (prof_dump_fd == -1) {
		if (propagate_err == false) {
			malloc_printf(
			    "<jemalloc>: creat(\"%s\", 0644) failed\n",
			    filename);
			if (opt_abort)
				abort();
		}
		goto label_error;
	}

	/* Merge per thread profile stats, and sum them in cnt_all. */
	memset(&cnt_all, 0, sizeof(prof_cnt_t));
	leak_nctx = 0;
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);

	/* Dump profile header. */
	if (opt_lg_prof_sample == 0) {
		if (prof_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
		    cnt_all.curobjs, cnt_all.curbytes,
		    cnt_all.accumobjs, cnt_all.accumbytes))
			goto label_error;
	} else {
		if (prof_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
		    cnt_all.curobjs, cnt_all.curbytes,
		    cnt_all.accumobjs, cnt_all.accumbytes,
		    ((uint64_t)1U << opt_lg_prof_sample)))
			goto label_error;
	}

	/* Dump per ctx profile stats. */
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
	    == false;) {
		if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
			goto label_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_error;

	if (prof_flush(propagate_err))
		goto label_error;
	close(prof_dump_fd);
	prof_leave();

	if (leakcheck && cnt_all.curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}

	return (false);
label_error:
	prof_leave();
	return (true);
}

#define DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

	cassert(config_prof);

	if (vseq != UINT64_C(0xffffffffffffffff)) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

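/*
 * prof_idump() and prof_gdump() request dumps with the 'i' and 'u' sequence
 * markers respectively.  If a dump is already in progress (enq is set), the
 * request is recorded and serviced later by prof_leave().
 */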
void
prof_idump(void)
{
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	malloc_mutex_lock(&enq_mtx);
	if (enq) {
		enq_idump = true;
		malloc_mutex_unlock(&enq_mtx);
		return;
	}
	malloc_mutex_unlock(&enq_mtx);

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	malloc_mutex_lock(&enq_mtx);
	if (enq) {
		enq_gdump = true;
		malloc_mutex_unlock(&enq_mtx);
		return;
	}
	malloc_mutex_unlock(&enq_mtx);

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

static void
prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
	size_t ret1, ret2;
	uint64_t h;
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);
	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
	assert(hash1 != NULL);
	assert(hash2 != NULL);

	h = hash(bt->vec, bt->len * sizeof(void *),
	    UINT64_C(0x94122f335b332aea));
	if (minbits <= 32) {
		/*
		 * Avoid doing multiple hashes, since a single hash provides
		 * enough bits.
		 */
		ret1 = h & ZU(0xffffffffU);
		ret2 = h >> 32;
	} else {
		ret1 = h;
		ret2 = hash(bt->vec, bt->len * sizeof(void *),
		    UINT64_C(0x8432a476666bbc13));
	}

	*hash1 = ret1;
	*hash2 = ret2;
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

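/*
 * Allocate and register this thread's profiling state: the bt2cnt cache and
 * its LRU list, a backtrace vector of PROF_BT_MAX entries, and the sampling
 * PRNG state.
 */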
prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = 0;
	prof_tdata->threshold = 0;
	prof_tdata->accum = 0;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	/*
	 * Delete the hash table.  All of its contents can still be iterated
	 * over via the LRU.
	 */
	ckh_delete(&prof_tdata->bt2cnt);

	/* Iteratively merge cnt's into the global stats and delete them. */
	while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
		ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
		prof_ctx_merge(cnt->ctx, cnt);
		idalloc(cnt);
	}

	idalloc(prof_tdata->vec);

	idalloc(prof_tdata);
	prof_tdata = NULL;
	prof_tdata_tsd_set(&prof_tdata);
}

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof and prof_promote must be in their final state before any
	 * arenas are initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
		prof_interval = 0;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		} else
			prof_interval = 0;
	}

	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);

		if (malloc_mutex_init(&enq_mtx))
			return (true);
		enq = false;
		enq_idump = false;
		enq_gdump = false;

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

/******************************************************************************/