#define	JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define	UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = true;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[PATH_MAX + 1];

uint64_t	prof_interval;
bool		prof_promote;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char		prof_dump_buf[PROF_DUMP_BUFSIZE];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static prof_bt_t	*bt_dup(prof_bt_t *bt);
static void	bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code	prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code	prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
static bool	prof_flush(bool propagate_err);
static bool	prof_write(bool propagate_err, const char *s);
static bool	prof_printf(bool propagate_err, const char *format, ...)
    JEMALLOC_ATTR(format(printf, 2, 3));
static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void	prof_ctx_destroy(prof_ctx_t *ctx);
static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
    prof_bt_t *bt);
static bool	prof_dump_maps(bool propagate_err);
static bool	prof_dump(bool propagate_err, const char *filename,
    bool leakcheck);
static void	prof_dump_filename(char *filename, char v, int64_t vseq);
static void	prof_fdump(void);
static void	prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
    size_t *hash2);
static bool	prof_bt_keycomp(const void *k1, const void *k2);
static malloc_mutex_t	*prof_ctx_mutex_choose(void);

/******************************************************************************/
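
/*
 * A backtrace is represented by prof_bt_t: vec points at an array of return
 * addresses and len gives the number of valid entries.  bt_init() merely
 * wraps a caller-provided vector for capture, while bt_dup() makes the
 * single-allocation heap copy that the caches below store as hash keys.
 */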

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get
	 * stored in the backtrace caches are copied from stack-allocated
	 * temporary variables, so size is known at creation time.  Making this
	 * a contiguous object improves cache locality.
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}

static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

	cassert(config_prof);

	assert(prof_tdata->enq == false);
	prof_tdata->enq = true;

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	assert(prof_tdata->enq);
	prof_tdata->enq = false;
	idump = prof_tdata->enq_idump;
	prof_tdata->enq_idump = false;
	gdump = prof_tdata->enq_gdump;
	prof_tdata->enq_gdump = false;

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}
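
/*
 * prof_backtrace() has three implementations, selected at configure time:
 * libunwind (preferred), libgcc's _Unwind_Backtrace(), or a chain of GCC
 * __builtin_frame_address()/__builtin_return_address() probes.  In all cases
 * leading frames are skipped (controlled by nignore), presumably so that the
 * profiler's own call frames do not appear in recorded backtraces, and at
 * most PROF_BT_MAX frames are stored in bt.
 */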

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	unsigned i;
	int err;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);

	/* Throw away (nignore+1) stack frames, if that many exist. */
	for (i = 0; i < nignore + 1; i++) {
		err = unw_step(&cursor);
		if (err <= 0)
			return;
	}

	/*
	 * Iterate over stack frames until there are no more, or until no space
	 * remains in bt.
	 */
	for (i = 0; i < PROF_BT_MAX; i++) {
		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
		bt->len++;
		err = unw_step(&cursor);
		if (err <= 0)
			break;
	}
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	cassert(config_prof);

	if (data->nignore > 0)
		data->nignore--;
	else {
		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
		data->bt->len++;
		if (data->bt->len == data->max)
			return (_URC_END_OF_STACK);
	}

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define	BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	assert(nignore <= 3);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	assert(false);
}
#endif
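
/*
 * prof_lookup() maps a backtrace to the counter object that this thread
 * updates for it.  Lookups are two-level: the per thread bt2cnt cache maps
 * bt to a prof_thr_cnt_t without locking, and on a miss the global bt2ctx
 * table (protected by bt2ctx_mtx via prof_enter()/prof_leave()) supplies or
 * creates the canonical prof_ctx_t.  The per thread cache is capped at
 * PROF_TCMAX entries; when full, the least recently used counter is merged
 * into its ctx and recycled.  Illustrative caller pattern (as used by the
 * prof inline helpers):
 *
 *	void *vec[PROF_BT_MAX];
 *	prof_bt_t bt;
 *	bt_init(&bt, vec);
 *	prof_backtrace(&bt, nignore);
 *	prof_thr_cnt_t *cnt = prof_lookup(&bt);
 */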

prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		union {
			prof_bt_t	*p;
			void		*v;
		} btkey;
		union {
			prof_ctx_t	*p;
			void		*v;
		} ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		prof_enter(prof_tdata);
		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
			/* bt has never been seen before.  Insert it. */
			ctx.v = imalloc(sizeof(prof_ctx_t));
			if (ctx.v == NULL) {
				prof_leave(prof_tdata);
				return (NULL);
			}
			btkey.p = bt_dup(bt);
			if (btkey.v == NULL) {
				prof_leave(prof_tdata);
				idalloc(ctx.v);
				return (NULL);
			}
			ctx.p->bt = btkey.p;
			ctx.p->lock = prof_ctx_mutex_choose();
			/*
			 * Set nlimbo to 1, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			ctx.p->nlimbo = 1;
			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
			ql_new(&ctx.p->cnts_ql);
			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
				/* OOM. */
				prof_leave(prof_tdata);
				idalloc(btkey.v);
				idalloc(ctx.v);
				return (NULL);
			}
			new_ctx = true;
		} else {
			/*
			 * Increment nlimbo, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			malloc_mutex_lock(ctx.p->lock);
			ctx.p->nlimbo++;
			malloc_mutex_unlock(ctx.p->lock);
			new_ctx = false;
		}
		prof_leave(prof_tdata);

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				assert(false);
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx.p);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
		ret.p->ctx = ctx.p;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx.p);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx.p->lock);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
		ctx.p->nlimbo--;
		malloc_mutex_unlock(ctx.p->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}
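
/*
 * Dump output is staged in the global prof_dump_buf and written to
 * prof_dump_fd by prof_flush().  The buffer is implicitly protected by
 * bt2ctx_mtx, which is held for the duration of a dump.  When propagate_err
 * is true (e.g. the prof_mdump() path), I/O failures are reported back to
 * the caller; otherwise an error message is printed and opt_abort is
 * honored.
 */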

static bool
prof_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_write(propagate_err, buf);

	return (ret);
}
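
/*
 * prof_ctx_sum() folds each thread's counters into ctx->cnt_summed without
 * locking the counter owners.  The reader below relies on a seqlock-style
 * protocol on prof_thr_cnt_t: the owning thread bumps epoch to an odd value
 * before modifying cnts and back to an even value afterward, roughly:
 *
 *	cnt->epoch++;		(odd: update in progress)
 *	... modify cnt->cnts ...
 *	cnt->epoch++;		(even: snapshot readable again)
 *
 * The loop retries until it observes the same even epoch before and after
 * copying, so the copied snapshot is internally consistent.
 */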

static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}

	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
	 */
	prof_tdata = *prof_tdata_tsd_get();
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			assert(false);
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}
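
/*
 * prof_dump_ctx() emits one line per backtrace in the heap profile format:
 * "<curobjs>: <curbytes> [<accumobjs>: <accumbytes>] @ <pc> <pc> ...".
 * For example (values illustrative only):
 *
 *	3: 12288 [0: 0] @ 0x7f3a12c04b10 0x7f3a12c01234 0x400d2e
 */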

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
{
	unsigned i;

	cassert(config_prof);

	/*
	 * Current statistics can sum to 0 as a result of unmerged per thread
	 * statistics.  Additionally, interval- and growth-triggered dumps can
	 * occur between the time a ctx is created and when its statistics are
	 * filled in.  Avoid dumping any ctx that is an artifact of either
	 * implementation detail.
	 */
	if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
	    (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
		assert(ctx->cnt_summed.curobjs == 0);
		assert(ctx->cnt_summed.curbytes == 0);
		assert(ctx->cnt_summed.accumobjs == 0);
		assert(ctx->cnt_summed.accumbytes == 0);
		return (false);
	}

	if (prof_printf(propagate_err, "%"PRId64": %"PRId64
	    " [%"PRIu64": %"PRIu64"] @",
	    ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
	    ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes))
		return (true);

	for (i = 0; i < bt->len; i++) {
		if (prof_printf(propagate_err, " %#"PRIxPTR,
		    (uintptr_t)bt->vec[i]))
			return (true);
	}

	if (prof_write(propagate_err, "\n"))
		return (true);

	return (false);
}
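
/*
 * Append the contents of /proc/<pid>/maps under a "MAPPED_LIBRARIES:" header,
 * so that pprof can resolve the raw addresses in the backtrace lines to
 * symbols.  A failure to open or stream the file is treated as a dump error.
 */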

static bool
prof_dump_maps(bool propagate_err)
{
	int mfd;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
	    (int)getpid());
	mfd = open(filename, O_RDONLY);
	if (mfd != -1) {
		ssize_t nread;

		if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
		    propagate_err) {
			close(mfd);
			return (true);
		}
		nread = 0;
		do {
			prof_dump_buf_end += nread;
			if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
				/* Make space in prof_dump_buf before read(). */
				if (prof_flush(propagate_err) &&
				    propagate_err) {
					close(mfd);
					return (true);
				}
			}
			nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
			    PROF_DUMP_BUFSIZE - prof_dump_buf_end);
		} while (nread > 0);
		close(mfd);
	} else
		return (true);

	return (false);
}

static bool
prof_dump(bool propagate_err, const char *filename, bool leakcheck)
{
	prof_tdata_t *prof_tdata;
	prof_cnt_t cnt_all;
	size_t tabind;
	union {
		prof_bt_t	*p;
		void		*v;
	} bt;
	union {
		prof_ctx_t	*p;
		void		*v;
	} ctx;
	size_t leak_nctx;

	cassert(config_prof);

	prof_tdata = prof_tdata_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (true);
	prof_enter(prof_tdata);
	prof_dump_fd = creat(filename, 0644);
	if (prof_dump_fd == -1) {
		if (propagate_err == false) {
			malloc_printf(
			    "<jemalloc>: creat(\"%s\", 0644) failed\n",
			    filename);
			if (opt_abort)
				abort();
		}
		goto label_error;
	}

	/* Merge per thread profile stats, and sum them in cnt_all. */
	memset(&cnt_all, 0, sizeof(prof_cnt_t));
	leak_nctx = 0;
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
		prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx);

	/* Dump profile header. */
	if (opt_lg_prof_sample == 0) {
		if (prof_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
		    cnt_all.curobjs, cnt_all.curbytes,
		    cnt_all.accumobjs, cnt_all.accumbytes))
			goto label_error;
	} else {
		if (prof_printf(propagate_err,
		    "heap profile: %"PRId64": %"PRId64
		    " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
		    cnt_all.curobjs, cnt_all.curbytes,
		    cnt_all.accumobjs, cnt_all.accumbytes,
		    ((uint64_t)1U << opt_lg_prof_sample)))
			goto label_error;
	}

	/* Dump per ctx profile stats. */
	for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v)
	    == false;) {
		if (prof_dump_ctx(propagate_err, ctx.p, bt.p))
			goto label_error;
	}

	/* Dump /proc/<pid>/maps if possible. */
	if (prof_dump_maps(propagate_err))
		goto label_error;

	if (prof_flush(propagate_err))
		goto label_error;
	close(prof_dump_fd);
	prof_leave(prof_tdata);

	if (leakcheck && cnt_all.curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}

	return (false);
label_error:
	prof_leave(prof_tdata);
	return (true);
}
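
/*
 * Dump filenames have the form "<prefix>.<pid>.<seq>.<v><vseq>.heap", where
 * <seq> is the global dump sequence number, <v> identifies the trigger, and
 * <vseq> is the per-trigger sequence number: 'i' (interval dumps), 'm'
 * (prof_mdump()), and 'u' (gdump) include <vseq>, while the final atexit dump
 * ('f', from prof_fdump()) omits it.  For example (illustrative, assuming the
 * default "jeprof" prefix), a first interval dump from pid 1234 might be
 * named "jeprof.1234.0.i0.heap", and the final dump "jeprof.1234.3.f.heap".
 */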

#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

	cassert(config_prof);

	if (vseq != UINT64_C(0xffffffffffffffff)) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

void
prof_idump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	/*
	 * Don't call prof_tdata_get() here, because it could cause recursive
	 * allocation.
	 */
	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	/*
	 * Don't call prof_tdata_get() here, because it could cause recursive
	 * allocation.
	 */
	prof_tdata = *prof_tdata_tsd_get();
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}
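
/*
 * prof_bt_hash() and prof_bt_keycomp() adapt backtraces for use as keys in
 * jemalloc's cuckoo hash (ckh), which requires two independent hash values
 * per key.  When minbits <= 32, a single 64-bit hash of the raw vec bytes is
 * split into two halves; otherwise a second hash with a different seed is
 * computed.
 */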

static void
prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2)
{
	size_t ret1, ret2;
	uint64_t h;
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);
	assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64));
	assert(hash1 != NULL);
	assert(hash2 != NULL);

	h = hash(bt->vec, bt->len * sizeof(void *),
	    UINT64_C(0x94122f335b332aea));
	if (minbits <= 32) {
		/*
		 * Avoid doing multiple hashes, since a single hash provides
		 * enough bits.
		 */
		ret1 = h & ZU(0xffffffffU);
		ret2 = h >> 32;
	} else {
		ret1 = h;
		ret2 = hash(bt->vec, bt->len * sizeof(void *),
		    UINT64_C(0x8432a476666bbc13));
	}

	*hash1 = ret1;
	*hash2 = ret2;
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = 0;
	prof_tdata->threshold = 0;
	prof_tdata->accum = 0;

	prof_tdata->enq = false;
	prof_tdata->enq_idump = false;
	prof_tdata->enq_gdump = false;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
		/*
		 * Another destructor deallocated memory after this destructor
		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
		 * in order to receive another callback.
		 */
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
		 * wouldn't cause re-creation of the prof_tdata.  This time, do
		 * nothing, so that the destructor will not be called again.
		 */
	} else if (prof_tdata != NULL) {
		/*
		 * Delete the hash table.  All of its contents can still be
		 * iterated over via the LRU.
		 */
		ckh_delete(&prof_tdata->bt2cnt);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
		 */
		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
			prof_ctx_merge(cnt->ctx, cnt);
			idalloc(cnt);
		}
		idalloc(prof_tdata->vec);
		idalloc(prof_tdata);
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	}
}
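
/*
 * Bootstrapping happens in three stages: prof_boot0() installs the default
 * filename prefix, prof_boot1() fixes opt_prof, prof_interval, and
 * prof_promote before any arenas are created, and prof_boot2() allocates the
 * global tables, mutexes, and TSD once allocation is possible.  No dumps
 * occur until prof_booted is set at the end of prof_boot2().
 */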

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof and prof_promote must be in their final state before any
	 * arenas are initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
		prof_interval = 0;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		} else
			prof_interval = 0;
	}

	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

void
prof_prefork(void)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_lock(&bt2ctx_mtx);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_lock(&ctx_locks[i]);
	}
}

void
prof_postfork_parent(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(&ctx_locks[i]);
		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(&bt2ctx_mtx);
	}
}

void
prof_postfork_child(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(&ctx_locks[i]);
		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
		malloc_mutex_postfork_child(&bt2ctx_mtx);
	}
}

/******************************************************************************/