#define JEMALLOC_PROF_C_
#include "jemalloc/internal/jemalloc_internal.h"
/******************************************************************************/

#ifdef JEMALLOC_PROF_LIBUNWIND
#define UNW_LOCAL_ONLY
#include <libunwind.h>
#endif

#ifdef JEMALLOC_PROF_LIBGCC
#include <unwind.h>
#endif

/******************************************************************************/
/* Data. */

malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)

bool		opt_prof = false;
bool		opt_prof_active = true;
size_t		opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
ssize_t		opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
bool		opt_prof_gdump = false;
bool		opt_prof_final = true;
bool		opt_prof_leak = false;
bool		opt_prof_accum = false;
char		opt_prof_prefix[PATH_MAX + 1];

uint64_t	prof_interval = 0;
bool		prof_promote;

/*
 * Table of mutexes that are shared among ctx's.  These are leaf locks, so
 * there is no problem with using them for more than one ctx at the same time.
 * The primary motivation for this sharing though is that ctx's are ephemeral,
 * and destroying mutexes causes complications for systems that allocate when
 * creating/destroying mutexes.
 */
static malloc_mutex_t	*ctx_locks;
static unsigned		cum_ctxs; /* Atomic counter. */

/*
 * Global hash of (prof_bt_t *)-->(prof_ctx_t *).  This is the master data
 * structure that knows about all backtraces currently captured.
 */
static ckh_t		bt2ctx;
static malloc_mutex_t	bt2ctx_mtx;

static malloc_mutex_t	prof_dump_seq_mtx;
static uint64_t		prof_dump_seq;
static uint64_t		prof_dump_iseq;
static uint64_t		prof_dump_mseq;
static uint64_t		prof_dump_useq;

/*
 * This buffer is rather large for stack allocation, so use a single buffer for
 * all profile dumps.  The buffer is implicitly protected by bt2ctx_mtx, since
 * it must be locked anyway during dumping.
 */
static char		prof_dump_buf[PROF_DUMP_BUFSIZE];
static unsigned		prof_dump_buf_end;
static int		prof_dump_fd;

/* Do not dump any profiles until bootstrapping is complete. */
static bool		prof_booted = false;

/******************************************************************************/
/* Function prototypes for non-inline static functions. */

static prof_bt_t	*bt_dup(prof_bt_t *bt);
static void	bt_destroy(prof_bt_t *bt);
#ifdef JEMALLOC_PROF_LIBGCC
static _Unwind_Reason_Code	prof_unwind_init_callback(
    struct _Unwind_Context *context, void *arg);
static _Unwind_Reason_Code	prof_unwind_callback(
    struct _Unwind_Context *context, void *arg);
#endif
static bool	prof_flush(bool propagate_err);
static bool	prof_write(bool propagate_err, const char *s);
static bool	prof_printf(bool propagate_err, const char *format, ...)
    JEMALLOC_ATTR(format(printf, 2, 3));
static void	prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all,
    size_t *leak_nctx);
static void	prof_ctx_destroy(prof_ctx_t *ctx);
static void	prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt);
static bool	prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx,
    prof_bt_t *bt);
static bool	prof_dump_maps(bool propagate_err);
static bool	prof_dump(bool propagate_err, const char *filename,
    bool leakcheck);
static void	prof_dump_filename(char *filename, char v, int64_t vseq);
static void	prof_fdump(void);
static void	prof_bt_hash(const void *key, size_t r_hash[2]);
static bool	prof_bt_keycomp(const void *k1, const void *k2);
static malloc_mutex_t	*prof_ctx_mutex_choose(void);

/******************************************************************************/

void
bt_init(prof_bt_t *bt, void **vec)
{

	cassert(config_prof);

	bt->vec = vec;
	bt->len = 0;
}

static void
bt_destroy(prof_bt_t *bt)
{

	cassert(config_prof);

	idalloc(bt);
}

static prof_bt_t *
bt_dup(prof_bt_t *bt)
{
	prof_bt_t *ret;

	cassert(config_prof);

	/*
	 * Create a single allocation that has space for vec immediately
	 * following the prof_bt_t structure.  The backtraces that get stored
	 * in the backtrace caches are copied from stack-allocated temporary
	 * variables, so size is known at creation time.  Making this a
	 * contiguous object improves cache locality.
	 */
	ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
	    (bt->len * sizeof(void *)));
	if (ret == NULL)
		return (NULL);
	ret->vec = (void **)((uintptr_t)ret +
	    QUANTUM_CEILING(sizeof(prof_bt_t)));
	memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
	ret->len = bt->len;

	return (ret);
}

static inline void
prof_enter(prof_tdata_t *prof_tdata)
{

	cassert(config_prof);

	assert(prof_tdata->enq == false);
	prof_tdata->enq = true;

	malloc_mutex_lock(&bt2ctx_mtx);
}

static inline void
prof_leave(prof_tdata_t *prof_tdata)
{
	bool idump, gdump;

	cassert(config_prof);

	malloc_mutex_unlock(&bt2ctx_mtx);

	assert(prof_tdata->enq);
	prof_tdata->enq = false;
	idump = prof_tdata->enq_idump;
	prof_tdata->enq_idump = false;
	gdump = prof_tdata->enq_gdump;
	prof_tdata->enq_gdump = false;

	if (idump)
		prof_idump();
	if (gdump)
		prof_gdump();
}

#ifdef JEMALLOC_PROF_LIBUNWIND
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	unw_context_t uc;
	unw_cursor_t cursor;
	unsigned i;
	int err;

	cassert(config_prof);
	assert(bt->len == 0);
	assert(bt->vec != NULL);

	unw_getcontext(&uc);
	unw_init_local(&cursor, &uc);

	/* Throw away (nignore+1) stack frames, if that many exist. */
	for (i = 0; i < nignore + 1; i++) {
		err = unw_step(&cursor);
		if (err <= 0)
			return;
	}

	/*
	 * Iterate over stack frames until there are no more, or until no space
	 * remains in bt.
	 */
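	/*
	 * bt->vec is expected to hold PROF_BT_MAX entries (see
	 * prof_tdata_init()), so bounding the loop at PROF_BT_MAX also bounds
	 * writes into vec.
	 */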
	for (i = 0; i < PROF_BT_MAX; i++) {
		unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
		bt->len++;
		err = unw_step(&cursor);
		if (err <= 0)
			break;
	}
}
#elif (defined(JEMALLOC_PROF_LIBGCC))
static _Unwind_Reason_Code
prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
{

	cassert(config_prof);

	return (_URC_NO_REASON);
}

static _Unwind_Reason_Code
prof_unwind_callback(struct _Unwind_Context *context, void *arg)
{
	prof_unwind_data_t *data = (prof_unwind_data_t *)arg;

	cassert(config_prof);

	if (data->nignore > 0)
		data->nignore--;
	else {
		data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
		data->bt->len++;
		if (data->bt->len == data->max)
			return (_URC_END_OF_STACK);
	}

	return (_URC_NO_REASON);
}

void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
	prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};

	cassert(config_prof);

	_Unwind_Backtrace(prof_unwind_callback, &data);
}
#elif (defined(JEMALLOC_PROF_GCC))
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{
#define	BT_FRAME(i)							\
	if ((i) < nignore + PROF_BT_MAX) {				\
		void *p;						\
		if (__builtin_frame_address(i) == 0)			\
			return;						\
		p = __builtin_return_address(i);			\
		if (p == NULL)						\
			return;						\
		if (i >= nignore) {					\
			bt->vec[(i) - nignore] = p;			\
			bt->len = (i) - nignore + 1;			\
		}							\
	} else								\
		return;

	cassert(config_prof);
	assert(nignore <= 3);

	BT_FRAME(0)
	BT_FRAME(1)
	BT_FRAME(2)
	BT_FRAME(3)
	BT_FRAME(4)
	BT_FRAME(5)
	BT_FRAME(6)
	BT_FRAME(7)
	BT_FRAME(8)
	BT_FRAME(9)

	BT_FRAME(10)
	BT_FRAME(11)
	BT_FRAME(12)
	BT_FRAME(13)
	BT_FRAME(14)
	BT_FRAME(15)
	BT_FRAME(16)
	BT_FRAME(17)
	BT_FRAME(18)
	BT_FRAME(19)

	BT_FRAME(20)
	BT_FRAME(21)
	BT_FRAME(22)
	BT_FRAME(23)
	BT_FRAME(24)
	BT_FRAME(25)
	BT_FRAME(26)
	BT_FRAME(27)
	BT_FRAME(28)
	BT_FRAME(29)

	BT_FRAME(30)
	BT_FRAME(31)
	BT_FRAME(32)
	BT_FRAME(33)
	BT_FRAME(34)
	BT_FRAME(35)
	BT_FRAME(36)
	BT_FRAME(37)
	BT_FRAME(38)
	BT_FRAME(39)

	BT_FRAME(40)
	BT_FRAME(41)
	BT_FRAME(42)
	BT_FRAME(43)
	BT_FRAME(44)
	BT_FRAME(45)
	BT_FRAME(46)
	BT_FRAME(47)
	BT_FRAME(48)
	BT_FRAME(49)

	BT_FRAME(50)
	BT_FRAME(51)
	BT_FRAME(52)
	BT_FRAME(53)
	BT_FRAME(54)
	BT_FRAME(55)
	BT_FRAME(56)
	BT_FRAME(57)
	BT_FRAME(58)
	BT_FRAME(59)

	BT_FRAME(60)
	BT_FRAME(61)
	BT_FRAME(62)
	BT_FRAME(63)
	BT_FRAME(64)
	BT_FRAME(65)
	BT_FRAME(66)
	BT_FRAME(67)
	BT_FRAME(68)
	BT_FRAME(69)

	BT_FRAME(70)
	BT_FRAME(71)
	BT_FRAME(72)
	BT_FRAME(73)
	BT_FRAME(74)
	BT_FRAME(75)
	BT_FRAME(76)
	BT_FRAME(77)
	BT_FRAME(78)
	BT_FRAME(79)

	BT_FRAME(80)
	BT_FRAME(81)
	BT_FRAME(82)
	BT_FRAME(83)
	BT_FRAME(84)
	BT_FRAME(85)
	BT_FRAME(86)
	BT_FRAME(87)
	BT_FRAME(88)
	BT_FRAME(89)

	BT_FRAME(90)
	BT_FRAME(91)
	BT_FRAME(92)
	BT_FRAME(93)
	BT_FRAME(94)
	BT_FRAME(95)
	BT_FRAME(96)
	BT_FRAME(97)
	BT_FRAME(98)
	BT_FRAME(99)

	BT_FRAME(100)
	BT_FRAME(101)
	BT_FRAME(102)
	BT_FRAME(103)
	BT_FRAME(104)
	BT_FRAME(105)
	BT_FRAME(106)
	BT_FRAME(107)
	BT_FRAME(108)
	BT_FRAME(109)

	BT_FRAME(110)
	BT_FRAME(111)
	BT_FRAME(112)
	BT_FRAME(113)
	BT_FRAME(114)
	BT_FRAME(115)
	BT_FRAME(116)
	BT_FRAME(117)
	BT_FRAME(118)
	BT_FRAME(119)

	BT_FRAME(120)
	BT_FRAME(121)
	BT_FRAME(122)
	BT_FRAME(123)
	BT_FRAME(124)
	BT_FRAME(125)
	BT_FRAME(126)
	BT_FRAME(127)

	/* Extras to compensate for nignore. */
	BT_FRAME(128)
	BT_FRAME(129)
	BT_FRAME(130)
#undef BT_FRAME
}
#else
void
prof_backtrace(prof_bt_t *bt, unsigned nignore)
{

	cassert(config_prof);
	assert(false);
}
#endif

prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
	union {
		prof_thr_cnt_t	*p;
		void		*v;
	} ret;
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return (NULL);

	if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
		union {
			prof_bt_t	*p;
			void		*v;
		} btkey;
		union {
			prof_ctx_t	*p;
			void		*v;
		} ctx;
		bool new_ctx;

		/*
		 * This thread's cache lacks bt.  Look for it in the global
		 * cache.
		 */
		prof_enter(prof_tdata);
		if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
			/* bt has never been seen before.  Insert it. */
			ctx.v = imalloc(sizeof(prof_ctx_t));
			if (ctx.v == NULL) {
				prof_leave(prof_tdata);
				return (NULL);
			}
			btkey.p = bt_dup(bt);
			if (btkey.v == NULL) {
				prof_leave(prof_tdata);
				idalloc(ctx.v);
				return (NULL);
			}
			ctx.p->bt = btkey.p;
			ctx.p->lock = prof_ctx_mutex_choose();
			/*
			 * Set nlimbo to 1, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			ctx.p->nlimbo = 1;
			memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t));
			ql_new(&ctx.p->cnts_ql);
			if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
				/* OOM. */
				prof_leave(prof_tdata);
				idalloc(btkey.v);
				idalloc(ctx.v);
				return (NULL);
			}
			new_ctx = true;
		} else {
			/*
			 * Increment nlimbo, in order to avoid a race condition
			 * with prof_ctx_merge()/prof_ctx_destroy().
			 */
			malloc_mutex_lock(ctx.p->lock);
			ctx.p->nlimbo++;
			malloc_mutex_unlock(ctx.p->lock);
			new_ctx = false;
		}
		prof_leave(prof_tdata);

		/* Link a prof_thr_cnt_t into ctx for this thread. */
		if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
			assert(ckh_count(&prof_tdata->bt2cnt) > 0);
			/*
			 * Flush the least recently used cnt in order to keep
			 * bt2cnt from becoming too large.
			 */
			ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
			assert(ret.v != NULL);
			if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
			    NULL, NULL))
				assert(false);
			ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
			prof_ctx_merge(ret.p->ctx, ret.p);
			/* ret can now be re-used. */
		} else {
			assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
			/* Allocate and partially initialize a new cnt. */
			ret.v = imalloc(sizeof(prof_thr_cnt_t));
			if (ret.p == NULL) {
				if (new_ctx)
					prof_ctx_destroy(ctx.p);
				return (NULL);
			}
			ql_elm_new(ret.p, cnts_link);
			ql_elm_new(ret.p, lru_link);
		}
		/* Finish initializing ret. */
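		/*
		 * ret is either the LRU entry recycled above or a fresh
		 * allocation.  Either way ctx.p->nlimbo is still elevated, and
		 * it is not decremented until ret has been linked into
		 * ctx->cnts_ql below, which keeps prof_ctx_merge()/
		 * prof_ctx_destroy() from tearing ctx down in the meantime.
		 */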
		ret.p->ctx = ctx.p;
		ret.p->epoch = 0;
		memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
		if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) {
			if (new_ctx)
				prof_ctx_destroy(ctx.p);
			idalloc(ret.v);
			return (NULL);
		}
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
		malloc_mutex_lock(ctx.p->lock);
		ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link);
		ctx.p->nlimbo--;
		malloc_mutex_unlock(ctx.p->lock);
	} else {
		/* Move ret to the front of the LRU. */
		ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
		ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
	}

	return (ret.p);
}

static bool
prof_flush(bool propagate_err)
{
	bool ret = false;
	ssize_t err;

	cassert(config_prof);

	err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
	if (err == -1) {
		if (propagate_err == false) {
			malloc_write("<jemalloc>: write() failed during heap "
			    "profile flush\n");
			if (opt_abort)
				abort();
		}
		ret = true;
	}
	prof_dump_buf_end = 0;

	return (ret);
}

static bool
prof_write(bool propagate_err, const char *s)
{
	unsigned i, slen, n;

	cassert(config_prof);

	i = 0;
	slen = strlen(s);
	while (i < slen) {
		/* Flush the buffer if it is full. */
		if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
			if (prof_flush(propagate_err) && propagate_err)
				return (true);

		if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
			/* Finish writing. */
			n = slen - i;
		} else {
			/* Write as much of s as will fit. */
			n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
		}
		memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
		prof_dump_buf_end += n;
		i += n;
	}

	return (false);
}

JEMALLOC_ATTR(format(printf, 2, 3))
static bool
prof_printf(bool propagate_err, const char *format, ...)
{
	bool ret;
	va_list ap;
	char buf[PROF_PRINTF_BUFSIZE];

	va_start(ap, format);
	malloc_vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	ret = prof_write(propagate_err, buf);

	return (ret);
}

static void
prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx)
{
	prof_thr_cnt_t *thr_cnt;
	prof_cnt_t tcnt;

	cassert(config_prof);

	malloc_mutex_lock(ctx->lock);

	memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
	ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
		volatile unsigned *epoch = &thr_cnt->epoch;

		while (true) {
			unsigned epoch0 = *epoch;

			/* Make sure epoch is even. */
			if (epoch0 & 1U)
				continue;

			memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));

			/* Terminate if epoch didn't change while reading. */
			if (*epoch == epoch0)
				break;
		}

		ctx->cnt_summed.curobjs += tcnt.curobjs;
		ctx->cnt_summed.curbytes += tcnt.curbytes;
		if (opt_prof_accum) {
			ctx->cnt_summed.accumobjs += tcnt.accumobjs;
			ctx->cnt_summed.accumbytes += tcnt.accumbytes;
		}
	}
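	/*
	 * The snapshot loop above pairs with the sampling fast path, which
	 * bumps epoch to an odd value before updating cnts and back to an even
	 * value afterward; reading the same even epoch before and after the
	 * memcpy() therefore yields a consistent copy without locking the
	 * owning thread's cnt.
	 */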
	if (ctx->cnt_summed.curobjs != 0)
		(*leak_nctx)++;

	/* Add to cnt_all. */
	cnt_all->curobjs += ctx->cnt_summed.curobjs;
	cnt_all->curbytes += ctx->cnt_summed.curbytes;
	if (opt_prof_accum) {
		cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
		cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
	}

	malloc_mutex_unlock(ctx->lock);
}

static void
prof_ctx_destroy(prof_ctx_t *ctx)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/*
	 * Check that ctx is still unused by any thread cache before destroying
	 * it.  prof_lookup() increments ctx->nlimbo in order to avoid a race
	 * condition with this function, as does prof_ctx_merge() in order to
	 * avoid a race between the main body of prof_ctx_merge() and entry
	 * into this function.
	 */
	prof_tdata = prof_tdata_get(false);
	assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
	prof_enter(prof_tdata);
	malloc_mutex_lock(ctx->lock);
	if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
	    ctx->nlimbo == 1) {
		assert(ctx->cnt_merged.curbytes == 0);
		assert(ctx->cnt_merged.accumobjs == 0);
		assert(ctx->cnt_merged.accumbytes == 0);
		/* Remove ctx from bt2ctx. */
		if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
			assert(false);
		prof_leave(prof_tdata);
		/* Destroy ctx. */
		malloc_mutex_unlock(ctx->lock);
		bt_destroy(ctx->bt);
		idalloc(ctx);
	} else {
		/*
		 * Compensate for increment in prof_ctx_merge() or
		 * prof_lookup().
		 */
		ctx->nlimbo--;
		malloc_mutex_unlock(ctx->lock);
		prof_leave(prof_tdata);
	}
}

static void
prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
{
	bool destroy;

	cassert(config_prof);

	/* Merge cnt stats and detach from ctx. */
	malloc_mutex_lock(ctx->lock);
	ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
	ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
	ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
	ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
	ql_remove(&ctx->cnts_ql, cnt, cnts_link);
	if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
	    ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
		/*
		 * Increment ctx->nlimbo in order to keep another thread from
		 * winning the race to destroy ctx while this one has ctx->lock
		 * dropped.  Without this, it would be possible for another
		 * thread to:
		 *
		 * 1) Sample an allocation associated with ctx.
		 * 2) Deallocate the sampled object.
		 * 3) Successfully prof_ctx_destroy(ctx).
		 *
		 * The result would be that ctx no longer exists by the time
		 * this thread accesses it in prof_ctx_destroy().
		 */
		ctx->nlimbo++;
		destroy = true;
	} else
		destroy = false;
	malloc_mutex_unlock(ctx->lock);
	if (destroy)
		prof_ctx_destroy(ctx);
}

static bool
prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, prof_bt_t *bt)
{
	unsigned i;

	cassert(config_prof);

	/*
	 * Current statistics can sum to 0 as a result of unmerged per thread
	 * statistics.  Additionally, interval- and growth-triggered dumps can
	 * occur between the time a ctx is created and when its statistics are
	 * filled in.  Avoid dumping any ctx that is an artifact of either
	 * implementation detail.
	 */
766 */ 767 if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) || 768 (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) { 769 assert(ctx->cnt_summed.curobjs == 0); 770 assert(ctx->cnt_summed.curbytes == 0); 771 assert(ctx->cnt_summed.accumobjs == 0); 772 assert(ctx->cnt_summed.accumbytes == 0); 773 return (false); 774 } 775 776 if (prof_printf(propagate_err, "%"PRId64": %"PRId64 777 " [%"PRIu64": %"PRIu64"] @", 778 ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes, 779 ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) 780 return (true); 781 782 for (i = 0; i < bt->len; i++) { 783 if (prof_printf(propagate_err, " %#"PRIxPTR, 784 (uintptr_t)bt->vec[i])) 785 return (true); 786 } 787 788 if (prof_write(propagate_err, "\n")) 789 return (true); 790 791 return (false); 792 } 793 794 static bool 795 prof_dump_maps(bool propagate_err) 796 { 797 int mfd; 798 char filename[PATH_MAX + 1]; 799 800 cassert(config_prof); 801 802 malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps", 803 (int)getpid()); 804 mfd = open(filename, O_RDONLY); 805 if (mfd != -1) { 806 ssize_t nread; 807 808 if (prof_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && 809 propagate_err) 810 return (true); 811 nread = 0; 812 do { 813 prof_dump_buf_end += nread; 814 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { 815 /* Make space in prof_dump_buf before read(). */ 816 if (prof_flush(propagate_err) && propagate_err) 817 return (true); 818 } 819 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], 820 PROF_DUMP_BUFSIZE - prof_dump_buf_end); 821 } while (nread > 0); 822 close(mfd); 823 } else 824 return (true); 825 826 return (false); 827 } 828 829 static bool 830 prof_dump(bool propagate_err, const char *filename, bool leakcheck) 831 { 832 prof_tdata_t *prof_tdata; 833 prof_cnt_t cnt_all; 834 size_t tabind; 835 union { 836 prof_bt_t *p; 837 void *v; 838 } bt; 839 union { 840 prof_ctx_t *p; 841 void *v; 842 } ctx; 843 size_t leak_nctx; 844 845 cassert(config_prof); 846 847 prof_tdata = prof_tdata_get(false); 848 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) 849 return (true); 850 prof_enter(prof_tdata); 851 prof_dump_fd = creat(filename, 0644); 852 if (prof_dump_fd == -1) { 853 if (propagate_err == false) { 854 malloc_printf( 855 "<jemalloc>: creat(\"%s\"), 0644) failed\n", 856 filename); 857 if (opt_abort) 858 abort(); 859 } 860 goto label_error; 861 } 862 863 /* Merge per thread profile stats, and sum them in cnt_all. */ 864 memset(&cnt_all, 0, sizeof(prof_cnt_t)); 865 leak_nctx = 0; 866 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) 867 prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); 868 869 /* Dump profile header. */ 870 if (opt_lg_prof_sample == 0) { 871 if (prof_printf(propagate_err, 872 "heap profile: %"PRId64": %"PRId64 873 " [%"PRIu64": %"PRIu64"] @ heapprofile\n", 874 cnt_all.curobjs, cnt_all.curbytes, 875 cnt_all.accumobjs, cnt_all.accumbytes)) 876 goto label_error; 877 } else { 878 if (prof_printf(propagate_err, 879 "heap profile: %"PRId64": %"PRId64 880 " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n", 881 cnt_all.curobjs, cnt_all.curbytes, 882 cnt_all.accumobjs, cnt_all.accumbytes, 883 ((uint64_t)1U << opt_lg_prof_sample))) 884 goto label_error; 885 } 886 887 /* Dump per ctx profile stats. */ 888 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) 889 == false;) { 890 if (prof_dump_ctx(propagate_err, ctx.p, bt.p)) 891 goto label_error; 892 } 893 894 /* Dump /proc/<pid>/maps if possible. 
	if (prof_dump_maps(propagate_err))
		goto label_error;

	if (prof_flush(propagate_err))
		goto label_error;
	close(prof_dump_fd);
	prof_leave(prof_tdata);

	if (leakcheck && cnt_all.curbytes != 0) {
		malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
		    PRId64" object%s, %zu context%s\n",
		    cnt_all.curbytes, (cnt_all.curbytes != 1) ? "s" : "",
		    cnt_all.curobjs, (cnt_all.curobjs != 1) ? "s" : "",
		    leak_nctx, (leak_nctx != 1) ? "s" : "");
		malloc_printf(
		    "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
		    filename);
	}

	return (false);
label_error:
	prof_leave(prof_tdata);
	return (true);
}

#define	DUMP_FILENAME_BUFSIZE	(PATH_MAX + 1)
static void
prof_dump_filename(char *filename, char v, int64_t vseq)
{

	cassert(config_prof);

	if (vseq != UINT64_C(0xffffffffffffffff)) {
		/* "<prefix>.<pid>.<seq>.v<vseq>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c%"PRId64".heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
	} else {
		/* "<prefix>.<pid>.<seq>.<v>.heap" */
		malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
		    "%s.%d.%"PRIu64".%c.heap",
		    opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
	}
	prof_dump_seq++;
}

static void
prof_fdump(void)
{
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;

	if (opt_prof_final && opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'f', UINT64_C(0xffffffffffffffff));
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, opt_prof_leak);
	}
}

void
prof_idump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[PATH_MAX + 1];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_idump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'i', prof_dump_iseq);
		prof_dump_iseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

bool
prof_mdump(const char *filename)
{
	char filename_buf[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (opt_prof == false || prof_booted == false)
		return (true);

	if (filename == NULL) {
		/* No filename specified, so automatically generate one. */
		if (opt_prof_prefix[0] == '\0')
			return (true);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
		prof_dump_mseq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		filename = filename_buf;
	}
	return (prof_dump(true, filename, false));
}

void
prof_gdump(void)
{
	prof_tdata_t *prof_tdata;
	char filename[DUMP_FILENAME_BUFSIZE];

	cassert(config_prof);

	if (prof_booted == false)
		return;
	prof_tdata = prof_tdata_get(false);
	if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
		return;
	if (prof_tdata->enq) {
		prof_tdata->enq_gdump = true;
		return;
	}

	if (opt_prof_prefix[0] != '\0') {
		malloc_mutex_lock(&prof_dump_seq_mtx);
		prof_dump_filename(filename, 'u', prof_dump_useq);
		prof_dump_useq++;
		malloc_mutex_unlock(&prof_dump_seq_mtx);
		prof_dump(false, filename, false);
	}
}

static void
prof_bt_hash(const void *key, size_t r_hash[2])
{
	prof_bt_t *bt = (prof_bt_t *)key;

	cassert(config_prof);

	hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
}

static bool
prof_bt_keycomp(const void *k1, const void *k2)
{
	const prof_bt_t *bt1 = (prof_bt_t *)k1;
	const prof_bt_t *bt2 = (prof_bt_t *)k2;

	cassert(config_prof);

	if (bt1->len != bt2->len)
		return (false);
	return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
}

static malloc_mutex_t *
prof_ctx_mutex_choose(void)
{
	unsigned nctxs = atomic_add_u(&cum_ctxs, 1);

	return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
}

prof_tdata_t *
prof_tdata_init(void)
{
	prof_tdata_t *prof_tdata;

	cassert(config_prof);

	/* Initialize an empty cache for this thread. */
	prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
	if (prof_tdata == NULL)
		return (NULL);

	if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
	    prof_bt_hash, prof_bt_keycomp)) {
		idalloc(prof_tdata);
		return (NULL);
	}
	ql_new(&prof_tdata->lru_ql);

	prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
	if (prof_tdata->vec == NULL) {
		ckh_delete(&prof_tdata->bt2cnt);
		idalloc(prof_tdata);
		return (NULL);
	}

	prof_tdata->prng_state = 0;
	prof_tdata->threshold = 0;
	prof_tdata->accum = 0;

	prof_tdata->enq = false;
	prof_tdata->enq_idump = false;
	prof_tdata->enq_gdump = false;

	prof_tdata_tsd_set(&prof_tdata);

	return (prof_tdata);
}

void
prof_tdata_cleanup(void *arg)
{
	prof_thr_cnt_t *cnt;
	prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;

	cassert(config_prof);

	if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
		/*
		 * Another destructor deallocated memory after this destructor
		 * was called.  Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
		 * in order to receive another callback.
		 */
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	} else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
		/*
		 * The previous time this destructor was called, we set the key
		 * to PROF_TDATA_STATE_PURGATORY so that other destructors
		 * wouldn't cause re-creation of the prof_tdata.  This time, do
		 * nothing, so that the destructor will not be called again.
		 */
	} else if (prof_tdata != NULL) {
		/*
		 * Delete the hash table.  All of its contents can still be
		 * iterated over via the LRU.
		 */
		ckh_delete(&prof_tdata->bt2cnt);
		/*
		 * Iteratively merge cnt's into the global stats and delete
		 * them.
		 */
		while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
			ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
			prof_ctx_merge(cnt->ctx, cnt);
			idalloc(cnt);
		}
		idalloc(prof_tdata->vec);
		idalloc(prof_tdata);
		prof_tdata = PROF_TDATA_STATE_PURGATORY;
		prof_tdata_tsd_set(&prof_tdata);
	}
}

void
prof_boot0(void)
{

	cassert(config_prof);

	memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
	    sizeof(PROF_PREFIX_DEFAULT));
}

void
prof_boot1(void)
{

	cassert(config_prof);

	/*
	 * opt_prof and prof_promote must be in their final state before any
	 * arenas are initialized, so this function must be executed early.
	 */

	if (opt_prof_leak && opt_prof == false) {
		/*
		 * Enable opt_prof, but in such a way that profiles are never
		 * automatically dumped.
		 */
		opt_prof = true;
		opt_prof_gdump = false;
	} else if (opt_prof) {
		if (opt_lg_prof_interval >= 0) {
			prof_interval = (((uint64_t)1U) <<
			    opt_lg_prof_interval);
		}
	}

	prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
}

bool
prof_boot2(void)
{

	cassert(config_prof);

	if (opt_prof) {
		unsigned i;

		if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
		    prof_bt_keycomp))
			return (true);
		if (malloc_mutex_init(&bt2ctx_mtx))
			return (true);
		if (prof_tdata_tsd_boot()) {
			malloc_write(
			    "<jemalloc>: Error in pthread_key_create()\n");
			abort();
		}

		if (malloc_mutex_init(&prof_dump_seq_mtx))
			return (true);

		if (atexit(prof_fdump) != 0) {
			malloc_write("<jemalloc>: Error in atexit()\n");
			if (opt_abort)
				abort();
		}

		ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
		    sizeof(malloc_mutex_t));
		if (ctx_locks == NULL)
			return (true);
		for (i = 0; i < PROF_NCTX_LOCKS; i++) {
			if (malloc_mutex_init(&ctx_locks[i]))
				return (true);
		}
	}

#ifdef JEMALLOC_PROF_LIBGCC
	/*
	 * Cause the backtracing machinery to allocate its internal state
	 * before enabling profiling.
	 */
	_Unwind_Backtrace(prof_unwind_init_callback, NULL);
#endif

	prof_booted = true;

	return (false);
}

void
prof_prefork(void)
{

	if (opt_prof) {
		unsigned i;

		malloc_mutex_lock(&bt2ctx_mtx);
		malloc_mutex_lock(&prof_dump_seq_mtx);
		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_lock(&ctx_locks[i]);
	}
}

void
prof_postfork_parent(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_parent(&ctx_locks[i]);
		malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
		malloc_mutex_postfork_parent(&bt2ctx_mtx);
	}
}

void
prof_postfork_child(void)
{

	if (opt_prof) {
		unsigned i;

		for (i = 0; i < PROF_NCTX_LOCKS; i++)
			malloc_mutex_postfork_child(&ctx_locks[i]);
		malloc_mutex_postfork_child(&prof_dump_seq_mtx);
		malloc_mutex_postfork_child(&bt2ctx_mtx);
	}
}

/******************************************************************************/