1 /*- 2 * Copyright (c) 2012, Fabien Thomas 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * Process hwpmc(4) samples as calltree. 29 * 30 * Output file format compatible with Kcachegrind (kdesdk). 31 * Handle top mode with a sorted tree display. 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include <sys/param.h> 38 #include <sys/endian.h> 39 #include <sys/queue.h> 40 41 #include <assert.h> 42 #include <curses.h> 43 #include <ctype.h> 44 #include <err.h> 45 #include <errno.h> 46 #include <fcntl.h> 47 #include <pmc.h> 48 #include <pmclog.h> 49 #include <stdint.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 #include <sysexits.h> 55 56 #include "pmcstat.h" 57 #include "pmcstat_log.h" 58 #include "pmcstat_top.h" 59 #include "pmcpl_calltree.h" 60 61 #define PMCPL_CT_GROWSIZE 4 62 63 static int pmcstat_skiplink = 0; 64 65 struct pmcpl_ct_node; 66 67 /* Get the sample value for PMC a. */ 68 #define PMCPL_CT_SAMPLE(a, b) \ 69 ((a) < (b)->npmcs ? (b)->sb[a] : 0) 70 71 /* Get the sample value in percent related to rsamples. */ 72 #define PMCPL_CT_SAMPLEP(a, b) \ 73 (PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a]) 74 75 struct pmcpl_ct_sample { 76 int npmcs; /* Max pmc index available. */ 77 unsigned *sb; /* Sample buffer for 0..npmcs. */ 78 }; 79 80 struct pmcpl_ct_arc { 81 struct pmcpl_ct_sample pcta_samples; 82 struct pmcpl_ct_sample pcta_callid; 83 unsigned pcta_call; 84 struct pmcpl_ct_node *pcta_child; 85 }; 86 87 struct pmcpl_ct_instr { 88 uintfptr_t pctf_func; 89 struct pmcpl_ct_sample pctf_samples; 90 }; 91 92 /* 93 * Each calltree node is tracked by a pmcpl_ct_node struct. 94 */ 95 struct pmcpl_ct_node { 96 struct pmcstat_image *pct_image; 97 uintfptr_t pct_func; 98 99 struct pmcstat_symbol *pct_sym; 100 pmcstat_interned_string pct_ifl; 101 pmcstat_interned_string pct_ifn; 102 103 struct pmcpl_ct_sample pct_samples; 104 105 int pct_narc; 106 int pct_arc_c; 107 struct pmcpl_ct_arc *pct_arc; 108 109 /* TODO: optimize for large number of items. */ 110 int pct_ninstr; 111 int pct_instr_c; 112 struct pmcpl_ct_instr *pct_instr; 113 114 #define PMCPL_PCT_ADDR 0 115 #define PMCPL_PCT_NAME 1 116 char pct_type; 117 #define PMCPL_PCT_WHITE 0 118 #define PMCPL_PCT_GREY 1 119 #define PMCPL_PCT_BLACK 2 120 char pct_color; 121 }; 122 123 struct pmcpl_ct_node_hash { 124 struct pmcpl_ct_node *pch_ctnode; 125 STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next; 126 }; 127 128 static struct pmcpl_ct_sample pmcpl_ct_callid; 129 130 #define PMCPL_CT_MAXCOL PMC_CALLCHAIN_DEPTH_MAX 131 #define PMCPL_CT_MAXLINE 1024 /* TODO: dynamic. */ 132 133 struct pmcpl_ct_line { 134 unsigned ln_sum; 135 unsigned ln_index; 136 }; 137 138 static struct pmcpl_ct_line pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1]; 139 static struct pmcpl_ct_node 140 *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1]; 141 142 /* 143 * All nodes indexed by function/image name are placed in a hash table. 144 */ 145 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH]; 146 147 /* 148 * Root node for the graph. 149 */ 150 static struct pmcpl_ct_node *pmcpl_ct_root; 151 152 /* 153 * Prototypes 154 */ 155 156 /* 157 * Initialize a samples. 158 */ 159 160 static void 161 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples) 162 { 163 164 samples->npmcs = 0; 165 samples->sb = NULL; 166 } 167 168 /* 169 * Free a samples. 170 */ 171 172 static void 173 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples) 174 { 175 176 samples->npmcs = 0; 177 free(samples->sb); 178 samples->sb = NULL; 179 } 180 181 /* 182 * Grow a sample block to store pmcstat_npmcs PMCs. 183 */ 184 185 static void 186 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples) 187 { 188 unsigned int npmcs; 189 190 /* Enough storage. */ 191 if (pmcstat_npmcs <= samples->npmcs) 192 return; 193 194 npmcs = samples->npmcs + 195 max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE); 196 samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned)); 197 if (samples->sb == NULL) 198 errx(EX_SOFTWARE, "ERROR: out of memory"); 199 bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned), 200 (npmcs - samples->npmcs) * sizeof(unsigned)); 201 samples->npmcs = npmcs; 202 } 203 204 /* 205 * Compute the sum of all root arcs. 206 */ 207 208 static void 209 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples) 210 { 211 int i, pmcin; 212 213 pmcpl_ct_samples_init(samples); 214 pmcpl_ct_samples_grow(samples); 215 216 for (i = 0; i < pmcpl_ct_root->pct_narc; i++) 217 for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) 218 samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin, 219 &pmcpl_ct_root->pct_arc[i].pcta_samples); 220 } 221 222 /* 223 * Grow the arc table. 224 */ 225 226 static void 227 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items) 228 { 229 unsigned int nmaxsize; 230 231 if (cursize < *maxsize) 232 return; 233 234 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 235 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc)); 236 if (*items == NULL) 237 errx(EX_SOFTWARE, "ERROR: out of memory"); 238 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc), 239 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc)); 240 *maxsize = nmaxsize; 241 } 242 243 /* 244 * Grow the instr table. 245 */ 246 247 static void 248 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items) 249 { 250 unsigned int nmaxsize; 251 252 if (cursize < *maxsize) 253 return; 254 255 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 256 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr)); 257 if (*items == NULL) 258 errx(EX_SOFTWARE, "ERROR: out of memory"); 259 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr), 260 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr)); 261 *maxsize = nmaxsize; 262 } 263 264 /* 265 * Add a new instruction sample to given node. 266 */ 267 268 static void 269 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, 270 uintfptr_t pc, unsigned v) 271 { 272 int i; 273 struct pmcpl_ct_instr *in; 274 275 for (i = 0; i<ct->pct_ninstr; i++) { 276 if (ct->pct_instr[i].pctf_func == pc) { 277 in = &ct->pct_instr[i]; 278 pmcpl_ct_samples_grow(&in->pctf_samples); 279 in->pctf_samples.sb[pmcin] += v; 280 return; 281 } 282 } 283 284 pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr); 285 in = &ct->pct_instr[ct->pct_ninstr]; 286 in->pctf_func = pc; 287 pmcpl_ct_samples_init(&in->pctf_samples); 288 pmcpl_ct_samples_grow(&in->pctf_samples); 289 in->pctf_samples.sb[pmcin] = v; 290 ct->pct_ninstr++; 291 } 292 293 /* 294 * Allocate a new node. 295 */ 296 297 static struct pmcpl_ct_node * 298 pmcpl_ct_node_allocate(void) 299 { 300 struct pmcpl_ct_node *ct; 301 302 if ((ct = malloc(sizeof(*ct))) == NULL) 303 err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); 304 305 pmcpl_ct_samples_init(&ct->pct_samples); 306 307 ct->pct_sym = NULL; 308 ct->pct_image = NULL; 309 ct->pct_func = 0; 310 311 ct->pct_narc = 0; 312 ct->pct_arc_c = 0; 313 ct->pct_arc = NULL; 314 315 ct->pct_ninstr = 0; 316 ct->pct_instr_c = 0; 317 ct->pct_instr = NULL; 318 319 ct->pct_color = PMCPL_PCT_WHITE; 320 321 return (ct); 322 } 323 324 /* 325 * Free a node. 326 */ 327 328 static void 329 pmcpl_ct_node_free(struct pmcpl_ct_node *ct) 330 { 331 int i; 332 333 for (i = 0; i < ct->pct_narc; i++) { 334 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples); 335 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid); 336 } 337 338 pmcpl_ct_samples_free(&ct->pct_samples); 339 free(ct->pct_arc); 340 free(ct->pct_instr); 341 free(ct); 342 } 343 344 /* 345 * Clear the graph tag on each node. 346 */ 347 static void 348 pmcpl_ct_node_cleartag(void) 349 { 350 int i; 351 struct pmcpl_ct_node_hash *pch; 352 353 for (i = 0; i < PMCSTAT_NHASH; i++) 354 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 355 pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE; 356 357 pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE; 358 } 359 360 /* 361 * Print the callchain line by line with maximum cost at top. 362 */ 363 364 static int 365 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, 366 struct pmcpl_ct_sample *rsamples, int x, int *y) 367 { 368 int i, terminal; 369 struct pmcpl_ct_arc *arc; 370 371 if (ct->pct_color == PMCPL_PCT_GREY) 372 return 0; 373 374 if (x >= PMCPL_CT_MAXCOL) { 375 pmcpl_ct_topscreen[x][*y] = NULL; 376 return 1; 377 } 378 pmcpl_ct_topscreen[x][*y] = ct; 379 380 /* 381 * Check if this is a terminal node. 382 * We need to check that some samples exist 383 * for at least one arc for that PMC. 384 */ 385 terminal = 1; 386 for (i = 0; i < ct->pct_narc; i++) { 387 arc = &ct->pct_arc[i]; 388 if (arc->pcta_child->pct_color != PMCPL_PCT_GREY && 389 PMCPL_CT_SAMPLE(pmcin, 390 &arc->pcta_samples) != 0 && 391 PMCPL_CT_SAMPLEP(pmcin, 392 &arc->pcta_samples) > pmcstat_threshold) { 393 terminal = 0; 394 break; 395 } 396 } 397 398 if (ct->pct_narc == 0 || terminal) { 399 pmcpl_ct_topscreen[x+1][*y] = NULL; 400 if (*y >= PMCPL_CT_MAXLINE) 401 return 1; 402 *y = *y + 1; 403 for (i=0; i < x; i++) 404 pmcpl_ct_topscreen[i][*y] = 405 pmcpl_ct_topscreen[i][*y - 1]; 406 return 0; 407 } 408 409 ct->pct_color = PMCPL_PCT_GREY; 410 for (i = 0; i < ct->pct_narc; i++) { 411 if (PMCPL_CT_SAMPLE(pmcin, 412 &ct->pct_arc[i].pcta_samples) == 0) 413 continue; 414 if (PMCPL_CT_SAMPLEP(pmcin, 415 &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { 416 if (pmcpl_ct_node_dumptop(pmcin, 417 ct->pct_arc[i].pcta_child, 418 rsamples, x+1, y)) { 419 ct->pct_color = PMCPL_PCT_BLACK; 420 return 1; 421 } 422 } 423 } 424 ct->pct_color = PMCPL_PCT_BLACK; 425 426 return 0; 427 } 428 429 /* 430 * Compare two top line by sum. 431 */ 432 static int 433 pmcpl_ct_line_compare(const void *a, const void *b) 434 { 435 const struct pmcpl_ct_line *ct1, *ct2; 436 437 ct1 = (const struct pmcpl_ct_line *) a; 438 ct2 = (const struct pmcpl_ct_line *) b; 439 440 /* Sort in reverse order */ 441 if (ct1->ln_sum < ct2->ln_sum) 442 return (1); 443 if (ct1->ln_sum > ct2->ln_sum) 444 return (-1); 445 return (0); 446 } 447 448 /* 449 * Format and display given PMC index. 450 */ 451 452 static void 453 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) 454 { 455 #undef TS 456 #undef TSI 457 #define TS(x, y) (pmcpl_ct_topscreen[x][y]) 458 #define TSI(x, y) (pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index]) 459 460 int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y; 461 float v; 462 char ns[30], vs[10], is[20]; 463 struct pmcpl_ct_node *ct; 464 const char *space = " "; 465 466 /* 467 * Sort by line cost. 468 */ 469 for (y = 0; ; y++) { 470 ct = TS(1, y); 471 if (ct == NULL) 472 break; 473 474 pmcpl_ct_topmax[y].ln_sum = 0; 475 pmcpl_ct_topmax[y].ln_index = y; 476 for (x = 1; TS(x, y) != NULL; x++) { 477 pmcpl_ct_topmax[y].ln_sum += 478 PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples); 479 } 480 } 481 qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]), 482 pmcpl_ct_line_compare); 483 pmcpl_ct_topmax[y].ln_index = y; 484 485 for (y = 0; y < maxy; y++) { 486 ct = TSI(1, y); 487 if (ct == NULL) 488 break; 489 490 if (y > 0) 491 PMCSTAT_PRINTW("\n"); 492 493 /* Output sum. */ 494 v = pmcpl_ct_topmax[y].ln_sum * 100.0 / 495 rsamples->sb[pmcin]; 496 snprintf(vs, sizeof(vs), "%.1f", v); 497 v_attrs = PMCSTAT_ATTRPERCENT(v); 498 PMCSTAT_ATTRON(v_attrs); 499 PMCSTAT_PRINTW("%5.5s ", vs); 500 PMCSTAT_ATTROFF(v_attrs); 501 502 width = indentwidth = 5 + 1; 503 504 for (x = 1; (ct = TSI(x, y)) != NULL; x++) { 505 506 vs[0] = '\0'; vs_len = 0; 507 is[0] = '\0'; is_len = 0; 508 509 /* Format value. */ 510 v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples); 511 if (v > pmcstat_threshold) 512 vs_len = snprintf(vs, sizeof(vs), 513 "(%.1f%%)", v); 514 v_attrs = PMCSTAT_ATTRPERCENT(v); 515 516 if (pmcstat_skiplink && v <= pmcstat_threshold) { 517 strlcpy(ns, ".", sizeof(ns)); 518 ns_len = 1; 519 } else { 520 if (ct->pct_sym != NULL) { 521 ns_len = snprintf(ns, sizeof(ns), "%s", 522 pmcstat_string_unintern(ct->pct_sym->ps_name)); 523 } else 524 ns_len = snprintf(ns, sizeof(ns), "%p", 525 (void *)ct->pct_func); 526 527 /* Format image. */ 528 if (x == 1 || 529 TSI(x-1, y)->pct_image != ct->pct_image) 530 is_len = snprintf(is, sizeof(is), "@%s", 531 pmcstat_string_unintern(ct->pct_image->pi_name)); 532 533 /* Check for line wrap. */ 534 width += ns_len + is_len + vs_len + 1; 535 } 536 if (width >= pmcstat_displaywidth) { 537 maxy--; 538 if (y >= maxy) 539 break; 540 PMCSTAT_PRINTW("\n%*s", indentwidth, space); 541 width = indentwidth + ns_len + is_len + vs_len; 542 } 543 544 PMCSTAT_ATTRON(v_attrs); 545 PMCSTAT_PRINTW("%s%s%s ", ns, is, vs); 546 PMCSTAT_ATTROFF(v_attrs); 547 } 548 } 549 } 550 551 /* 552 * Output top mode snapshot. 553 */ 554 555 void 556 pmcpl_ct_topdisplay(void) 557 { 558 int y; 559 struct pmcpl_ct_sample r, *rsamples; 560 561 rsamples = &r; 562 pmcpl_ct_samples_root(rsamples); 563 pmcpl_ct_node_cleartag(); 564 565 PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE"); 566 567 y = 0; 568 if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter, 569 pmcpl_ct_root, rsamples, 0, &y)) 570 PMCSTAT_PRINTW("...\n"); 571 pmcpl_ct_topscreen[1][y] = NULL; 572 573 pmcpl_ct_node_printtop(rsamples, 574 pmcstat_pmcinfilter, pmcstat_displayheight - 2); 575 576 pmcpl_ct_samples_free(rsamples); 577 } 578 579 /* 580 * Handle top mode keypress. 581 */ 582 583 int 584 pmcpl_ct_topkeypress(int c, WINDOW *w) 585 { 586 587 switch (c) { 588 case 'f': 589 pmcstat_skiplink = !pmcstat_skiplink; 590 wprintw(w, "skip empty link %s", 591 pmcstat_skiplink ? "on" : "off"); 592 break; 593 } 594 595 return 0; 596 } 597 598 /* 599 * Look for a callgraph node associated with pmc `pmcid' in the global 600 * hash table that corresponds to the given `pc' value in the process map 601 * `ppm'. 602 */ 603 604 static void 605 pmcpl_ct_node_update(struct pmcpl_ct_node *parent, 606 struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd) 607 { 608 struct pmcpl_ct_arc *arc; 609 int i; 610 611 assert(parent != NULL); 612 613 /* 614 * Find related arc in parent node and 615 * increment the sample count. 616 */ 617 for (i = 0; i < parent->pct_narc; i++) { 618 if (parent->pct_arc[i].pcta_child == child) { 619 arc = &parent->pct_arc[i]; 620 pmcpl_ct_samples_grow(&arc->pcta_samples); 621 arc->pcta_samples.sb[pmcin] += v; 622 /* Estimate call count. */ 623 if (cd) { 624 pmcpl_ct_samples_grow(&arc->pcta_callid); 625 if (pmcpl_ct_callid.sb[pmcin] - 626 arc->pcta_callid.sb[pmcin] > 1) 627 arc->pcta_call++; 628 arc->pcta_callid.sb[pmcin] = 629 pmcpl_ct_callid.sb[pmcin]; 630 } 631 return; 632 } 633 } 634 635 /* 636 * No arc found for us, add ourself to the parent. 637 */ 638 pmcpl_ct_arc_grow(parent->pct_narc, 639 &parent->pct_arc_c, &parent->pct_arc); 640 arc = &parent->pct_arc[parent->pct_narc]; 641 pmcpl_ct_samples_grow(&arc->pcta_samples); 642 arc->pcta_samples.sb[pmcin] = v; 643 arc->pcta_call = 1; 644 if (cd) { 645 pmcpl_ct_samples_grow(&arc->pcta_callid); 646 arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin]; 647 } 648 arc->pcta_child = child; 649 parent->pct_narc++; 650 } 651 652 /* 653 * Lookup by image/pc. 654 */ 655 656 static struct pmcpl_ct_node * 657 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc, 658 struct pmcstat_symbol *sym, char *fl, char *fn) 659 { 660 int i; 661 unsigned int hash; 662 struct pmcpl_ct_node *ct; 663 struct pmcpl_ct_node_hash *h; 664 pmcstat_interned_string ifl, ifn; 665 666 if (fn != NULL) { 667 ifl = pmcstat_string_intern(fl); 668 ifn = pmcstat_string_intern(fn); 669 } else { 670 ifl = 0; 671 ifn = 0; 672 } 673 674 for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++) 675 hash += (pc >> i) & 0xFF; 676 677 hash &= PMCSTAT_HASH_MASK; 678 679 STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) { 680 ct = h->pch_ctnode; 681 682 assert(ct != NULL); 683 684 if (ct->pct_image == image && ct->pct_func == pc) { 685 if (fn == NULL) 686 return (ct); 687 if (ct->pct_type == PMCPL_PCT_NAME && 688 ct->pct_ifl == ifl && ct->pct_ifn == ifn) 689 return (ct); 690 } 691 } 692 693 /* 694 * We haven't seen this (pmcid, pc) tuple yet, so allocate a 695 * new callgraph node and a new hash table entry for it. 696 */ 697 ct = pmcpl_ct_node_allocate(); 698 if ((h = malloc(sizeof(*h))) == NULL) 699 err(EX_OSERR, "ERROR: Could not allocate callgraph node"); 700 701 if (fn != NULL) { 702 ct->pct_type = PMCPL_PCT_NAME; 703 ct->pct_ifl = ifl; 704 ct->pct_ifn = ifn; 705 } else 706 ct->pct_type = PMCPL_PCT_ADDR; 707 ct->pct_image = image; 708 ct->pct_func = pc; 709 ct->pct_sym = sym; 710 711 h->pch_ctnode = ct; 712 STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next); 713 return (ct); 714 } 715 716 /* 717 * Record a callchain. 718 */ 719 720 void 721 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, 722 uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) 723 { 724 int i, n, pmcin; 725 uintfptr_t pc, loadaddress; 726 struct pmcstat_image *image; 727 struct pmcstat_symbol *sym; 728 struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX]; 729 struct pmcstat_process *km; 730 struct pmcpl_ct_node *ct; 731 struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1]; 732 733 (void) cpu; 734 735 assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX); 736 737 /* Get the PMC index. */ 738 pmcin = pmcr->pr_pmcin; 739 740 /* 741 * Validate mapping for the callchain. 742 * Go from bottom to first invalid entry. 743 */ 744 km = pmcstat_kernproc; 745 for (n = 0; n < (int)nsamples; n++) { 746 ppm[n] = pmcstat_process_find_map(usermode ? 747 pp : km, cc[n]); 748 if (ppm[n] == NULL) { 749 /* Detect full frame capture (kernel + user). */ 750 if (!usermode) { 751 ppm[n] = pmcstat_process_find_map(pp, cc[n]); 752 if (ppm[n] != NULL) 753 km = pp; 754 } 755 } 756 if (ppm[n] == NULL) 757 break; 758 } 759 if (n-- == 0) { 760 pmcstat_stats.ps_callchain_dubious_frames++; 761 pmcr->pr_dubious_frames++; 762 return; 763 } 764 765 /* Increase the call generation counter. */ 766 pmcpl_ct_samples_grow(&pmcpl_ct_callid); 767 pmcpl_ct_callid.sb[pmcin]++; 768 769 /* 770 * Build node list. 771 */ 772 ctl[0] = pmcpl_ct_root; 773 for (i = 1; n >= 0; n--) { 774 image = ppm[n]->ppm_image; 775 loadaddress = ppm[n]->ppm_lowpc + 776 image->pi_vaddr - image->pi_start; 777 /* Convert to an offset in the image. */ 778 pc = cc[n] - loadaddress; 779 /* 780 * Try determine the function at this offset. If we can't 781 * find a function round leave the `pc' value alone. 782 */ 783 if ((sym = pmcstat_symbol_search(image, pc)) != NULL) 784 pc = sym->ps_start; 785 else 786 pmcstat_stats.ps_samples_unknown_function++; 787 788 ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL); 789 if (ct == NULL) { 790 pmcstat_stats.ps_callchain_dubious_frames++; 791 continue; 792 } 793 ctl[i++] = ct; 794 } 795 /* No valid node found. */ 796 if (i == 1) 797 return; 798 n = i; 799 800 ct = ctl[0]; 801 for (i = 1; i < n; i++) 802 pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1); 803 804 /* 805 * Increment the sample count for this PMC. 806 */ 807 pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples); 808 ctl[n-1]->pct_samples.sb[pmcin]++; 809 810 /* Update per instruction sample if required. */ 811 if (args.pa_ctdumpinstr) 812 pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] - 813 (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr - 814 ppm[0]->ppm_image->pi_start), 1); 815 } 816 817 /* 818 * Print node child cost. 819 */ 820 821 static void 822 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr, 823 int pline) 824 { 825 int i, j, line; 826 uintfptr_t addr; 827 struct pmcpl_ct_node *child; 828 char sourcefile[PATH_MAX]; 829 char funcname[PATH_MAX]; 830 831 /* 832 * Child cost. 833 * TODO: attach child cost to the real position in the function. 834 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost> 835 */ 836 for (i=0 ; i<ct->pct_narc; i++) { 837 child = ct->pct_arc[i].pcta_child; 838 /* Object binary. */ 839 fprintf(args.pa_graphfile, "cob=%s\n", 840 pmcstat_string_unintern(child->pct_image->pi_fullpath)); 841 /* Child function name. */ 842 addr = child->pct_image->pi_vaddr + child->pct_func; 843 line = 0; 844 /* Child function source file. */ 845 if (child->pct_type == PMCPL_PCT_NAME) { 846 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 847 pmcstat_string_unintern(child->pct_ifl), 848 pmcstat_string_unintern(child->pct_ifn)); 849 } else if (pmcstat_image_addr2line(child->pct_image, addr, 850 sourcefile, sizeof(sourcefile), &line, 851 funcname, sizeof(funcname))) { 852 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 853 sourcefile, funcname); 854 } else { 855 if (child->pct_sym != NULL) 856 fprintf(args.pa_graphfile, 857 "cfi=???\ncfn=%s\n", 858 pmcstat_string_unintern( 859 child->pct_sym->ps_name)); 860 else 861 fprintf(args.pa_graphfile, 862 "cfi=???\ncfn=%p\n", (void *)addr); 863 } 864 865 /* Child function address, line and call count. */ 866 fprintf(args.pa_graphfile, "calls=%u %p %u\n", 867 ct->pct_arc[i].pcta_call, (void *)addr, line); 868 869 /* 870 * Call address, line, sample. 871 * TODO: Associate call address to the right location. 872 */ 873 fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline); 874 for (j = 0; j<pmcstat_npmcs; j++) 875 fprintf(args.pa_graphfile, " %u", 876 PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples)); 877 fprintf(args.pa_graphfile, "\n"); 878 } 879 } 880 881 /* 882 * Print node self cost. 883 */ 884 885 static void 886 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct) 887 { 888 int i, j, fline, line; 889 uintfptr_t faddr, addr; 890 char sourcefile[PATH_MAX]; 891 char funcname[PATH_MAX]; 892 893 /* 894 * Object binary. 895 */ 896 fprintf(args.pa_graphfile, "ob=%s\n", 897 pmcstat_string_unintern(ct->pct_image->pi_fullpath)); 898 899 /* 900 * Function name. 901 */ 902 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 903 fline = 0; 904 if (ct->pct_type == PMCPL_PCT_NAME) { 905 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 906 pmcstat_string_unintern(ct->pct_ifl), 907 pmcstat_string_unintern(ct->pct_ifn)); 908 } else if (pmcstat_image_addr2line(ct->pct_image, faddr, 909 sourcefile, sizeof(sourcefile), &fline, 910 funcname, sizeof(funcname))) { 911 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 912 sourcefile, funcname); 913 } else { 914 if (ct->pct_sym != NULL) 915 fprintf(args.pa_graphfile, "fl=???\nfn=%s\n", 916 pmcstat_string_unintern(ct->pct_sym->ps_name)); 917 else 918 fprintf(args.pa_graphfile, "fl=???\nfn=%p\n", 919 (void *)(ct->pct_image->pi_vaddr + ct->pct_func)); 920 } 921 922 /* 923 * Self cost. 924 */ 925 if (ct->pct_ninstr > 0) { 926 /* 927 * Per location cost. 928 */ 929 for (i = 0; i < ct->pct_ninstr; i++) { 930 addr = ct->pct_image->pi_vaddr + 931 ct->pct_instr[i].pctf_func; 932 line = 0; 933 pmcstat_image_addr2line(ct->pct_image, addr, 934 sourcefile, sizeof(sourcefile), &line, 935 funcname, sizeof(funcname)); 936 fprintf(args.pa_graphfile, "%p %u", 937 (void *)addr, line); 938 for (j = 0; j<pmcstat_npmcs; j++) 939 fprintf(args.pa_graphfile, " %u", 940 PMCPL_CT_SAMPLE(j, 941 &ct->pct_instr[i].pctf_samples)); 942 fprintf(args.pa_graphfile, "\n"); 943 } 944 } else { 945 /* Global cost function cost. */ 946 fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline); 947 for (i = 0; i<pmcstat_npmcs ; i++) 948 fprintf(args.pa_graphfile, " %u", 949 PMCPL_CT_SAMPLE(i, &ct->pct_samples)); 950 fprintf(args.pa_graphfile, "\n"); 951 } 952 953 pmcpl_ct_node_printchild(ct, faddr, fline); 954 } 955 956 static void 957 pmcpl_ct_printnode(struct pmcpl_ct_node *ct) 958 { 959 int i; 960 961 if (ct == pmcpl_ct_root) { 962 fprintf(args.pa_graphfile, "fn=root\n"); 963 fprintf(args.pa_graphfile, "0x0 1"); 964 for (i = 0; i<pmcstat_npmcs ; i++) 965 fprintf(args.pa_graphfile, " 0"); 966 fprintf(args.pa_graphfile, "\n"); 967 pmcpl_ct_node_printchild(ct, 0, 0); 968 } else 969 pmcpl_ct_node_printself(ct); 970 } 971 972 /* 973 * Breadth first traversal. 974 */ 975 976 static void 977 pmcpl_ct_bfs(struct pmcpl_ct_node *ct) 978 { 979 int i; 980 struct pmcpl_ct_node_hash *pch, *pchc; 981 struct pmcpl_ct_node *child; 982 STAILQ_HEAD(,pmcpl_ct_node_hash) q; 983 984 STAILQ_INIT(&q); 985 if ((pch = malloc(sizeof(*pch))) == NULL) 986 err(EX_OSERR, "ERROR: Cannot allocate queue"); 987 pch->pch_ctnode = ct; 988 STAILQ_INSERT_TAIL(&q, pch, pch_next); 989 ct->pct_color = PMCPL_PCT_BLACK; 990 991 while (!STAILQ_EMPTY(&q)) { 992 pch = STAILQ_FIRST(&q); 993 STAILQ_REMOVE_HEAD(&q, pch_next); 994 pmcpl_ct_printnode(pch->pch_ctnode); 995 for (i = 0; i<pch->pch_ctnode->pct_narc; i++) { 996 child = pch->pch_ctnode->pct_arc[i].pcta_child; 997 if (child->pct_color == PMCPL_PCT_WHITE) { 998 child->pct_color = PMCPL_PCT_BLACK; 999 if ((pchc = malloc(sizeof(*pchc))) == NULL) 1000 err(EX_OSERR, 1001 "ERROR: Cannot allocate queue"); 1002 pchc->pch_ctnode = child; 1003 STAILQ_INSERT_TAIL(&q, pchc, pch_next); 1004 } 1005 } 1006 free(pch); 1007 } 1008 } 1009 1010 /* 1011 * Detect and fix inlined location. 1012 */ 1013 1014 static void 1015 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct) 1016 { 1017 int i, j; 1018 unsigned fline, line, v; 1019 uintfptr_t faddr, addr, pc; 1020 char sourcefile[PATH_MAX]; 1021 char ffuncname[PATH_MAX], funcname[PATH_MAX]; 1022 char buffer[PATH_MAX]; 1023 struct pmcpl_ct_node *child; 1024 1025 /* 1026 * Resolve parent and compare to each instr location. 1027 */ 1028 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 1029 fline = 0; 1030 if (!pmcstat_image_addr2line(ct->pct_image, faddr, 1031 sourcefile, sizeof(sourcefile), &fline, 1032 ffuncname, sizeof(ffuncname))) 1033 return; 1034 1035 for (i = 0; i < ct->pct_ninstr; i++) { 1036 addr = ct->pct_image->pi_vaddr + 1037 ct->pct_instr[i].pctf_func; 1038 line = 0; 1039 if (!pmcstat_image_addr2line(ct->pct_image, addr, 1040 sourcefile, sizeof(sourcefile), &line, 1041 funcname, sizeof(funcname))) 1042 continue; 1043 1044 if (strcmp(funcname, ffuncname) == 0) 1045 continue; 1046 1047 /* 1048 * - Lookup/create inline node by function name. 1049 * - Move instr PMCs to the inline node. 1050 * - Link nodes. 1051 * The lookup create a specific node per image/pc. 1052 */ 1053 if (args.pa_verbosity >= 2) 1054 fprintf(args.pa_printfile, 1055 "WARNING: inlined function at %p %s in %s\n", 1056 (void *)addr, funcname, ffuncname); 1057 1058 snprintf(buffer, sizeof(buffer), "%s@%s", 1059 funcname, ffuncname); 1060 child = pmcpl_ct_node_hash_lookup(ct->pct_image, 1061 ct->pct_func, ct->pct_sym, sourcefile, buffer); 1062 assert(child != NULL); 1063 pc = ct->pct_instr[i].pctf_func; 1064 for (j = 0; j<pmcstat_npmcs; j++) { 1065 v = PMCPL_CT_SAMPLE(j, 1066 &ct->pct_instr[i].pctf_samples); 1067 if (v == 0) 1068 continue; 1069 pmcpl_ct_instr_add(child, j, pc, v); 1070 pmcpl_ct_node_update(ct, child, j, v, 0); 1071 if (j < ct->pct_samples.npmcs) 1072 ct->pct_samples.sb[j] -= 1073 ct->pct_instr[i].pctf_samples.sb[j]; 1074 ct->pct_instr[i].pctf_samples.sb[j] = 0; 1075 } 1076 } 1077 } 1078 1079 static void 1080 pmcpl_ct_expand_inline(void) 1081 { 1082 int i; 1083 struct pmcpl_ct_node_hash *pch; 1084 1085 if (!args.pa_ctdumpinstr) 1086 return; 1087 1088 for (i = 0; i < PMCSTAT_NHASH; i++) 1089 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 1090 if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR) 1091 _pmcpl_ct_expand_inline(pch->pch_ctnode); 1092 } 1093 1094 /* 1095 * Clean the PMC name for Kcachegrind formula 1096 */ 1097 1098 static void 1099 pmcpl_ct_fixup_pmcname(char *s) 1100 { 1101 char *p; 1102 1103 for (p = s; *p; p++) 1104 if (!isalnum(*p)) 1105 *p = '_'; 1106 } 1107 1108 /* 1109 * Print a calltree (KCachegrind) for all PMCs. 1110 */ 1111 1112 static void 1113 pmcpl_ct_print(void) 1114 { 1115 int i; 1116 char name[40]; 1117 struct pmcpl_ct_sample rsamples; 1118 1119 pmcpl_ct_samples_root(&rsamples); 1120 pmcpl_ct_expand_inline(); 1121 1122 fprintf(args.pa_graphfile, 1123 "version: 1\n" 1124 "creator: pmcstat\n" 1125 "positions: instr line\n" 1126 "events:"); 1127 for (i=0; i<pmcstat_npmcs; i++) { 1128 snprintf(name, sizeof(name), "%s_%d", 1129 pmcstat_pmcindex_to_name(i), i); 1130 pmcpl_ct_fixup_pmcname(name); 1131 fprintf(args.pa_graphfile, " %s", name); 1132 } 1133 fprintf(args.pa_graphfile, "\nsummary:"); 1134 for (i=0; i<pmcstat_npmcs ; i++) 1135 fprintf(args.pa_graphfile, " %u", 1136 PMCPL_CT_SAMPLE(i, &rsamples)); 1137 fprintf(args.pa_graphfile, "\n"); 1138 pmcpl_ct_bfs(pmcpl_ct_root); 1139 pmcpl_ct_samples_free(&rsamples); 1140 } 1141 1142 int 1143 pmcpl_ct_configure(char *opt) 1144 { 1145 1146 if (strncmp(opt, "skiplink=", 9) == 0) { 1147 pmcstat_skiplink = atoi(opt+9); 1148 } else 1149 return (0); 1150 1151 return (1); 1152 } 1153 1154 int 1155 pmcpl_ct_init(void) 1156 { 1157 int i; 1158 1159 pmcpl_ct_root = pmcpl_ct_node_allocate(); 1160 1161 for (i = 0; i < PMCSTAT_NHASH; i++) 1162 STAILQ_INIT(&pmcpl_ct_node_hash[i]); 1163 1164 pmcpl_ct_samples_init(&pmcpl_ct_callid); 1165 1166 return (0); 1167 } 1168 1169 void 1170 pmcpl_ct_shutdown(FILE *mf) 1171 { 1172 int i; 1173 struct pmcpl_ct_node_hash *pch, *pchtmp; 1174 1175 (void) mf; 1176 1177 if (args.pa_flags & FLAG_DO_CALLGRAPHS) 1178 pmcpl_ct_print(); 1179 1180 /* 1181 * Free memory. 1182 */ 1183 1184 for (i = 0; i < PMCSTAT_NHASH; i++) { 1185 STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next, 1186 pchtmp) { 1187 pmcpl_ct_node_free(pch->pch_ctnode); 1188 free(pch); 1189 } 1190 } 1191 1192 pmcpl_ct_node_free(pmcpl_ct_root); 1193 pmcpl_ct_root = NULL; 1194 1195 pmcpl_ct_samples_free(&pmcpl_ct_callid); 1196 } 1197 1198