1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2012, Fabien Thomas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Process hwpmc(4) samples as calltree. 31 * 32 * Output file format compatible with Kcachegrind (kdesdk). 33 * Handle top mode with a sorted tree display. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/endian.h> 38 #include <sys/queue.h> 39 40 #include <assert.h> 41 #include <curses.h> 42 #include <ctype.h> 43 #include <err.h> 44 #include <errno.h> 45 #include <fcntl.h> 46 #include <pmc.h> 47 #include <pmclog.h> 48 #include <stdint.h> 49 #include <stdio.h> 50 #include <stdlib.h> 51 #include <string.h> 52 #include <unistd.h> 53 #include <sysexits.h> 54 55 #include "pmcstat.h" 56 #include "pmcstat_log.h" 57 #include "pmcstat_top.h" 58 #include "pmcpl_calltree.h" 59 60 #define min(A,B) ((A) < (B) ? (A) : (B)) 61 #define max(A,B) ((A) > (B) ? (A) : (B)) 62 63 #define PMCPL_CT_GROWSIZE 4 64 65 static int pmcstat_skiplink = 0; 66 67 struct pmcpl_ct_node; 68 69 /* Get the sample value for PMC a. */ 70 #define PMCPL_CT_SAMPLE(a, b) \ 71 ((a) < (b)->npmcs ? (b)->sb[a] : 0) 72 73 /* Get the sample value in percent related to rsamples. */ 74 #define PMCPL_CT_SAMPLEP(a, b) \ 75 (PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a]) 76 77 struct pmcpl_ct_sample { 78 int npmcs; /* Max pmc index available. */ 79 unsigned *sb; /* Sample buffer for 0..npmcs. */ 80 }; 81 82 struct pmcpl_ct_arc { 83 struct pmcpl_ct_sample pcta_samples; 84 struct pmcpl_ct_sample pcta_callid; 85 unsigned pcta_call; 86 struct pmcpl_ct_node *pcta_child; 87 }; 88 89 struct pmcpl_ct_instr { 90 uintfptr_t pctf_func; 91 struct pmcpl_ct_sample pctf_samples; 92 }; 93 94 /* 95 * Each calltree node is tracked by a pmcpl_ct_node struct. 96 */ 97 struct pmcpl_ct_node { 98 struct pmcstat_image *pct_image; 99 uintfptr_t pct_func; 100 101 struct pmcstat_symbol *pct_sym; 102 pmcstat_interned_string pct_ifl; 103 pmcstat_interned_string pct_ifn; 104 105 struct pmcpl_ct_sample pct_samples; 106 107 int pct_narc; 108 int pct_arc_c; 109 struct pmcpl_ct_arc *pct_arc; 110 111 /* TODO: optimize for large number of items. */ 112 int pct_ninstr; 113 int pct_instr_c; 114 struct pmcpl_ct_instr *pct_instr; 115 116 #define PMCPL_PCT_ADDR 0 117 #define PMCPL_PCT_NAME 1 118 char pct_type; 119 #define PMCPL_PCT_WHITE 0 120 #define PMCPL_PCT_GREY 1 121 #define PMCPL_PCT_BLACK 2 122 char pct_color; 123 }; 124 125 struct pmcpl_ct_node_hash { 126 struct pmcpl_ct_node *pch_ctnode; 127 STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next; 128 }; 129 130 static struct pmcpl_ct_sample pmcpl_ct_callid; 131 132 #define PMCPL_CT_MAXCOL PMC_CALLCHAIN_DEPTH_MAX 133 #define PMCPL_CT_MAXLINE 1024 /* TODO: dynamic. */ 134 135 struct pmcpl_ct_line { 136 unsigned ln_sum; 137 unsigned ln_index; 138 }; 139 140 static struct pmcpl_ct_line pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1]; 141 static struct pmcpl_ct_node 142 *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1]; 143 144 /* 145 * All nodes indexed by function/image name are placed in a hash table. 146 */ 147 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH]; 148 149 /* 150 * Root node for the graph. 151 */ 152 static struct pmcpl_ct_node *pmcpl_ct_root; 153 154 /* 155 * Prototypes 156 */ 157 158 /* 159 * Initialize a samples. 160 */ 161 162 static void 163 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples) 164 { 165 166 samples->npmcs = 0; 167 samples->sb = NULL; 168 } 169 170 /* 171 * Free a samples. 172 */ 173 174 static void 175 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples) 176 { 177 178 samples->npmcs = 0; 179 free(samples->sb); 180 samples->sb = NULL; 181 } 182 183 /* 184 * Grow a sample block to store pmcstat_npmcs PMCs. 185 */ 186 187 static void 188 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples) 189 { 190 unsigned int npmcs; 191 192 /* Enough storage. */ 193 if (pmcstat_npmcs <= samples->npmcs) 194 return; 195 196 npmcs = samples->npmcs + 197 max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE); 198 samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned)); 199 if (samples->sb == NULL) 200 errx(EX_SOFTWARE, "ERROR: out of memory"); 201 bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned), 202 (npmcs - samples->npmcs) * sizeof(unsigned)); 203 samples->npmcs = npmcs; 204 } 205 206 /* 207 * Compute the sum of all root arcs. 208 */ 209 210 static void 211 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples) 212 { 213 int i, pmcin; 214 215 pmcpl_ct_samples_init(samples); 216 pmcpl_ct_samples_grow(samples); 217 218 for (i = 0; i < pmcpl_ct_root->pct_narc; i++) 219 for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) 220 samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin, 221 &pmcpl_ct_root->pct_arc[i].pcta_samples); 222 } 223 224 /* 225 * Grow the arc table. 226 */ 227 228 static void 229 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items) 230 { 231 unsigned int nmaxsize; 232 233 if (cursize < *maxsize) 234 return; 235 236 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 237 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc)); 238 if (*items == NULL) 239 errx(EX_SOFTWARE, "ERROR: out of memory"); 240 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc), 241 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc)); 242 *maxsize = nmaxsize; 243 } 244 245 /* 246 * Grow the instr table. 247 */ 248 249 static void 250 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items) 251 { 252 unsigned int nmaxsize; 253 254 if (cursize < *maxsize) 255 return; 256 257 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 258 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr)); 259 if (*items == NULL) 260 errx(EX_SOFTWARE, "ERROR: out of memory"); 261 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr), 262 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr)); 263 *maxsize = nmaxsize; 264 } 265 266 /* 267 * Add a new instruction sample to given node. 268 */ 269 270 static void 271 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, 272 uintfptr_t pc, unsigned v) 273 { 274 int i; 275 struct pmcpl_ct_instr *in; 276 277 for (i = 0; i<ct->pct_ninstr; i++) { 278 if (ct->pct_instr[i].pctf_func == pc) { 279 in = &ct->pct_instr[i]; 280 pmcpl_ct_samples_grow(&in->pctf_samples); 281 in->pctf_samples.sb[pmcin] += v; 282 return; 283 } 284 } 285 286 pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr); 287 in = &ct->pct_instr[ct->pct_ninstr]; 288 in->pctf_func = pc; 289 pmcpl_ct_samples_init(&in->pctf_samples); 290 pmcpl_ct_samples_grow(&in->pctf_samples); 291 in->pctf_samples.sb[pmcin] = v; 292 ct->pct_ninstr++; 293 } 294 295 /* 296 * Allocate a new node. 297 */ 298 299 static struct pmcpl_ct_node * 300 pmcpl_ct_node_allocate(void) 301 { 302 struct pmcpl_ct_node *ct; 303 304 if ((ct = malloc(sizeof(*ct))) == NULL) 305 err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); 306 307 pmcpl_ct_samples_init(&ct->pct_samples); 308 309 ct->pct_sym = NULL; 310 ct->pct_image = NULL; 311 ct->pct_func = 0; 312 313 ct->pct_narc = 0; 314 ct->pct_arc_c = 0; 315 ct->pct_arc = NULL; 316 317 ct->pct_ninstr = 0; 318 ct->pct_instr_c = 0; 319 ct->pct_instr = NULL; 320 321 ct->pct_color = PMCPL_PCT_WHITE; 322 323 return (ct); 324 } 325 326 /* 327 * Free a node. 328 */ 329 330 static void 331 pmcpl_ct_node_free(struct pmcpl_ct_node *ct) 332 { 333 int i; 334 335 for (i = 0; i < ct->pct_narc; i++) { 336 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples); 337 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid); 338 } 339 340 pmcpl_ct_samples_free(&ct->pct_samples); 341 free(ct->pct_arc); 342 free(ct->pct_instr); 343 free(ct); 344 } 345 346 /* 347 * Clear the graph tag on each node. 348 */ 349 static void 350 pmcpl_ct_node_cleartag(void) 351 { 352 int i; 353 struct pmcpl_ct_node_hash *pch; 354 355 for (i = 0; i < PMCSTAT_NHASH; i++) 356 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 357 pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE; 358 359 pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE; 360 } 361 362 /* 363 * Print the callchain line by line with maximum cost at top. 364 */ 365 366 static int 367 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, 368 struct pmcpl_ct_sample *rsamples, int x, int *y) 369 { 370 int i, terminal; 371 struct pmcpl_ct_arc *arc; 372 373 if (ct->pct_color == PMCPL_PCT_GREY) 374 return 0; 375 376 if (x >= PMCPL_CT_MAXCOL) { 377 pmcpl_ct_topscreen[x][*y] = NULL; 378 return 1; 379 } 380 pmcpl_ct_topscreen[x][*y] = ct; 381 382 /* 383 * Check if this is a terminal node. 384 * We need to check that some samples exist 385 * for at least one arc for that PMC. 386 */ 387 terminal = 1; 388 for (i = 0; i < ct->pct_narc; i++) { 389 arc = &ct->pct_arc[i]; 390 if (arc->pcta_child->pct_color != PMCPL_PCT_GREY && 391 PMCPL_CT_SAMPLE(pmcin, 392 &arc->pcta_samples) != 0 && 393 PMCPL_CT_SAMPLEP(pmcin, 394 &arc->pcta_samples) > pmcstat_threshold) { 395 terminal = 0; 396 break; 397 } 398 } 399 400 if (ct->pct_narc == 0 || terminal) { 401 pmcpl_ct_topscreen[x+1][*y] = NULL; 402 if (*y >= PMCPL_CT_MAXLINE) 403 return 1; 404 *y = *y + 1; 405 for (i=0; i < x; i++) 406 pmcpl_ct_topscreen[i][*y] = 407 pmcpl_ct_topscreen[i][*y - 1]; 408 return 0; 409 } 410 411 ct->pct_color = PMCPL_PCT_GREY; 412 for (i = 0; i < ct->pct_narc; i++) { 413 if (PMCPL_CT_SAMPLE(pmcin, 414 &ct->pct_arc[i].pcta_samples) == 0) 415 continue; 416 if (PMCPL_CT_SAMPLEP(pmcin, 417 &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { 418 if (pmcpl_ct_node_dumptop(pmcin, 419 ct->pct_arc[i].pcta_child, 420 rsamples, x+1, y)) { 421 ct->pct_color = PMCPL_PCT_BLACK; 422 return 1; 423 } 424 } 425 } 426 ct->pct_color = PMCPL_PCT_BLACK; 427 428 return 0; 429 } 430 431 /* 432 * Compare two top line by sum. 433 */ 434 static int 435 pmcpl_ct_line_compare(const void *a, const void *b) 436 { 437 const struct pmcpl_ct_line *ct1, *ct2; 438 439 ct1 = (const struct pmcpl_ct_line *) a; 440 ct2 = (const struct pmcpl_ct_line *) b; 441 442 /* Sort in reverse order */ 443 if (ct1->ln_sum < ct2->ln_sum) 444 return (1); 445 if (ct1->ln_sum > ct2->ln_sum) 446 return (-1); 447 return (0); 448 } 449 450 /* 451 * Format and display given PMC index. 452 */ 453 454 static void 455 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) 456 { 457 #undef TS 458 #undef TSI 459 #define TS(x, y) (pmcpl_ct_topscreen[x][y]) 460 #define TSI(x, y) (pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index]) 461 462 int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y; 463 float v; 464 char ns[30], vs[10], is[20]; 465 struct pmcpl_ct_node *ct; 466 const char *space = " "; 467 468 /* 469 * Sort by line cost. 470 */ 471 for (y = 0; ; y++) { 472 ct = TS(1, y); 473 if (ct == NULL) 474 break; 475 476 pmcpl_ct_topmax[y].ln_sum = 0; 477 pmcpl_ct_topmax[y].ln_index = y; 478 for (x = 1; TS(x, y) != NULL; x++) { 479 pmcpl_ct_topmax[y].ln_sum += 480 PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples); 481 } 482 } 483 qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]), 484 pmcpl_ct_line_compare); 485 pmcpl_ct_topmax[y].ln_index = y; 486 487 for (y = 0; y < maxy; y++) { 488 ct = TSI(1, y); 489 if (ct == NULL) 490 break; 491 492 if (y > 0) 493 PMCSTAT_PRINTW("\n"); 494 495 /* Output sum. */ 496 v = pmcpl_ct_topmax[y].ln_sum * 100.0 / 497 rsamples->sb[pmcin]; 498 snprintf(vs, sizeof(vs), "%.1f", v); 499 v_attrs = PMCSTAT_ATTRPERCENT(v); 500 PMCSTAT_ATTRON(v_attrs); 501 PMCSTAT_PRINTW("%5.5s ", vs); 502 PMCSTAT_ATTROFF(v_attrs); 503 504 width = indentwidth = 5 + 1; 505 506 for (x = 1; (ct = TSI(x, y)) != NULL; x++) { 507 508 vs[0] = '\0'; vs_len = 0; 509 is[0] = '\0'; is_len = 0; 510 511 /* Format value. */ 512 v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples); 513 if (v > pmcstat_threshold) 514 vs_len = snprintf(vs, sizeof(vs), 515 "(%.1f%%)", v); 516 v_attrs = PMCSTAT_ATTRPERCENT(v); 517 518 if (pmcstat_skiplink && v <= pmcstat_threshold) { 519 strlcpy(ns, ".", sizeof(ns)); 520 ns_len = 1; 521 } else { 522 if (ct->pct_sym != NULL) { 523 ns_len = snprintf(ns, sizeof(ns), "%s", 524 pmcstat_string_unintern(ct->pct_sym->ps_name)); 525 } else 526 ns_len = snprintf(ns, sizeof(ns), "%p", 527 (void *)ct->pct_func); 528 529 /* Format image. */ 530 if (x == 1 || 531 TSI(x-1, y)->pct_image != ct->pct_image) 532 is_len = snprintf(is, sizeof(is), "@%s", 533 pmcstat_string_unintern(ct->pct_image->pi_name)); 534 535 /* Check for line wrap. */ 536 width += ns_len + is_len + vs_len + 1; 537 } 538 if (width >= pmcstat_displaywidth) { 539 maxy--; 540 if (y >= maxy) 541 break; 542 PMCSTAT_PRINTW("\n%*s", indentwidth, space); 543 width = indentwidth + ns_len + is_len + vs_len; 544 } 545 546 PMCSTAT_ATTRON(v_attrs); 547 PMCSTAT_PRINTW("%s%s%s ", ns, is, vs); 548 PMCSTAT_ATTROFF(v_attrs); 549 } 550 } 551 } 552 553 /* 554 * Output top mode snapshot. 555 */ 556 557 void 558 pmcpl_ct_topdisplay(void) 559 { 560 int y; 561 struct pmcpl_ct_sample r, *rsamples; 562 563 rsamples = &r; 564 pmcpl_ct_samples_root(rsamples); 565 pmcpl_ct_node_cleartag(); 566 567 PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE"); 568 569 y = 0; 570 if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter, 571 pmcpl_ct_root, rsamples, 0, &y)) 572 PMCSTAT_PRINTW("...\n"); 573 pmcpl_ct_topscreen[1][y] = NULL; 574 575 pmcpl_ct_node_printtop(rsamples, 576 pmcstat_pmcinfilter, pmcstat_displayheight - 2); 577 578 pmcpl_ct_samples_free(rsamples); 579 } 580 581 /* 582 * Handle top mode keypress. 583 */ 584 585 int 586 pmcpl_ct_topkeypress(int c, void *arg) 587 { 588 WINDOW *w; 589 590 w = (WINDOW *)arg; 591 592 switch (c) { 593 case 'f': 594 pmcstat_skiplink = !pmcstat_skiplink; 595 wprintw(w, "skip empty link %s", 596 pmcstat_skiplink ? "on" : "off"); 597 break; 598 } 599 600 return 0; 601 } 602 603 /* 604 * Look for a callgraph node associated with pmc `pmcid' in the global 605 * hash table that corresponds to the given `pc' value in the process map 606 * `ppm'. 607 */ 608 609 static void 610 pmcpl_ct_node_update(struct pmcpl_ct_node *parent, 611 struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd) 612 { 613 struct pmcpl_ct_arc *arc; 614 int i; 615 616 assert(parent != NULL); 617 618 /* 619 * Find related arc in parent node and 620 * increment the sample count. 621 */ 622 for (i = 0; i < parent->pct_narc; i++) { 623 if (parent->pct_arc[i].pcta_child == child) { 624 arc = &parent->pct_arc[i]; 625 pmcpl_ct_samples_grow(&arc->pcta_samples); 626 arc->pcta_samples.sb[pmcin] += v; 627 /* Estimate call count. */ 628 if (cd) { 629 pmcpl_ct_samples_grow(&arc->pcta_callid); 630 if (pmcpl_ct_callid.sb[pmcin] - 631 arc->pcta_callid.sb[pmcin] > 1) 632 arc->pcta_call++; 633 arc->pcta_callid.sb[pmcin] = 634 pmcpl_ct_callid.sb[pmcin]; 635 } 636 return; 637 } 638 } 639 640 /* 641 * No arc found for us, add ourself to the parent. 642 */ 643 pmcpl_ct_arc_grow(parent->pct_narc, 644 &parent->pct_arc_c, &parent->pct_arc); 645 arc = &parent->pct_arc[parent->pct_narc]; 646 pmcpl_ct_samples_grow(&arc->pcta_samples); 647 arc->pcta_samples.sb[pmcin] = v; 648 arc->pcta_call = 1; 649 if (cd) { 650 pmcpl_ct_samples_grow(&arc->pcta_callid); 651 arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin]; 652 } 653 arc->pcta_child = child; 654 parent->pct_narc++; 655 } 656 657 /* 658 * Lookup by image/pc. 659 */ 660 661 static struct pmcpl_ct_node * 662 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc, 663 struct pmcstat_symbol *sym, char *fl, char *fn) 664 { 665 int i; 666 unsigned int hash; 667 struct pmcpl_ct_node *ct; 668 struct pmcpl_ct_node_hash *h; 669 pmcstat_interned_string ifl, ifn; 670 671 if (fn != NULL) { 672 ifl = pmcstat_string_intern(fl); 673 ifn = pmcstat_string_intern(fn); 674 } else { 675 ifl = 0; 676 ifn = 0; 677 } 678 679 for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++) 680 hash += (pc >> i) & 0xFF; 681 682 hash &= PMCSTAT_HASH_MASK; 683 684 STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) { 685 ct = h->pch_ctnode; 686 687 assert(ct != NULL); 688 689 if (ct->pct_image == image && ct->pct_func == pc) { 690 if (fn == NULL) 691 return (ct); 692 if (ct->pct_type == PMCPL_PCT_NAME && 693 ct->pct_ifl == ifl && ct->pct_ifn == ifn) 694 return (ct); 695 } 696 } 697 698 /* 699 * We haven't seen this (pmcid, pc) tuple yet, so allocate a 700 * new callgraph node and a new hash table entry for it. 701 */ 702 ct = pmcpl_ct_node_allocate(); 703 if ((h = malloc(sizeof(*h))) == NULL) 704 err(EX_OSERR, "ERROR: Could not allocate callgraph node"); 705 706 if (fn != NULL) { 707 ct->pct_type = PMCPL_PCT_NAME; 708 ct->pct_ifl = ifl; 709 ct->pct_ifn = ifn; 710 } else 711 ct->pct_type = PMCPL_PCT_ADDR; 712 ct->pct_image = image; 713 ct->pct_func = pc; 714 ct->pct_sym = sym; 715 716 h->pch_ctnode = ct; 717 STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next); 718 return (ct); 719 } 720 721 /* 722 * Record a callchain. 723 */ 724 725 void 726 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, 727 uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) 728 { 729 int i, n, pmcin; 730 uintfptr_t pc, loadaddress; 731 struct pmcstat_image *image; 732 struct pmcstat_symbol *sym; 733 struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX]; 734 struct pmcstat_process *km; 735 struct pmcpl_ct_node *ct; 736 struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1]; 737 738 (void) cpu; 739 740 assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX); 741 742 /* Get the PMC index. */ 743 pmcin = pmcr->pr_pmcin; 744 745 /* 746 * Validate mapping for the callchain. 747 * Go from bottom to first invalid entry. 748 */ 749 km = pmcstat_kernproc; 750 for (n = 0; n < (int)nsamples; n++) { 751 ppm[n] = pmcstat_process_find_map(usermode ? 752 pp : km, cc[n]); 753 if (ppm[n] == NULL) { 754 /* Detect full frame capture (kernel + user). */ 755 if (!usermode) { 756 ppm[n] = pmcstat_process_find_map(pp, cc[n]); 757 if (ppm[n] != NULL) 758 km = pp; 759 } 760 } 761 if (ppm[n] == NULL) 762 break; 763 } 764 if (n-- == 0) { 765 pmcstat_stats.ps_callchain_dubious_frames++; 766 pmcr->pr_dubious_frames++; 767 return; 768 } 769 770 /* Increase the call generation counter. */ 771 pmcpl_ct_samples_grow(&pmcpl_ct_callid); 772 pmcpl_ct_callid.sb[pmcin]++; 773 774 /* 775 * Build node list. 776 */ 777 ctl[0] = pmcpl_ct_root; 778 for (i = 1; n >= 0; n--) { 779 image = ppm[n]->ppm_image; 780 loadaddress = ppm[n]->ppm_lowpc + 781 image->pi_vaddr - image->pi_start; 782 /* Convert to an offset in the image. */ 783 pc = cc[n] - loadaddress; 784 /* 785 * Try determine the function at this offset. If we can't 786 * find a function round leave the `pc' value alone. 787 */ 788 if ((sym = pmcstat_symbol_search(image, pc)) != NULL) 789 pc = sym->ps_start; 790 else 791 pmcstat_stats.ps_samples_unknown_function++; 792 793 ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL); 794 if (ct == NULL) { 795 pmcstat_stats.ps_callchain_dubious_frames++; 796 continue; 797 } 798 ctl[i++] = ct; 799 } 800 /* No valid node found. */ 801 if (i == 1) 802 return; 803 n = i; 804 805 ct = ctl[0]; 806 for (i = 1; i < n; i++) 807 pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1); 808 809 /* 810 * Increment the sample count for this PMC. 811 */ 812 pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples); 813 ctl[n-1]->pct_samples.sb[pmcin]++; 814 815 /* Update per instruction sample if required. */ 816 if (args.pa_ctdumpinstr) 817 pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] - 818 (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr - 819 ppm[0]->ppm_image->pi_start), 1); 820 } 821 822 /* 823 * Print node child cost. 824 */ 825 826 static void 827 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr, 828 int pline) 829 { 830 int i, j, line; 831 uintfptr_t addr; 832 struct pmcpl_ct_node *child; 833 char sourcefile[PATH_MAX]; 834 char funcname[PATH_MAX]; 835 836 /* 837 * Child cost. 838 * TODO: attach child cost to the real position in the function. 839 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost> 840 */ 841 for (i=0 ; i<ct->pct_narc; i++) { 842 child = ct->pct_arc[i].pcta_child; 843 /* Object binary. */ 844 fprintf(args.pa_graphfile, "cob=%s\n", 845 pmcstat_string_unintern(child->pct_image->pi_fullpath)); 846 /* Child function name. */ 847 addr = child->pct_image->pi_vaddr + child->pct_func; 848 line = 0; 849 /* Child function source file. */ 850 if (child->pct_type == PMCPL_PCT_NAME) { 851 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 852 pmcstat_string_unintern(child->pct_ifl), 853 pmcstat_string_unintern(child->pct_ifn)); 854 } else if (pmcstat_image_addr2line(child->pct_image, addr, 855 sourcefile, sizeof(sourcefile), &line, 856 funcname, sizeof(funcname))) { 857 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 858 sourcefile, funcname); 859 } else { 860 if (child->pct_sym != NULL) 861 fprintf(args.pa_graphfile, 862 "cfi=???\ncfn=%s\n", 863 pmcstat_string_unintern( 864 child->pct_sym->ps_name)); 865 else 866 fprintf(args.pa_graphfile, 867 "cfi=???\ncfn=%p\n", (void *)addr); 868 } 869 870 /* Child function address, line and call count. */ 871 fprintf(args.pa_graphfile, "calls=%u %p %u\n", 872 ct->pct_arc[i].pcta_call, (void *)addr, line); 873 874 /* 875 * Call address, line, sample. 876 * TODO: Associate call address to the right location. 877 */ 878 fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline); 879 for (j = 0; j<pmcstat_npmcs; j++) 880 fprintf(args.pa_graphfile, " %u", 881 PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples)); 882 fprintf(args.pa_graphfile, "\n"); 883 } 884 } 885 886 /* 887 * Print node self cost. 888 */ 889 890 static void 891 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct) 892 { 893 int i, j, fline, line; 894 uintfptr_t faddr, addr; 895 char sourcefile[PATH_MAX]; 896 char funcname[PATH_MAX]; 897 898 /* 899 * Object binary. 900 */ 901 fprintf(args.pa_graphfile, "ob=%s\n", 902 pmcstat_string_unintern(ct->pct_image->pi_fullpath)); 903 904 /* 905 * Function name. 906 */ 907 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 908 fline = 0; 909 if (ct->pct_type == PMCPL_PCT_NAME) { 910 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 911 pmcstat_string_unintern(ct->pct_ifl), 912 pmcstat_string_unintern(ct->pct_ifn)); 913 } else if (pmcstat_image_addr2line(ct->pct_image, faddr, 914 sourcefile, sizeof(sourcefile), &fline, 915 funcname, sizeof(funcname))) { 916 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 917 sourcefile, funcname); 918 } else { 919 if (ct->pct_sym != NULL) 920 fprintf(args.pa_graphfile, "fl=???\nfn=%s\n", 921 pmcstat_string_unintern(ct->pct_sym->ps_name)); 922 else 923 fprintf(args.pa_graphfile, "fl=???\nfn=%p\n", 924 (void *)(ct->pct_image->pi_vaddr + ct->pct_func)); 925 } 926 927 /* 928 * Self cost. 929 */ 930 if (ct->pct_ninstr > 0) { 931 /* 932 * Per location cost. 933 */ 934 for (i = 0; i < ct->pct_ninstr; i++) { 935 addr = ct->pct_image->pi_vaddr + 936 ct->pct_instr[i].pctf_func; 937 line = 0; 938 pmcstat_image_addr2line(ct->pct_image, addr, 939 sourcefile, sizeof(sourcefile), &line, 940 funcname, sizeof(funcname)); 941 fprintf(args.pa_graphfile, "%p %u", 942 (void *)addr, line); 943 for (j = 0; j<pmcstat_npmcs; j++) 944 fprintf(args.pa_graphfile, " %u", 945 PMCPL_CT_SAMPLE(j, 946 &ct->pct_instr[i].pctf_samples)); 947 fprintf(args.pa_graphfile, "\n"); 948 } 949 } else { 950 /* Global cost function cost. */ 951 fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline); 952 for (i = 0; i<pmcstat_npmcs ; i++) 953 fprintf(args.pa_graphfile, " %u", 954 PMCPL_CT_SAMPLE(i, &ct->pct_samples)); 955 fprintf(args.pa_graphfile, "\n"); 956 } 957 958 pmcpl_ct_node_printchild(ct, faddr, fline); 959 } 960 961 static void 962 pmcpl_ct_printnode(struct pmcpl_ct_node *ct) 963 { 964 int i; 965 966 if (ct == pmcpl_ct_root) { 967 fprintf(args.pa_graphfile, "fn=root\n"); 968 fprintf(args.pa_graphfile, "0x0 1"); 969 for (i = 0; i<pmcstat_npmcs ; i++) 970 fprintf(args.pa_graphfile, " 0"); 971 fprintf(args.pa_graphfile, "\n"); 972 pmcpl_ct_node_printchild(ct, 0, 0); 973 } else 974 pmcpl_ct_node_printself(ct); 975 } 976 977 /* 978 * Breadth first traversal. 979 */ 980 981 static void 982 pmcpl_ct_bfs(struct pmcpl_ct_node *ct) 983 { 984 int i; 985 struct pmcpl_ct_node_hash *pch, *pchc; 986 struct pmcpl_ct_node *child; 987 STAILQ_HEAD(,pmcpl_ct_node_hash) q; 988 989 STAILQ_INIT(&q); 990 if ((pch = malloc(sizeof(*pch))) == NULL) 991 err(EX_OSERR, "ERROR: Cannot allocate queue"); 992 pch->pch_ctnode = ct; 993 STAILQ_INSERT_TAIL(&q, pch, pch_next); 994 ct->pct_color = PMCPL_PCT_BLACK; 995 996 while (!STAILQ_EMPTY(&q)) { 997 pch = STAILQ_FIRST(&q); 998 STAILQ_REMOVE_HEAD(&q, pch_next); 999 pmcpl_ct_printnode(pch->pch_ctnode); 1000 for (i = 0; i<pch->pch_ctnode->pct_narc; i++) { 1001 child = pch->pch_ctnode->pct_arc[i].pcta_child; 1002 if (child->pct_color == PMCPL_PCT_WHITE) { 1003 child->pct_color = PMCPL_PCT_BLACK; 1004 if ((pchc = malloc(sizeof(*pchc))) == NULL) 1005 err(EX_OSERR, 1006 "ERROR: Cannot allocate queue"); 1007 pchc->pch_ctnode = child; 1008 STAILQ_INSERT_TAIL(&q, pchc, pch_next); 1009 } 1010 } 1011 free(pch); 1012 } 1013 } 1014 1015 /* 1016 * Detect and fix inlined location. 1017 */ 1018 1019 static void 1020 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct) 1021 { 1022 int i, j; 1023 unsigned fline, line, v; 1024 uintfptr_t faddr, addr, pc; 1025 char sourcefile[PATH_MAX]; 1026 char ffuncname[PATH_MAX], funcname[PATH_MAX]; 1027 char buffer[PATH_MAX]; 1028 struct pmcpl_ct_node *child; 1029 1030 /* 1031 * Resolve parent and compare to each instr location. 1032 */ 1033 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 1034 fline = 0; 1035 if (!pmcstat_image_addr2line(ct->pct_image, faddr, 1036 sourcefile, sizeof(sourcefile), &fline, 1037 ffuncname, sizeof(ffuncname))) 1038 return; 1039 1040 for (i = 0; i < ct->pct_ninstr; i++) { 1041 addr = ct->pct_image->pi_vaddr + 1042 ct->pct_instr[i].pctf_func; 1043 line = 0; 1044 if (!pmcstat_image_addr2line(ct->pct_image, addr, 1045 sourcefile, sizeof(sourcefile), &line, 1046 funcname, sizeof(funcname))) 1047 continue; 1048 1049 if (strcmp(funcname, ffuncname) == 0) 1050 continue; 1051 1052 /* 1053 * - Lookup/create inline node by function name. 1054 * - Move instr PMCs to the inline node. 1055 * - Link nodes. 1056 * The lookup create a specific node per image/pc. 1057 */ 1058 if (args.pa_verbosity >= 2) 1059 fprintf(args.pa_printfile, 1060 "WARNING: inlined function at %p %s in %s\n", 1061 (void *)addr, funcname, ffuncname); 1062 1063 snprintf(buffer, sizeof(buffer), "%s@%s", 1064 funcname, ffuncname); 1065 child = pmcpl_ct_node_hash_lookup(ct->pct_image, 1066 ct->pct_func, ct->pct_sym, sourcefile, buffer); 1067 assert(child != NULL); 1068 pc = ct->pct_instr[i].pctf_func; 1069 for (j = 0; j<pmcstat_npmcs; j++) { 1070 v = PMCPL_CT_SAMPLE(j, 1071 &ct->pct_instr[i].pctf_samples); 1072 if (v == 0) 1073 continue; 1074 pmcpl_ct_instr_add(child, j, pc, v); 1075 pmcpl_ct_node_update(ct, child, j, v, 0); 1076 if (j < ct->pct_samples.npmcs) 1077 ct->pct_samples.sb[j] -= 1078 ct->pct_instr[i].pctf_samples.sb[j]; 1079 ct->pct_instr[i].pctf_samples.sb[j] = 0; 1080 } 1081 } 1082 } 1083 1084 static void 1085 pmcpl_ct_expand_inline(void) 1086 { 1087 int i; 1088 struct pmcpl_ct_node_hash *pch; 1089 1090 if (!args.pa_ctdumpinstr) 1091 return; 1092 1093 for (i = 0; i < PMCSTAT_NHASH; i++) 1094 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 1095 if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR) 1096 _pmcpl_ct_expand_inline(pch->pch_ctnode); 1097 } 1098 1099 /* 1100 * Clean the PMC name for Kcachegrind formula 1101 */ 1102 1103 static void 1104 pmcpl_ct_fixup_pmcname(char *s) 1105 { 1106 char *p; 1107 1108 for (p = s; *p; p++) 1109 if (!isalnum(*p)) 1110 *p = '_'; 1111 } 1112 1113 /* 1114 * Print a calltree (KCachegrind) for all PMCs. 1115 */ 1116 1117 static void 1118 pmcpl_ct_print(void) 1119 { 1120 int i; 1121 char name[40]; 1122 struct pmcpl_ct_sample rsamples; 1123 1124 pmcpl_ct_samples_root(&rsamples); 1125 pmcpl_ct_expand_inline(); 1126 1127 fprintf(args.pa_graphfile, 1128 "version: 1\n" 1129 "creator: pmcstat\n" 1130 "positions: instr line\n" 1131 "events:"); 1132 for (i=0; i<pmcstat_npmcs; i++) { 1133 snprintf(name, sizeof(name), "%s_%d", 1134 pmcstat_pmcindex_to_name(i), i); 1135 pmcpl_ct_fixup_pmcname(name); 1136 fprintf(args.pa_graphfile, " %s", name); 1137 } 1138 fprintf(args.pa_graphfile, "\nsummary:"); 1139 for (i=0; i<pmcstat_npmcs ; i++) 1140 fprintf(args.pa_graphfile, " %u", 1141 PMCPL_CT_SAMPLE(i, &rsamples)); 1142 fprintf(args.pa_graphfile, "\n"); 1143 pmcpl_ct_bfs(pmcpl_ct_root); 1144 pmcpl_ct_samples_free(&rsamples); 1145 } 1146 1147 int 1148 pmcpl_ct_configure(char *opt) 1149 { 1150 1151 if (strncmp(opt, "skiplink=", 9) == 0) { 1152 pmcstat_skiplink = atoi(opt+9); 1153 } else 1154 return (0); 1155 1156 return (1); 1157 } 1158 1159 int 1160 pmcpl_ct_init(void) 1161 { 1162 int i; 1163 1164 pmcpl_ct_root = pmcpl_ct_node_allocate(); 1165 1166 for (i = 0; i < PMCSTAT_NHASH; i++) 1167 STAILQ_INIT(&pmcpl_ct_node_hash[i]); 1168 1169 pmcpl_ct_samples_init(&pmcpl_ct_callid); 1170 1171 return (0); 1172 } 1173 1174 void 1175 pmcpl_ct_shutdown(FILE *mf) 1176 { 1177 int i; 1178 struct pmcpl_ct_node_hash *pch, *pchtmp; 1179 1180 (void) mf; 1181 1182 if (args.pa_flags & FLAG_DO_CALLGRAPHS) 1183 pmcpl_ct_print(); 1184 1185 /* 1186 * Free memory. 1187 */ 1188 1189 for (i = 0; i < PMCSTAT_NHASH; i++) { 1190 STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next, 1191 pchtmp) { 1192 pmcpl_ct_node_free(pch->pch_ctnode); 1193 free(pch); 1194 } 1195 } 1196 1197 pmcpl_ct_node_free(pmcpl_ct_root); 1198 pmcpl_ct_root = NULL; 1199 1200 pmcpl_ct_samples_free(&pmcpl_ct_callid); 1201 } 1202 1203