1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012, Fabien Thomas 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * Process hwpmc(4) samples as calltree. 31 * 32 * Output file format compatible with Kcachegrind (kdesdk). 33 * Handle top mode with a sorted tree display. 34 */ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 39 #include <sys/param.h> 40 #include <sys/endian.h> 41 #include <sys/queue.h> 42 43 #include <assert.h> 44 #include <curses.h> 45 #include <ctype.h> 46 #include <err.h> 47 #include <errno.h> 48 #include <fcntl.h> 49 #include <pmc.h> 50 #include <pmclog.h> 51 #include <stdint.h> 52 #include <stdio.h> 53 #include <stdlib.h> 54 #include <string.h> 55 #include <unistd.h> 56 #include <sysexits.h> 57 58 #include "pmcstat.h" 59 #include "pmcstat_log.h" 60 #include "pmcstat_top.h" 61 #include "pmcpl_calltree.h" 62 63 #define min(A,B) ((A) < (B) ? (A) : (B)) 64 #define max(A,B) ((A) > (B) ? (A) : (B)) 65 66 #define PMCPL_CT_GROWSIZE 4 67 68 static int pmcstat_skiplink = 0; 69 70 struct pmcpl_ct_node; 71 72 /* Get the sample value for PMC a. */ 73 #define PMCPL_CT_SAMPLE(a, b) \ 74 ((a) < (b)->npmcs ? (b)->sb[a] : 0) 75 76 /* Get the sample value in percent related to rsamples. */ 77 #define PMCPL_CT_SAMPLEP(a, b) \ 78 (PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a]) 79 80 struct pmcpl_ct_sample { 81 int npmcs; /* Max pmc index available. */ 82 unsigned *sb; /* Sample buffer for 0..npmcs. */ 83 }; 84 85 struct pmcpl_ct_arc { 86 struct pmcpl_ct_sample pcta_samples; 87 struct pmcpl_ct_sample pcta_callid; 88 unsigned pcta_call; 89 struct pmcpl_ct_node *pcta_child; 90 }; 91 92 struct pmcpl_ct_instr { 93 uintfptr_t pctf_func; 94 struct pmcpl_ct_sample pctf_samples; 95 }; 96 97 /* 98 * Each calltree node is tracked by a pmcpl_ct_node struct. 99 */ 100 struct pmcpl_ct_node { 101 struct pmcstat_image *pct_image; 102 uintfptr_t pct_func; 103 104 struct pmcstat_symbol *pct_sym; 105 pmcstat_interned_string pct_ifl; 106 pmcstat_interned_string pct_ifn; 107 108 struct pmcpl_ct_sample pct_samples; 109 110 int pct_narc; 111 int pct_arc_c; 112 struct pmcpl_ct_arc *pct_arc; 113 114 /* TODO: optimize for large number of items. */ 115 int pct_ninstr; 116 int pct_instr_c; 117 struct pmcpl_ct_instr *pct_instr; 118 119 #define PMCPL_PCT_ADDR 0 120 #define PMCPL_PCT_NAME 1 121 char pct_type; 122 #define PMCPL_PCT_WHITE 0 123 #define PMCPL_PCT_GREY 1 124 #define PMCPL_PCT_BLACK 2 125 char pct_color; 126 }; 127 128 struct pmcpl_ct_node_hash { 129 struct pmcpl_ct_node *pch_ctnode; 130 STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next; 131 }; 132 133 static struct pmcpl_ct_sample pmcpl_ct_callid; 134 135 #define PMCPL_CT_MAXCOL PMC_CALLCHAIN_DEPTH_MAX 136 #define PMCPL_CT_MAXLINE 1024 /* TODO: dynamic. */ 137 138 struct pmcpl_ct_line { 139 unsigned ln_sum; 140 unsigned ln_index; 141 }; 142 143 static struct pmcpl_ct_line pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1]; 144 static struct pmcpl_ct_node 145 *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1]; 146 147 /* 148 * All nodes indexed by function/image name are placed in a hash table. 149 */ 150 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH]; 151 152 /* 153 * Root node for the graph. 154 */ 155 static struct pmcpl_ct_node *pmcpl_ct_root; 156 157 /* 158 * Prototypes 159 */ 160 161 /* 162 * Initialize a samples. 163 */ 164 165 static void 166 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples) 167 { 168 169 samples->npmcs = 0; 170 samples->sb = NULL; 171 } 172 173 /* 174 * Free a samples. 175 */ 176 177 static void 178 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples) 179 { 180 181 samples->npmcs = 0; 182 free(samples->sb); 183 samples->sb = NULL; 184 } 185 186 /* 187 * Grow a sample block to store pmcstat_npmcs PMCs. 188 */ 189 190 static void 191 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples) 192 { 193 unsigned int npmcs; 194 195 /* Enough storage. */ 196 if (pmcstat_npmcs <= samples->npmcs) 197 return; 198 199 npmcs = samples->npmcs + 200 max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE); 201 samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned)); 202 if (samples->sb == NULL) 203 errx(EX_SOFTWARE, "ERROR: out of memory"); 204 bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned), 205 (npmcs - samples->npmcs) * sizeof(unsigned)); 206 samples->npmcs = npmcs; 207 } 208 209 /* 210 * Compute the sum of all root arcs. 211 */ 212 213 static void 214 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples) 215 { 216 int i, pmcin; 217 218 pmcpl_ct_samples_init(samples); 219 pmcpl_ct_samples_grow(samples); 220 221 for (i = 0; i < pmcpl_ct_root->pct_narc; i++) 222 for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++) 223 samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin, 224 &pmcpl_ct_root->pct_arc[i].pcta_samples); 225 } 226 227 /* 228 * Grow the arc table. 229 */ 230 231 static void 232 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items) 233 { 234 unsigned int nmaxsize; 235 236 if (cursize < *maxsize) 237 return; 238 239 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 240 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc)); 241 if (*items == NULL) 242 errx(EX_SOFTWARE, "ERROR: out of memory"); 243 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc), 244 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc)); 245 *maxsize = nmaxsize; 246 } 247 248 /* 249 * Grow the instr table. 250 */ 251 252 static void 253 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items) 254 { 255 unsigned int nmaxsize; 256 257 if (cursize < *maxsize) 258 return; 259 260 nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE); 261 *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr)); 262 if (*items == NULL) 263 errx(EX_SOFTWARE, "ERROR: out of memory"); 264 bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr), 265 (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr)); 266 *maxsize = nmaxsize; 267 } 268 269 /* 270 * Add a new instruction sample to given node. 271 */ 272 273 static void 274 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin, 275 uintfptr_t pc, unsigned v) 276 { 277 int i; 278 struct pmcpl_ct_instr *in; 279 280 for (i = 0; i<ct->pct_ninstr; i++) { 281 if (ct->pct_instr[i].pctf_func == pc) { 282 in = &ct->pct_instr[i]; 283 pmcpl_ct_samples_grow(&in->pctf_samples); 284 in->pctf_samples.sb[pmcin] += v; 285 return; 286 } 287 } 288 289 pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr); 290 in = &ct->pct_instr[ct->pct_ninstr]; 291 in->pctf_func = pc; 292 pmcpl_ct_samples_init(&in->pctf_samples); 293 pmcpl_ct_samples_grow(&in->pctf_samples); 294 in->pctf_samples.sb[pmcin] = v; 295 ct->pct_ninstr++; 296 } 297 298 /* 299 * Allocate a new node. 300 */ 301 302 static struct pmcpl_ct_node * 303 pmcpl_ct_node_allocate(void) 304 { 305 struct pmcpl_ct_node *ct; 306 307 if ((ct = malloc(sizeof(*ct))) == NULL) 308 err(EX_OSERR, "ERROR: Cannot allocate callgraph node"); 309 310 pmcpl_ct_samples_init(&ct->pct_samples); 311 312 ct->pct_sym = NULL; 313 ct->pct_image = NULL; 314 ct->pct_func = 0; 315 316 ct->pct_narc = 0; 317 ct->pct_arc_c = 0; 318 ct->pct_arc = NULL; 319 320 ct->pct_ninstr = 0; 321 ct->pct_instr_c = 0; 322 ct->pct_instr = NULL; 323 324 ct->pct_color = PMCPL_PCT_WHITE; 325 326 return (ct); 327 } 328 329 /* 330 * Free a node. 331 */ 332 333 static void 334 pmcpl_ct_node_free(struct pmcpl_ct_node *ct) 335 { 336 int i; 337 338 for (i = 0; i < ct->pct_narc; i++) { 339 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples); 340 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid); 341 } 342 343 pmcpl_ct_samples_free(&ct->pct_samples); 344 free(ct->pct_arc); 345 free(ct->pct_instr); 346 free(ct); 347 } 348 349 /* 350 * Clear the graph tag on each node. 351 */ 352 static void 353 pmcpl_ct_node_cleartag(void) 354 { 355 int i; 356 struct pmcpl_ct_node_hash *pch; 357 358 for (i = 0; i < PMCSTAT_NHASH; i++) 359 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 360 pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE; 361 362 pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE; 363 } 364 365 /* 366 * Print the callchain line by line with maximum cost at top. 367 */ 368 369 static int 370 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct, 371 struct pmcpl_ct_sample *rsamples, int x, int *y) 372 { 373 int i, terminal; 374 struct pmcpl_ct_arc *arc; 375 376 if (ct->pct_color == PMCPL_PCT_GREY) 377 return 0; 378 379 if (x >= PMCPL_CT_MAXCOL) { 380 pmcpl_ct_topscreen[x][*y] = NULL; 381 return 1; 382 } 383 pmcpl_ct_topscreen[x][*y] = ct; 384 385 /* 386 * Check if this is a terminal node. 387 * We need to check that some samples exist 388 * for at least one arc for that PMC. 389 */ 390 terminal = 1; 391 for (i = 0; i < ct->pct_narc; i++) { 392 arc = &ct->pct_arc[i]; 393 if (arc->pcta_child->pct_color != PMCPL_PCT_GREY && 394 PMCPL_CT_SAMPLE(pmcin, 395 &arc->pcta_samples) != 0 && 396 PMCPL_CT_SAMPLEP(pmcin, 397 &arc->pcta_samples) > pmcstat_threshold) { 398 terminal = 0; 399 break; 400 } 401 } 402 403 if (ct->pct_narc == 0 || terminal) { 404 pmcpl_ct_topscreen[x+1][*y] = NULL; 405 if (*y >= PMCPL_CT_MAXLINE) 406 return 1; 407 *y = *y + 1; 408 for (i=0; i < x; i++) 409 pmcpl_ct_topscreen[i][*y] = 410 pmcpl_ct_topscreen[i][*y - 1]; 411 return 0; 412 } 413 414 ct->pct_color = PMCPL_PCT_GREY; 415 for (i = 0; i < ct->pct_narc; i++) { 416 if (PMCPL_CT_SAMPLE(pmcin, 417 &ct->pct_arc[i].pcta_samples) == 0) 418 continue; 419 if (PMCPL_CT_SAMPLEP(pmcin, 420 &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) { 421 if (pmcpl_ct_node_dumptop(pmcin, 422 ct->pct_arc[i].pcta_child, 423 rsamples, x+1, y)) { 424 ct->pct_color = PMCPL_PCT_BLACK; 425 return 1; 426 } 427 } 428 } 429 ct->pct_color = PMCPL_PCT_BLACK; 430 431 return 0; 432 } 433 434 /* 435 * Compare two top line by sum. 436 */ 437 static int 438 pmcpl_ct_line_compare(const void *a, const void *b) 439 { 440 const struct pmcpl_ct_line *ct1, *ct2; 441 442 ct1 = (const struct pmcpl_ct_line *) a; 443 ct2 = (const struct pmcpl_ct_line *) b; 444 445 /* Sort in reverse order */ 446 if (ct1->ln_sum < ct2->ln_sum) 447 return (1); 448 if (ct1->ln_sum > ct2->ln_sum) 449 return (-1); 450 return (0); 451 } 452 453 /* 454 * Format and display given PMC index. 455 */ 456 457 static void 458 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy) 459 { 460 #undef TS 461 #undef TSI 462 #define TS(x, y) (pmcpl_ct_topscreen[x][y]) 463 #define TSI(x, y) (pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index]) 464 465 int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y; 466 float v; 467 char ns[30], vs[10], is[20]; 468 struct pmcpl_ct_node *ct; 469 const char *space = " "; 470 471 /* 472 * Sort by line cost. 473 */ 474 for (y = 0; ; y++) { 475 ct = TS(1, y); 476 if (ct == NULL) 477 break; 478 479 pmcpl_ct_topmax[y].ln_sum = 0; 480 pmcpl_ct_topmax[y].ln_index = y; 481 for (x = 1; TS(x, y) != NULL; x++) { 482 pmcpl_ct_topmax[y].ln_sum += 483 PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples); 484 } 485 } 486 qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]), 487 pmcpl_ct_line_compare); 488 pmcpl_ct_topmax[y].ln_index = y; 489 490 for (y = 0; y < maxy; y++) { 491 ct = TSI(1, y); 492 if (ct == NULL) 493 break; 494 495 if (y > 0) 496 PMCSTAT_PRINTW("\n"); 497 498 /* Output sum. */ 499 v = pmcpl_ct_topmax[y].ln_sum * 100.0 / 500 rsamples->sb[pmcin]; 501 snprintf(vs, sizeof(vs), "%.1f", v); 502 v_attrs = PMCSTAT_ATTRPERCENT(v); 503 PMCSTAT_ATTRON(v_attrs); 504 PMCSTAT_PRINTW("%5.5s ", vs); 505 PMCSTAT_ATTROFF(v_attrs); 506 507 width = indentwidth = 5 + 1; 508 509 for (x = 1; (ct = TSI(x, y)) != NULL; x++) { 510 511 vs[0] = '\0'; vs_len = 0; 512 is[0] = '\0'; is_len = 0; 513 514 /* Format value. */ 515 v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples); 516 if (v > pmcstat_threshold) 517 vs_len = snprintf(vs, sizeof(vs), 518 "(%.1f%%)", v); 519 v_attrs = PMCSTAT_ATTRPERCENT(v); 520 521 if (pmcstat_skiplink && v <= pmcstat_threshold) { 522 strlcpy(ns, ".", sizeof(ns)); 523 ns_len = 1; 524 } else { 525 if (ct->pct_sym != NULL) { 526 ns_len = snprintf(ns, sizeof(ns), "%s", 527 pmcstat_string_unintern(ct->pct_sym->ps_name)); 528 } else 529 ns_len = snprintf(ns, sizeof(ns), "%p", 530 (void *)ct->pct_func); 531 532 /* Format image. */ 533 if (x == 1 || 534 TSI(x-1, y)->pct_image != ct->pct_image) 535 is_len = snprintf(is, sizeof(is), "@%s", 536 pmcstat_string_unintern(ct->pct_image->pi_name)); 537 538 /* Check for line wrap. */ 539 width += ns_len + is_len + vs_len + 1; 540 } 541 if (width >= pmcstat_displaywidth) { 542 maxy--; 543 if (y >= maxy) 544 break; 545 PMCSTAT_PRINTW("\n%*s", indentwidth, space); 546 width = indentwidth + ns_len + is_len + vs_len; 547 } 548 549 PMCSTAT_ATTRON(v_attrs); 550 PMCSTAT_PRINTW("%s%s%s ", ns, is, vs); 551 PMCSTAT_ATTROFF(v_attrs); 552 } 553 } 554 } 555 556 /* 557 * Output top mode snapshot. 558 */ 559 560 void 561 pmcpl_ct_topdisplay(void) 562 { 563 int y; 564 struct pmcpl_ct_sample r, *rsamples; 565 566 rsamples = &r; 567 pmcpl_ct_samples_root(rsamples); 568 pmcpl_ct_node_cleartag(); 569 570 PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE"); 571 572 y = 0; 573 if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter, 574 pmcpl_ct_root, rsamples, 0, &y)) 575 PMCSTAT_PRINTW("...\n"); 576 pmcpl_ct_topscreen[1][y] = NULL; 577 578 pmcpl_ct_node_printtop(rsamples, 579 pmcstat_pmcinfilter, pmcstat_displayheight - 2); 580 581 pmcpl_ct_samples_free(rsamples); 582 } 583 584 /* 585 * Handle top mode keypress. 586 */ 587 588 int 589 pmcpl_ct_topkeypress(int c, void *arg) 590 { 591 WINDOW *w; 592 593 w = (WINDOW *)arg; 594 595 switch (c) { 596 case 'f': 597 pmcstat_skiplink = !pmcstat_skiplink; 598 wprintw(w, "skip empty link %s", 599 pmcstat_skiplink ? "on" : "off"); 600 break; 601 } 602 603 return 0; 604 } 605 606 /* 607 * Look for a callgraph node associated with pmc `pmcid' in the global 608 * hash table that corresponds to the given `pc' value in the process map 609 * `ppm'. 610 */ 611 612 static void 613 pmcpl_ct_node_update(struct pmcpl_ct_node *parent, 614 struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd) 615 { 616 struct pmcpl_ct_arc *arc; 617 int i; 618 619 assert(parent != NULL); 620 621 /* 622 * Find related arc in parent node and 623 * increment the sample count. 624 */ 625 for (i = 0; i < parent->pct_narc; i++) { 626 if (parent->pct_arc[i].pcta_child == child) { 627 arc = &parent->pct_arc[i]; 628 pmcpl_ct_samples_grow(&arc->pcta_samples); 629 arc->pcta_samples.sb[pmcin] += v; 630 /* Estimate call count. */ 631 if (cd) { 632 pmcpl_ct_samples_grow(&arc->pcta_callid); 633 if (pmcpl_ct_callid.sb[pmcin] - 634 arc->pcta_callid.sb[pmcin] > 1) 635 arc->pcta_call++; 636 arc->pcta_callid.sb[pmcin] = 637 pmcpl_ct_callid.sb[pmcin]; 638 } 639 return; 640 } 641 } 642 643 /* 644 * No arc found for us, add ourself to the parent. 645 */ 646 pmcpl_ct_arc_grow(parent->pct_narc, 647 &parent->pct_arc_c, &parent->pct_arc); 648 arc = &parent->pct_arc[parent->pct_narc]; 649 pmcpl_ct_samples_grow(&arc->pcta_samples); 650 arc->pcta_samples.sb[pmcin] = v; 651 arc->pcta_call = 1; 652 if (cd) { 653 pmcpl_ct_samples_grow(&arc->pcta_callid); 654 arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin]; 655 } 656 arc->pcta_child = child; 657 parent->pct_narc++; 658 } 659 660 /* 661 * Lookup by image/pc. 662 */ 663 664 static struct pmcpl_ct_node * 665 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc, 666 struct pmcstat_symbol *sym, char *fl, char *fn) 667 { 668 int i; 669 unsigned int hash; 670 struct pmcpl_ct_node *ct; 671 struct pmcpl_ct_node_hash *h; 672 pmcstat_interned_string ifl, ifn; 673 674 if (fn != NULL) { 675 ifl = pmcstat_string_intern(fl); 676 ifn = pmcstat_string_intern(fn); 677 } else { 678 ifl = 0; 679 ifn = 0; 680 } 681 682 for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++) 683 hash += (pc >> i) & 0xFF; 684 685 hash &= PMCSTAT_HASH_MASK; 686 687 STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) { 688 ct = h->pch_ctnode; 689 690 assert(ct != NULL); 691 692 if (ct->pct_image == image && ct->pct_func == pc) { 693 if (fn == NULL) 694 return (ct); 695 if (ct->pct_type == PMCPL_PCT_NAME && 696 ct->pct_ifl == ifl && ct->pct_ifn == ifn) 697 return (ct); 698 } 699 } 700 701 /* 702 * We haven't seen this (pmcid, pc) tuple yet, so allocate a 703 * new callgraph node and a new hash table entry for it. 704 */ 705 ct = pmcpl_ct_node_allocate(); 706 if ((h = malloc(sizeof(*h))) == NULL) 707 err(EX_OSERR, "ERROR: Could not allocate callgraph node"); 708 709 if (fn != NULL) { 710 ct->pct_type = PMCPL_PCT_NAME; 711 ct->pct_ifl = ifl; 712 ct->pct_ifn = ifn; 713 } else 714 ct->pct_type = PMCPL_PCT_ADDR; 715 ct->pct_image = image; 716 ct->pct_func = pc; 717 ct->pct_sym = sym; 718 719 h->pch_ctnode = ct; 720 STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next); 721 return (ct); 722 } 723 724 /* 725 * Record a callchain. 726 */ 727 728 void 729 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, 730 uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu) 731 { 732 int i, n, pmcin; 733 uintfptr_t pc, loadaddress; 734 struct pmcstat_image *image; 735 struct pmcstat_symbol *sym; 736 struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX]; 737 struct pmcstat_process *km; 738 struct pmcpl_ct_node *ct; 739 struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1]; 740 741 (void) cpu; 742 743 assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX); 744 745 /* Get the PMC index. */ 746 pmcin = pmcr->pr_pmcin; 747 748 /* 749 * Validate mapping for the callchain. 750 * Go from bottom to first invalid entry. 751 */ 752 km = pmcstat_kernproc; 753 for (n = 0; n < (int)nsamples; n++) { 754 ppm[n] = pmcstat_process_find_map(usermode ? 755 pp : km, cc[n]); 756 if (ppm[n] == NULL) { 757 /* Detect full frame capture (kernel + user). */ 758 if (!usermode) { 759 ppm[n] = pmcstat_process_find_map(pp, cc[n]); 760 if (ppm[n] != NULL) 761 km = pp; 762 } 763 } 764 if (ppm[n] == NULL) 765 break; 766 } 767 if (n-- == 0) { 768 pmcstat_stats.ps_callchain_dubious_frames++; 769 pmcr->pr_dubious_frames++; 770 return; 771 } 772 773 /* Increase the call generation counter. */ 774 pmcpl_ct_samples_grow(&pmcpl_ct_callid); 775 pmcpl_ct_callid.sb[pmcin]++; 776 777 /* 778 * Build node list. 779 */ 780 ctl[0] = pmcpl_ct_root; 781 for (i = 1; n >= 0; n--) { 782 image = ppm[n]->ppm_image; 783 loadaddress = ppm[n]->ppm_lowpc + 784 image->pi_vaddr - image->pi_start; 785 /* Convert to an offset in the image. */ 786 pc = cc[n] - loadaddress; 787 /* 788 * Try determine the function at this offset. If we can't 789 * find a function round leave the `pc' value alone. 790 */ 791 if ((sym = pmcstat_symbol_search(image, pc)) != NULL) 792 pc = sym->ps_start; 793 else 794 pmcstat_stats.ps_samples_unknown_function++; 795 796 ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL); 797 if (ct == NULL) { 798 pmcstat_stats.ps_callchain_dubious_frames++; 799 continue; 800 } 801 ctl[i++] = ct; 802 } 803 /* No valid node found. */ 804 if (i == 1) 805 return; 806 n = i; 807 808 ct = ctl[0]; 809 for (i = 1; i < n; i++) 810 pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1); 811 812 /* 813 * Increment the sample count for this PMC. 814 */ 815 pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples); 816 ctl[n-1]->pct_samples.sb[pmcin]++; 817 818 /* Update per instruction sample if required. */ 819 if (args.pa_ctdumpinstr) 820 pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] - 821 (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr - 822 ppm[0]->ppm_image->pi_start), 1); 823 } 824 825 /* 826 * Print node child cost. 827 */ 828 829 static void 830 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr, 831 int pline) 832 { 833 int i, j, line; 834 uintfptr_t addr; 835 struct pmcpl_ct_node *child; 836 char sourcefile[PATH_MAX]; 837 char funcname[PATH_MAX]; 838 839 /* 840 * Child cost. 841 * TODO: attach child cost to the real position in the function. 842 * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost> 843 */ 844 for (i=0 ; i<ct->pct_narc; i++) { 845 child = ct->pct_arc[i].pcta_child; 846 /* Object binary. */ 847 fprintf(args.pa_graphfile, "cob=%s\n", 848 pmcstat_string_unintern(child->pct_image->pi_fullpath)); 849 /* Child function name. */ 850 addr = child->pct_image->pi_vaddr + child->pct_func; 851 line = 0; 852 /* Child function source file. */ 853 if (child->pct_type == PMCPL_PCT_NAME) { 854 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 855 pmcstat_string_unintern(child->pct_ifl), 856 pmcstat_string_unintern(child->pct_ifn)); 857 } else if (pmcstat_image_addr2line(child->pct_image, addr, 858 sourcefile, sizeof(sourcefile), &line, 859 funcname, sizeof(funcname))) { 860 fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n", 861 sourcefile, funcname); 862 } else { 863 if (child->pct_sym != NULL) 864 fprintf(args.pa_graphfile, 865 "cfi=???\ncfn=%s\n", 866 pmcstat_string_unintern( 867 child->pct_sym->ps_name)); 868 else 869 fprintf(args.pa_graphfile, 870 "cfi=???\ncfn=%p\n", (void *)addr); 871 } 872 873 /* Child function address, line and call count. */ 874 fprintf(args.pa_graphfile, "calls=%u %p %u\n", 875 ct->pct_arc[i].pcta_call, (void *)addr, line); 876 877 /* 878 * Call address, line, sample. 879 * TODO: Associate call address to the right location. 880 */ 881 fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline); 882 for (j = 0; j<pmcstat_npmcs; j++) 883 fprintf(args.pa_graphfile, " %u", 884 PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples)); 885 fprintf(args.pa_graphfile, "\n"); 886 } 887 } 888 889 /* 890 * Print node self cost. 891 */ 892 893 static void 894 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct) 895 { 896 int i, j, fline, line; 897 uintfptr_t faddr, addr; 898 char sourcefile[PATH_MAX]; 899 char funcname[PATH_MAX]; 900 901 /* 902 * Object binary. 903 */ 904 fprintf(args.pa_graphfile, "ob=%s\n", 905 pmcstat_string_unintern(ct->pct_image->pi_fullpath)); 906 907 /* 908 * Function name. 909 */ 910 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 911 fline = 0; 912 if (ct->pct_type == PMCPL_PCT_NAME) { 913 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 914 pmcstat_string_unintern(ct->pct_ifl), 915 pmcstat_string_unintern(ct->pct_ifn)); 916 } else if (pmcstat_image_addr2line(ct->pct_image, faddr, 917 sourcefile, sizeof(sourcefile), &fline, 918 funcname, sizeof(funcname))) { 919 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n", 920 sourcefile, funcname); 921 } else { 922 if (ct->pct_sym != NULL) 923 fprintf(args.pa_graphfile, "fl=???\nfn=%s\n", 924 pmcstat_string_unintern(ct->pct_sym->ps_name)); 925 else 926 fprintf(args.pa_graphfile, "fl=???\nfn=%p\n", 927 (void *)(ct->pct_image->pi_vaddr + ct->pct_func)); 928 } 929 930 /* 931 * Self cost. 932 */ 933 if (ct->pct_ninstr > 0) { 934 /* 935 * Per location cost. 936 */ 937 for (i = 0; i < ct->pct_ninstr; i++) { 938 addr = ct->pct_image->pi_vaddr + 939 ct->pct_instr[i].pctf_func; 940 line = 0; 941 pmcstat_image_addr2line(ct->pct_image, addr, 942 sourcefile, sizeof(sourcefile), &line, 943 funcname, sizeof(funcname)); 944 fprintf(args.pa_graphfile, "%p %u", 945 (void *)addr, line); 946 for (j = 0; j<pmcstat_npmcs; j++) 947 fprintf(args.pa_graphfile, " %u", 948 PMCPL_CT_SAMPLE(j, 949 &ct->pct_instr[i].pctf_samples)); 950 fprintf(args.pa_graphfile, "\n"); 951 } 952 } else { 953 /* Global cost function cost. */ 954 fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline); 955 for (i = 0; i<pmcstat_npmcs ; i++) 956 fprintf(args.pa_graphfile, " %u", 957 PMCPL_CT_SAMPLE(i, &ct->pct_samples)); 958 fprintf(args.pa_graphfile, "\n"); 959 } 960 961 pmcpl_ct_node_printchild(ct, faddr, fline); 962 } 963 964 static void 965 pmcpl_ct_printnode(struct pmcpl_ct_node *ct) 966 { 967 int i; 968 969 if (ct == pmcpl_ct_root) { 970 fprintf(args.pa_graphfile, "fn=root\n"); 971 fprintf(args.pa_graphfile, "0x0 1"); 972 for (i = 0; i<pmcstat_npmcs ; i++) 973 fprintf(args.pa_graphfile, " 0"); 974 fprintf(args.pa_graphfile, "\n"); 975 pmcpl_ct_node_printchild(ct, 0, 0); 976 } else 977 pmcpl_ct_node_printself(ct); 978 } 979 980 /* 981 * Breadth first traversal. 982 */ 983 984 static void 985 pmcpl_ct_bfs(struct pmcpl_ct_node *ct) 986 { 987 int i; 988 struct pmcpl_ct_node_hash *pch, *pchc; 989 struct pmcpl_ct_node *child; 990 STAILQ_HEAD(,pmcpl_ct_node_hash) q; 991 992 STAILQ_INIT(&q); 993 if ((pch = malloc(sizeof(*pch))) == NULL) 994 err(EX_OSERR, "ERROR: Cannot allocate queue"); 995 pch->pch_ctnode = ct; 996 STAILQ_INSERT_TAIL(&q, pch, pch_next); 997 ct->pct_color = PMCPL_PCT_BLACK; 998 999 while (!STAILQ_EMPTY(&q)) { 1000 pch = STAILQ_FIRST(&q); 1001 STAILQ_REMOVE_HEAD(&q, pch_next); 1002 pmcpl_ct_printnode(pch->pch_ctnode); 1003 for (i = 0; i<pch->pch_ctnode->pct_narc; i++) { 1004 child = pch->pch_ctnode->pct_arc[i].pcta_child; 1005 if (child->pct_color == PMCPL_PCT_WHITE) { 1006 child->pct_color = PMCPL_PCT_BLACK; 1007 if ((pchc = malloc(sizeof(*pchc))) == NULL) 1008 err(EX_OSERR, 1009 "ERROR: Cannot allocate queue"); 1010 pchc->pch_ctnode = child; 1011 STAILQ_INSERT_TAIL(&q, pchc, pch_next); 1012 } 1013 } 1014 free(pch); 1015 } 1016 } 1017 1018 /* 1019 * Detect and fix inlined location. 1020 */ 1021 1022 static void 1023 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct) 1024 { 1025 int i, j; 1026 unsigned fline, line, v; 1027 uintfptr_t faddr, addr, pc; 1028 char sourcefile[PATH_MAX]; 1029 char ffuncname[PATH_MAX], funcname[PATH_MAX]; 1030 char buffer[PATH_MAX]; 1031 struct pmcpl_ct_node *child; 1032 1033 /* 1034 * Resolve parent and compare to each instr location. 1035 */ 1036 faddr = ct->pct_image->pi_vaddr + ct->pct_func; 1037 fline = 0; 1038 if (!pmcstat_image_addr2line(ct->pct_image, faddr, 1039 sourcefile, sizeof(sourcefile), &fline, 1040 ffuncname, sizeof(ffuncname))) 1041 return; 1042 1043 for (i = 0; i < ct->pct_ninstr; i++) { 1044 addr = ct->pct_image->pi_vaddr + 1045 ct->pct_instr[i].pctf_func; 1046 line = 0; 1047 if (!pmcstat_image_addr2line(ct->pct_image, addr, 1048 sourcefile, sizeof(sourcefile), &line, 1049 funcname, sizeof(funcname))) 1050 continue; 1051 1052 if (strcmp(funcname, ffuncname) == 0) 1053 continue; 1054 1055 /* 1056 * - Lookup/create inline node by function name. 1057 * - Move instr PMCs to the inline node. 1058 * - Link nodes. 1059 * The lookup create a specific node per image/pc. 1060 */ 1061 if (args.pa_verbosity >= 2) 1062 fprintf(args.pa_printfile, 1063 "WARNING: inlined function at %p %s in %s\n", 1064 (void *)addr, funcname, ffuncname); 1065 1066 snprintf(buffer, sizeof(buffer), "%s@%s", 1067 funcname, ffuncname); 1068 child = pmcpl_ct_node_hash_lookup(ct->pct_image, 1069 ct->pct_func, ct->pct_sym, sourcefile, buffer); 1070 assert(child != NULL); 1071 pc = ct->pct_instr[i].pctf_func; 1072 for (j = 0; j<pmcstat_npmcs; j++) { 1073 v = PMCPL_CT_SAMPLE(j, 1074 &ct->pct_instr[i].pctf_samples); 1075 if (v == 0) 1076 continue; 1077 pmcpl_ct_instr_add(child, j, pc, v); 1078 pmcpl_ct_node_update(ct, child, j, v, 0); 1079 if (j < ct->pct_samples.npmcs) 1080 ct->pct_samples.sb[j] -= 1081 ct->pct_instr[i].pctf_samples.sb[j]; 1082 ct->pct_instr[i].pctf_samples.sb[j] = 0; 1083 } 1084 } 1085 } 1086 1087 static void 1088 pmcpl_ct_expand_inline(void) 1089 { 1090 int i; 1091 struct pmcpl_ct_node_hash *pch; 1092 1093 if (!args.pa_ctdumpinstr) 1094 return; 1095 1096 for (i = 0; i < PMCSTAT_NHASH; i++) 1097 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next) 1098 if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR) 1099 _pmcpl_ct_expand_inline(pch->pch_ctnode); 1100 } 1101 1102 /* 1103 * Clean the PMC name for Kcachegrind formula 1104 */ 1105 1106 static void 1107 pmcpl_ct_fixup_pmcname(char *s) 1108 { 1109 char *p; 1110 1111 for (p = s; *p; p++) 1112 if (!isalnum(*p)) 1113 *p = '_'; 1114 } 1115 1116 /* 1117 * Print a calltree (KCachegrind) for all PMCs. 1118 */ 1119 1120 static void 1121 pmcpl_ct_print(void) 1122 { 1123 int i; 1124 char name[40]; 1125 struct pmcpl_ct_sample rsamples; 1126 1127 pmcpl_ct_samples_root(&rsamples); 1128 pmcpl_ct_expand_inline(); 1129 1130 fprintf(args.pa_graphfile, 1131 "version: 1\n" 1132 "creator: pmcstat\n" 1133 "positions: instr line\n" 1134 "events:"); 1135 for (i=0; i<pmcstat_npmcs; i++) { 1136 snprintf(name, sizeof(name), "%s_%d", 1137 pmcstat_pmcindex_to_name(i), i); 1138 pmcpl_ct_fixup_pmcname(name); 1139 fprintf(args.pa_graphfile, " %s", name); 1140 } 1141 fprintf(args.pa_graphfile, "\nsummary:"); 1142 for (i=0; i<pmcstat_npmcs ; i++) 1143 fprintf(args.pa_graphfile, " %u", 1144 PMCPL_CT_SAMPLE(i, &rsamples)); 1145 fprintf(args.pa_graphfile, "\n"); 1146 pmcpl_ct_bfs(pmcpl_ct_root); 1147 pmcpl_ct_samples_free(&rsamples); 1148 } 1149 1150 int 1151 pmcpl_ct_configure(char *opt) 1152 { 1153 1154 if (strncmp(opt, "skiplink=", 9) == 0) { 1155 pmcstat_skiplink = atoi(opt+9); 1156 } else 1157 return (0); 1158 1159 return (1); 1160 } 1161 1162 int 1163 pmcpl_ct_init(void) 1164 { 1165 int i; 1166 1167 pmcpl_ct_root = pmcpl_ct_node_allocate(); 1168 1169 for (i = 0; i < PMCSTAT_NHASH; i++) 1170 STAILQ_INIT(&pmcpl_ct_node_hash[i]); 1171 1172 pmcpl_ct_samples_init(&pmcpl_ct_callid); 1173 1174 return (0); 1175 } 1176 1177 void 1178 pmcpl_ct_shutdown(FILE *mf) 1179 { 1180 int i; 1181 struct pmcpl_ct_node_hash *pch, *pchtmp; 1182 1183 (void) mf; 1184 1185 if (args.pa_flags & FLAG_DO_CALLGRAPHS) 1186 pmcpl_ct_print(); 1187 1188 /* 1189 * Free memory. 1190 */ 1191 1192 for (i = 0; i < PMCSTAT_NHASH; i++) { 1193 STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next, 1194 pchtmp) { 1195 pmcpl_ct_node_free(pch->pch_ctnode); 1196 free(pch); 1197 } 1198 } 1199 1200 pmcpl_ct_node_free(pmcpl_ct_root); 1201 pmcpl_ct_root = NULL; 1202 1203 pmcpl_ct_samples_free(&pmcpl_ct_callid); 1204 } 1205 1206