1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sysexits.h> 27 #include <stdlib.h> 28 #include <stdio.h> 29 #include <unistd.h> 30 #include "gprof.h" 31 #include "profile.h" 32 33 bool aflag; 34 bool bflag; 35 bool Bflag; 36 bool cflag; 37 bool Cflag; 38 bool dflag; 39 bool Dflag; 40 bool eflag; 41 bool Eflag; 42 bool fflag; 43 bool Fflag; 44 bool lflag; 45 bool sflag; 46 bool zflag; 47 bool nflag; 48 bool rflag; 49 bool first_file; 50 bool old_style; 51 double scale; 52 double totime; 53 Size n_pcsamples; 54 mod_info_t modules; 55 pctype s_lowpc; 56 pctype s_highpc; 57 sztype n_modules; 58 sztype sampbytes; 59 sztype nsamples; 60 unsigned short *samples; 61 fl_info_t aout_info; 62 fl_info_t gmonout_info; 63 long hz; 64 struct hdr h; 65 unsigned char *textspace; 66 int debug; 67 int number_funcs_toprint; 68 char *a_outname; 69 char *prog_name; 70 char *gmonname; 71 char *whoami = "gprof"; 72 static pctype lowpc, highpc; /* range profiled, in UNIT's */ 73 74 /* 75 * things which get -E excluded by default. 76 */ 77 static char *defaultEs[] = { 78 "mcount", 79 "__mcleanup", 80 NULL 81 }; 82 83 #ifdef DEBUG 84 85 static char *objname[] = { 86 "<invalid object>", 87 "PROF_BUFFER_T", 88 "PROF_CALLGRAPH_T", 89 "PROF_MODULES_T", 90 NULL 91 }; 92 #define MAX_OBJTYPES 3 93 94 #endif /* DEBUG */ 95 96 void 97 done(void) 98 { 99 100 exit(EX_OK); 101 } 102 103 static pctype 104 max(pctype a, pctype b) 105 { 106 if (a > b) 107 return (a); 108 return (b); 109 } 110 111 static pctype 112 min(pctype a, pctype b) 113 { 114 if (a < b) 115 return (a); 116 return (b); 117 } 118 119 /* 120 * calculate scaled entry point addresses (to save time in asgnsamples), 121 * and possibly push the scaled entry points over the entry mask, 122 * if it turns out that the entry point is in one bucket and the code 123 * for a routine is in the next bucket. 124 * 125 */ 126 static void 127 alignentries(void) 128 { 129 struct nl *nlp; 130 #ifdef DEBUG 131 pctype bucket_of_entry; 132 pctype bucket_of_code; 133 #endif /* DEBUG */ 134 135 /* for old-style gmon.out, nameslist is only in modules.nl */ 136 137 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 138 nlp->svalue = nlp->value / sizeof (UNIT); 139 #ifdef DEBUG 140 bucket_of_entry = (nlp->svalue - lowpc) / scale; 141 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 142 if (bucket_of_entry < bucket_of_code) { 143 if (debug & SAMPLEDEBUG) { 144 (void) printf( 145 "[alignentries] pushing svalue 0x%llx " 146 "to 0x%llx\n", nlp->svalue, 147 nlp->svalue + UNITS_TO_CODE); 148 } 149 } 150 #endif /* DEBUG */ 151 } 152 } 153 154 /* 155 * old-style gmon.out 156 * ------------------ 157 * 158 * Assign samples to the procedures to which they belong. 159 * 160 * There are three cases as to where pcl and pch can be 161 * with respect to the routine entry addresses svalue0 and svalue1 162 * as shown in the following diagram. overlap computes the 163 * distance between the arrows, the fraction of the sample 164 * that is to be credited to the routine which starts at svalue0. 165 * 166 * svalue0 svalue1 167 * | | 168 * v v 169 * 170 * +-----------------------------------------------+ 171 * | | 172 * | ->| |<- ->| |<- ->| |<- | 173 * | | | | | | 174 * +---------+ +---------+ +---------+ 175 * 176 * ^ ^ ^ ^ ^ ^ 177 * | | | | | | 178 * pcl pch pcl pch pcl pch 179 * 180 * For the vax we assert that samples will never fall in the first 181 * two bytes of any routine, since that is the entry mask, 182 * thus we give call alignentries() to adjust the entry points if 183 * the entry mask falls in one bucket but the code for the routine 184 * doesn't start until the next bucket. In conjunction with the 185 * alignment of routine addresses, this should allow us to have 186 * only one sample for every four bytes of text space and never 187 * have any overlap (the two end cases, above). 188 */ 189 static void 190 asgnsamples(void) 191 { 192 sztype i, j; 193 unsigned_UNIT ccnt; 194 double time; 195 pctype pcl, pch; 196 pctype overlap; 197 pctype svalue0, svalue1; 198 199 extern mod_info_t modules; 200 nltype *nl = modules.nl; 201 sztype nname = modules.nname; 202 203 /* read samples and assign to namelist symbols */ 204 scale = highpc - lowpc; 205 scale /= nsamples; 206 alignentries(); 207 for (i = 0, j = 1; i < nsamples; i++) { 208 ccnt = samples[i]; 209 if (ccnt == 0) 210 continue; 211 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/ 212 pcl = lowpc + scale * i; 213 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/ 214 pch = lowpc + scale * (i + 1); 215 time = ccnt; 216 #ifdef DEBUG 217 if (debug & SAMPLEDEBUG) { 218 (void) printf( 219 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n", 220 pcl, pch, ccnt); 221 } 222 #endif /* DEBUG */ 223 totime += time; 224 for (j = (j ? j - 1 : 0); j < nname; j++) { 225 svalue0 = nl[j].svalue; 226 svalue1 = nl[j+1].svalue; 227 /* 228 * if high end of tick is below entry address, 229 * go for next tick. 230 */ 231 if (pch < svalue0) 232 break; 233 /* 234 * if low end of tick into next routine, 235 * go for next routine. 236 */ 237 if (pcl >= svalue1) 238 continue; 239 overlap = min(pch, svalue1) - max(pcl, svalue0); 240 if (overlap != 0) { 241 #ifdef DEBUG 242 if (debug & SAMPLEDEBUG) { 243 (void) printf("[asgnsamples] " 244 "(0x%llx->0x%llx-0x%llx) %s gets " 245 "%f ticks %lld overlap\n", 246 nl[j].value/sizeof (UNIT), svalue0, 247 svalue1, nl[j].name, 248 overlap * time / scale, overlap); 249 } 250 #endif /* DEBUG */ 251 nl[j].time += overlap * time / scale; 252 } 253 } 254 } 255 #ifdef DEBUG 256 if (debug & SAMPLEDEBUG) { 257 (void) printf("[asgnsamples] totime %f\n", totime); 258 } 259 #endif /* DEBUG */ 260 } 261 262 263 static void 264 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs, 265 unsigned long ncallees) 266 { 267 ProfCallGraph prof_cgraph; 268 ProfFunction prof_func; 269 arctype *arcp; 270 mod_info_t *mi; 271 nltype *nlp; 272 size_t cur_offset; 273 unsigned long caller_id = 0, callee_id = 0; 274 275 /* 276 * Write the callgraph header 277 */ 278 prof_cgraph.type = PROF_CALLGRAPH_T; 279 prof_cgraph.version = PROF_CALLGRAPH_VER; 280 prof_cgraph.functions = PROFCGRAPH_SZ; 281 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ; 282 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) { 283 perror(filename); 284 exit(EX_IOERR); 285 } 286 /* CONSTCOND */ 287 if (CGRAPH_FILLER) 288 (void) fseek(fp, CGRAPH_FILLER, SEEK_CUR); 289 290 /* Current offset inside the callgraph object */ 291 cur_offset = prof_cgraph.functions; 292 293 for (mi = &modules; mi; mi = mi->next) { 294 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 295 if (nlp->ncallers == 0) 296 continue; 297 298 /* If this is the last callee, set next_to to 0 */ 299 callee_id++; 300 if (callee_id == ncallees) 301 prof_func.next_to = 0; 302 else { 303 prof_func.next_to = cur_offset + 304 nlp->ncallers * PROFFUNC_SZ; 305 } 306 307 /* 308 * Dump this callee's raw arc information with all 309 * its callers 310 */ 311 caller_id = 1; 312 for (arcp = nlp->parents; arcp; 313 arcp = arcp->arc_parentlist) { 314 /* 315 * If no more callers for this callee, set 316 * next_from to 0 317 */ 318 if (caller_id == nlp->ncallers) 319 prof_func.next_from = 0; 320 else { 321 prof_func.next_from = cur_offset + 322 PROFFUNC_SZ; 323 } 324 325 prof_func.frompc = 326 arcp->arc_parentp->module->load_base + 327 (arcp->arc_parentp->value - 328 arcp->arc_parentp->module->txt_origin); 329 prof_func.topc = mi->load_base + 330 (nlp->value - mi->txt_origin); 331 prof_func.count = arcp->arc_count; 332 333 334 if (fwrite(&prof_func, sizeof (ProfFunction), 335 1, fp) != 1) { 336 perror(filename); 337 exit(EX_IOERR); 338 } 339 /* CONSTCOND */ 340 if (FUNC_FILLER) 341 (void) fseek(fp, FUNC_FILLER, SEEK_CUR); 342 343 cur_offset += PROFFUNC_SZ; 344 caller_id++; 345 } 346 } /* for nlp... */ 347 } /* for mi... */ 348 } 349 350 /* 351 * To save all pc-hits in all the gmon.out's is infeasible, as this 352 * may become quite huge even with a small number of files to sum. 353 * Instead, we'll dump *fictitious hits* to correct functions 354 * by scanning module namelists. Again, since this is summing 355 * pc-hits, we may have to dump the pcsamples out in chunks if the 356 * number of pc-hits is high. 357 */ 358 static void 359 dump_hits(FILE *fp, char *filename, nltype *nlp) 360 { 361 Address *p, hitpc; 362 size_t i, nelem, ntowrite; 363 364 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE) 365 nelem = PROF_BUFFER_SIZE; 366 367 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) { 368 (void) fprintf(stderr, "%s: no room for %d pcsamples\n", 369 whoami, nelem); 370 exit(EX_OSERR); 371 } 372 373 /* 374 * Set up *fictitious* hits (to function entry) buffer 375 */ 376 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin); 377 for (i = 0; i < nelem; i++) 378 p[i] = hitpc; 379 380 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) { 381 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) { 382 perror(filename); 383 exit(EX_IOERR); 384 } 385 } 386 387 if (ntowrite) { 388 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) { 389 perror(filename); 390 exit(EX_IOERR); 391 } 392 } 393 394 free(p); 395 } 396 397 static void 398 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs, 399 unsigned long *ncallees) 400 { 401 ProfBuffer prof_buffer; 402 arctype *arcp; 403 mod_info_t *mi; 404 nltype *nlp; 405 406 prof_buffer.type = PROF_BUFFER_T; 407 prof_buffer.version = PROF_BUFFER_VER; 408 prof_buffer.buffer = PROFBUF_SZ; 409 prof_buffer.bufsize = n_pcsamples; 410 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address); 411 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) { 412 perror(filename); 413 exit(EX_IOERR); 414 } 415 /* CONSTCOND */ 416 if (BUF_FILLER) 417 (void) fseek(fp, BUF_FILLER, SEEK_CUR); 418 419 *tarcs = 0; 420 *ncallees = 0; 421 for (mi = &modules; mi; mi = mi->next) { 422 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 423 if (nlp->nticks) 424 dump_hits(fp, filename, nlp); 425 426 nlp->ncallers = 0; 427 for (arcp = nlp->parents; arcp; 428 arcp = arcp->arc_parentlist) { 429 (nlp->ncallers)++; 430 } 431 432 if (nlp->ncallers) { 433 (*tarcs) += nlp->ncallers; 434 (*ncallees)++; 435 } 436 } 437 } 438 } 439 440 static void 441 dump_modules(FILE *fp, char *filename, size_t pbuf_sz) 442 { 443 char *pbuf, *p; 444 size_t namelen; 445 Index off_nxt, off_path; 446 mod_info_t *mi; 447 448 ProfModuleList prof_modlist; 449 ProfModule prof_mod; 450 451 /* Allocate for path strings buffer */ 452 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN); 453 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) { 454 (void) fprintf(stderr, "%s: no room for %d bytes\n", 455 whoami, pbuf_sz * sizeof (char)); 456 exit(EX_OSERR); 457 } 458 459 /* Dump out PROF_MODULE_T info for all non-aout modules */ 460 prof_modlist.type = PROF_MODULES_T; 461 prof_modlist.version = PROF_MODULES_VER; 462 prof_modlist.modules = PROFMODLIST_SZ; 463 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ + 464 pbuf_sz; 465 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) { 466 perror(filename); 467 exit(EX_IOERR); 468 } 469 /* CONSTCOND */ 470 if (MODLIST_FILLER) 471 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR); 472 473 /* 474 * Initialize offsets for ProfModule elements. 475 */ 476 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ; 477 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ; 478 479 for (mi = modules.next; mi; mi = mi->next) { 480 if (mi->next) 481 prof_mod.next = off_nxt; 482 else 483 prof_mod.next = 0; 484 prof_mod.path = off_path; 485 prof_mod.startaddr = mi->load_base; 486 prof_mod.endaddr = mi->load_end; 487 488 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) { 489 perror(filename); 490 exit(EX_IOERR); 491 } 492 493 /* CONSTCOND */ 494 if (MOD_FILLER) 495 (void) fseek(fp, MOD_FILLER, SEEK_CUR); 496 497 (void) strcpy(p, mi->name); 498 namelen = strlen(mi->name); 499 p += namelen + 1; 500 501 /* Note that offset to every path str need not be aligned */ 502 off_nxt += PROFMOD_SZ; 503 off_path += namelen + 1; 504 } 505 506 /* Write out the module path strings */ 507 if (pbuf_sz) { 508 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) { 509 perror(filename); 510 exit(EX_IOERR); 511 } 512 513 free(pbuf); 514 } 515 } 516 517 /* 518 * If we have inactive modules, their current load addresses may overlap with 519 * active ones, and so we've to assign fictitious, non-overlapping addresses 520 * to all modules before we dump them. 521 */ 522 static void 523 fixup_maps(size_t *pathsz) 524 { 525 unsigned int n_inactive = 0; 526 Address lbase = 0, lend; 527 mod_info_t *mi; 528 529 /* Pick the lowest load address among modules */ 530 *pathsz = 0; 531 for (mi = &modules; mi; mi = mi->next) { 532 533 if (mi->active == FALSE) 534 n_inactive++; 535 536 if (mi == &modules || mi->load_base < lbase) 537 lbase = mi->load_base; 538 539 /* 540 * Return total path size of non-aout modules only 541 */ 542 if (mi != &modules) 543 *pathsz = (*pathsz) + strlen(mi->name) + 1; 544 } 545 546 /* 547 * All module info is in fine shape already if there are no 548 * inactive modules 549 */ 550 if (n_inactive == 0) 551 return; 552 553 /* 554 * Assign fictitious load addresses to all (non-aout) modules so 555 * that sum info can be dumped out. 556 */ 557 for (mi = modules.next; mi; mi = mi->next) { 558 lend = lbase + (mi->data_end - mi->txt_origin); 559 if ((lbase < modules.load_base && lend < modules.load_base) || 560 (lbase > modules.load_end && lend > modules.load_end)) { 561 562 mi->load_base = lbase; 563 mi->load_end = lend; 564 565 /* just to give an appearance of reality */ 566 lbase = CEIL(lend + PGSZ, PGSZ); 567 } else { 568 /* 569 * can't use this lbase & lend pair, as it 570 * overlaps with aout's addresses 571 */ 572 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ); 573 mi->load_end = mi->load_base + (lend - lbase); 574 575 lbase = CEIL(mi->load_end + PGSZ, PGSZ); 576 } 577 } 578 } 579 580 static void 581 dump_gprofhdr(FILE *fp, char *filename) 582 { 583 ProfHeader prof_hdr; 584 585 prof_hdr.h_magic = PROF_MAGIC; 586 prof_hdr.h_major_ver = PROF_MAJOR_VERSION; 587 prof_hdr.h_minor_ver = PROF_MINOR_VERSION; 588 prof_hdr.size = PROFHDR_SZ; 589 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) { 590 perror(filename); 591 exit(EX_IOERR); 592 } 593 594 /* CONSTCOND */ 595 if (HDR_FILLER) 596 (void) fseek(fp, HDR_FILLER, SEEK_CUR); 597 } 598 599 static void 600 dumpsum_ostyle(char *sumfile) 601 { 602 nltype *nlp; 603 arctype *arcp; 604 struct rawarc arc; 605 struct rawarc32 arc32; 606 FILE *sfile; 607 608 if ((sfile = fopen(sumfile, "w")) == NULL) { 609 perror(sumfile); 610 exit(EX_IOERR); 611 } 612 /* 613 * dump the header; use the last header read in 614 */ 615 if (Bflag) { 616 if (fwrite(&h, sizeof (h), 1, sfile) != 1) { 617 perror(sumfile); 618 exit(EX_IOERR); 619 } 620 } else { 621 struct hdr32 hdr; 622 hdr.lowpc = (pctype32)h.lowpc; 623 hdr.highpc = (pctype32)h.highpc; 624 hdr.ncnt = (pctype32)h.ncnt; 625 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) { 626 perror(sumfile); 627 exit(EX_IOERR); 628 } 629 } 630 /* 631 * dump the samples 632 */ 633 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) != 634 nsamples) { 635 perror(sumfile); 636 exit(EX_IOERR); 637 } 638 /* 639 * dump the normalized raw arc information. For old-style dumping, 640 * the only namelist is in modules.nl 641 */ 642 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 643 for (arcp = nlp->children; arcp; 644 arcp = arcp->arc_childlist) { 645 if (Bflag) { 646 arc.raw_frompc = arcp->arc_parentp->value; 647 arc.raw_selfpc = arcp->arc_childp->value; 648 arc.raw_count = arcp->arc_count; 649 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) { 650 perror(sumfile); 651 exit(EX_IOERR); 652 } 653 } else { 654 arc32.raw_frompc = 655 (pctype32)arcp->arc_parentp->value; 656 arc32.raw_selfpc = 657 (pctype32)arcp->arc_childp->value; 658 arc32.raw_count = (actype32)arcp->arc_count; 659 if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 660 1) { 661 perror(sumfile); 662 exit(EX_IOERR); 663 } 664 } 665 #ifdef DEBUG 666 if (debug & SAMPLEDEBUG) { 667 (void) printf( 668 "[dumpsum_ostyle] frompc 0x%llx selfpc " 669 "0x%llx count %lld\n", arc.raw_frompc, 670 arc.raw_selfpc, arc.raw_count); 671 } 672 #endif /* DEBUG */ 673 } 674 } 675 (void) fclose(sfile); 676 } 677 678 /* 679 * dump out the gmon.sum file 680 */ 681 static void 682 dumpsum(char *sumfile) 683 { 684 FILE *sfile; 685 size_t pathbuf_sz; 686 unsigned long total_arcs; /* total number of arcs in all */ 687 unsigned long ncallees; /* no. of callees with parents */ 688 689 if (old_style) { 690 dumpsum_ostyle(sumfile); 691 return; 692 } 693 694 if ((sfile = fopen(sumfile, "w")) == NULL) { 695 perror(sumfile); 696 exit(EX_IOERR); 697 } 698 699 /* 700 * Dump the new-style gprof header. Even if one of the original 701 * profiled-files was of a older version, the summed file is of 702 * current version only. 703 */ 704 dump_gprofhdr(sfile, sumfile); 705 706 /* 707 * Fix up load-maps and dump out modules info 708 * 709 * Fix up module load maps so inactive modules get *some* address 710 * (and btw, could you get the total size of non-aout module path 711 * strings please ?) 712 */ 713 fixup_maps(&pathbuf_sz); 714 dump_modules(sfile, sumfile, pathbuf_sz); 715 716 717 /* 718 * Dump out the summ'd pcsamples 719 * 720 * For dumping call graph information later, we need certain 721 * statistics (like total arcs, number of callers for each node); 722 * collect these also while we are at it. 723 */ 724 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees); 725 726 /* 727 * Dump out the summ'd call graph information 728 */ 729 dump_callgraph(sfile, sumfile, total_arcs, ncallees); 730 731 732 (void) fclose(sfile); 733 } 734 735 static void 736 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp) 737 { 738 nltype *parentp; 739 nltype *childp; 740 741 /* 742 * if count == 0 this is a null arc and 743 * we don't need to tally it. 744 */ 745 if (rawp->raw_count == 0) 746 return; 747 748 /* 749 * Lookup the caller and callee pcs in namelists of 750 * appropriate modules 751 */ 752 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL); 753 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL); 754 if (childp && parentp) { 755 if (!Dflag) 756 childp->ncall += rawp->raw_count; 757 else { 758 if (first_file) 759 childp->ncall += rawp->raw_count; 760 else { 761 childp->ncall -= rawp->raw_count; 762 if (childp->ncall < 0) 763 childp->ncall = 0; 764 } 765 } 766 767 #ifdef DEBUG 768 if (debug & TALLYDEBUG) { 769 (void) printf("[tally] arc from %s to %s traversed " 770 "%lld times\n", parentp->name, 771 childp->name, rawp->raw_count); 772 } 773 #endif /* DEBUG */ 774 addarc(parentp, childp, rawp->raw_count); 775 } 776 } 777 778 /* 779 * Look up a module's base address in a sorted list of pc-hits. Unlike 780 * nllookup(), this deals with misses by mapping them to the next *higher* 781 * pc-hit. This is so that we get into the module's first pc-hit rightaway, 782 * even if the module's entry-point (load_base) itself is not a hit. 783 */ 784 static Address * 785 locate(Address *pclist, size_t nelem, Address keypc) 786 { 787 size_t low = 0, middle, high = nelem - 1; 788 789 if (keypc <= pclist[low]) 790 return (pclist); 791 792 if (keypc > pclist[high]) 793 return (NULL); 794 795 while (low != high) { 796 middle = (high + low) >> 1; 797 798 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc)) 799 return (&pclist[middle + 1]); 800 801 if (pclist[middle] >= keypc) 802 high = middle; 803 else 804 low = middle + 1; 805 } 806 807 /* must never reach here! */ 808 return (NULL); 809 } 810 811 static void 812 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples) 813 { 814 Address *pcptr, *pcse = pcsmpl + n_samples; 815 pctype nxt_func; 816 nltype *fnl; 817 size_t func_nticks; 818 #ifdef DEBUG 819 size_t n_hits_in_module = 0; 820 #endif /* DEBUG */ 821 822 /* Locate the first pc-hit for this module */ 823 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) { 824 #ifdef DEBUG 825 if (debug & PCSMPLDEBUG) { 826 (void) printf("[assign_pcsamples] no pc-hits in\n"); 827 (void) printf( 828 " `%s'\n", module->name); 829 } 830 #endif /* DEBUG */ 831 return; /* no pc-hits in this module */ 832 } 833 834 /* Assign all pc-hits in this module to appropriate functions */ 835 while ((pcptr < pcse) && (*pcptr < module->load_end)) { 836 837 /* Update the corresponding function's time */ 838 fnl = nllookup(module, (pctype) *pcptr, &nxt_func); 839 if (fnl != NULL) { 840 /* 841 * Collect all pc-hits in this function. Each 842 * pc-hit counts as 1 tick. 843 */ 844 func_nticks = 0; 845 while ((pcptr < pcse) && (*pcptr < nxt_func)) { 846 func_nticks++; 847 pcptr++; 848 } 849 850 if (func_nticks == 0) 851 pcptr++; 852 else { 853 fnl->nticks += func_nticks; 854 fnl->time += func_nticks; 855 totime += func_nticks; 856 } 857 858 #ifdef DEBUG 859 n_hits_in_module += func_nticks; 860 #endif /* DEBUG */ 861 } else { 862 /* 863 * pc sample could not be assigned to function; 864 * probably in a PLT 865 */ 866 pcptr++; 867 } 868 } 869 870 #ifdef DEBUG 871 if (debug & PCSMPLDEBUG) { 872 (void) printf( 873 "[assign_pcsamples] %ld hits in\n", n_hits_in_module); 874 (void) printf(" `%s'\n", module->name); 875 } 876 #endif /* DEBUG */ 877 } 878 879 int 880 pc_cmp(const void *arg1, const void *arg2) 881 { 882 Address *pc1 = (Address *)arg1; 883 Address *pc2 = (Address *)arg2; 884 885 if (*pc1 > *pc2) 886 return (1); 887 888 if (*pc1 < *pc2) 889 return (-1); 890 891 return (0); 892 } 893 894 static void 895 process_pcsamples(ProfBuffer *bufp) 896 { 897 Address *pc_samples; 898 mod_info_t *mi; 899 caddr_t p; 900 size_t chunk_size, nelem_read, nelem_to_read; 901 902 #ifdef DEBUG 903 if (debug & PCSMPLDEBUG) { 904 (void) printf( 905 "[process_pcsamples] number of pcsamples = %lld\n", 906 bufp->bufsize); 907 } 908 #endif /* DEBUG */ 909 910 /* buffer with no pc samples ? */ 911 if (bufp->bufsize == 0) 912 return; 913 914 /* 915 * If we're processing pcsamples of a profile sum, we could have 916 * more than PROF_BUFFER_SIZE number of samples. In such a case, 917 * we must read the pcsamples in chunks. 918 */ 919 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE) 920 chunk_size = PROF_BUFFER_SIZE; 921 922 /* Allocate for the pcsample chunk */ 923 pc_samples = (Address *) calloc(chunk_size, sizeof (Address)); 924 if (pc_samples == NULL) { 925 (void) fprintf(stderr, "%s: no room for %d sample pc's\n", 926 whoami, chunk_size); 927 exit(EX_OSERR); 928 } 929 930 /* Copy the current set of pcsamples */ 931 nelem_read = 0; 932 nelem_to_read = bufp->bufsize; 933 p = (char *)bufp + bufp->buffer; 934 935 while (nelem_read < nelem_to_read) { 936 (void) memcpy((void *) pc_samples, p, 937 chunk_size * sizeof (Address)); 938 939 /* Sort the pc samples */ 940 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp); 941 942 /* 943 * Assign pcsamples to functions in the currently active 944 * module list 945 */ 946 for (mi = &modules; mi; mi = mi->next) { 947 if (mi->active == FALSE) 948 continue; 949 assign_pcsamples(mi, pc_samples, chunk_size); 950 } 951 952 p += (chunk_size * sizeof (Address)); 953 nelem_read += chunk_size; 954 955 if ((nelem_to_read - nelem_read) < chunk_size) 956 chunk_size = nelem_to_read - nelem_read; 957 } 958 959 free(pc_samples); 960 961 /* Update total number of pcsamples read so far */ 962 n_pcsamples += bufp->bufsize; 963 } 964 965 static mod_info_t * 966 find_module(Address addr) 967 { 968 mod_info_t *mi; 969 970 for (mi = &modules; mi; mi = mi->next) { 971 if (mi->active == FALSE) 972 continue; 973 974 if (addr >= mi->load_base && addr < mi->load_end) 975 return (mi); 976 } 977 978 return (NULL); 979 } 980 981 static void 982 process_cgraph(ProfCallGraph *cgp) 983 { 984 struct rawarc arc; 985 mod_info_t *callee_mi, *caller_mi; 986 ProfFunction *calleep, *callerp; 987 Index caller_off, callee_off; 988 989 /* 990 * Note that *callee_off* increment in the for loop below 991 * uses *calleep* and *calleep* doesn't get set until the for loop 992 * is entered. We don't expect the increment to be executed before 993 * the loop body is executed atleast once, so this should be ok. 994 */ 995 for (callee_off = cgp->functions; callee_off; 996 callee_off = calleep->next_to) { 997 998 /* LINTED: pointer cast */ 999 calleep = (ProfFunction *)((char *)cgp + callee_off); 1000 1001 /* 1002 * We could choose either to sort the {caller, callee} 1003 * list twice and assign callee/caller to modules or inspect 1004 * each callee/caller in the active modules list. Since 1005 * the modules list is usually very small, we'l choose the 1006 * latter. 1007 */ 1008 1009 /* 1010 * If we cannot identify a callee with a module, there's 1011 * no use worrying about who called it. 1012 */ 1013 if ((callee_mi = find_module(calleep->topc)) == NULL) { 1014 #ifdef DEBUG 1015 if (debug & CGRAPHDEBUG) { 1016 (void) printf( 1017 "[process_cgraph] callee %#llx missed\n", 1018 calleep->topc); 1019 } 1020 #endif /* DEBUG */ 1021 continue; 1022 } else 1023 arc.raw_selfpc = calleep->topc; 1024 1025 for (caller_off = callee_off; caller_off; 1026 caller_off = callerp->next_from) { 1027 1028 /* LINTED: pointer cast */ 1029 callerp = (ProfFunction *)((char *)cgp + caller_off); 1030 if ((caller_mi = find_module(callerp->frompc)) == 1031 NULL) { 1032 #ifdef DEBUG 1033 if (debug & CGRAPHDEBUG) { 1034 (void) printf( 1035 "[process_cgraph] caller %#llx " 1036 "missed\n", callerp->frompc); 1037 } 1038 #endif /* DEBUG */ 1039 continue; 1040 } 1041 1042 arc.raw_frompc = callerp->frompc; 1043 arc.raw_count = callerp->count; 1044 1045 #ifdef DEBUG 1046 if (debug & CGRAPHDEBUG) { 1047 (void) printf( 1048 "[process_cgraph] arc <%#llx, %#llx, " 1049 "%lld>\n", arc.raw_frompc, arc.raw_selfpc, 1050 arc.raw_count); 1051 } 1052 #endif /* DEBUG */ 1053 tally(caller_mi, callee_mi, &arc); 1054 } 1055 } 1056 1057 #ifdef DEBUG 1058 (void) puts("\n"); 1059 #endif /* DEBUG */ 1060 } 1061 1062 /* 1063 * Two modules overlap each other if they don't lie completely *outside* 1064 * each other. 1065 */ 1066 static bool 1067 does_overlap(ProfModule *new, mod_info_t *old) 1068 { 1069 /* case 1: new module lies completely *before* the old one */ 1070 if (new->startaddr < old->load_base && new->endaddr <= old->load_base) 1071 return (FALSE); 1072 1073 /* case 2: new module lies completely *after* the old one */ 1074 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end) 1075 return (FALSE); 1076 1077 /* probably a dlopen: the modules overlap each other */ 1078 return (TRUE); 1079 } 1080 1081 static bool 1082 is_same_as_aout(char *modpath, struct stat *buf) 1083 { 1084 if (stat(modpath, buf) == -1) { 1085 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1086 whoami, modpath); 1087 exit(EX_NOINPUT); 1088 } 1089 1090 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino)) 1091 return (TRUE); 1092 else 1093 return (FALSE); 1094 } 1095 1096 static void 1097 process_modules(ProfModuleList *modlp) 1098 { 1099 ProfModule *newmodp; 1100 mod_info_t *mi, *last, *new_module; 1101 char *so_path; 1102 bool more_modules = TRUE; 1103 struct stat so_statbuf; 1104 1105 #ifdef DEBUG 1106 if (debug & MODULEDEBUG) { 1107 (void) printf("[process_modules] module obj version %u\n", 1108 modlp->version); 1109 } 1110 #endif /* DEBUG */ 1111 1112 /* Check version of module type object */ 1113 if (modlp->version > PROF_MODULES_VER) { 1114 (void) fprintf(stderr, "%s: version %d for module type objects" 1115 "is not supported\n", whoami, modlp->version); 1116 exit(EX_SOFTWARE); 1117 } 1118 1119 1120 /* 1121 * Scan the PROF_MODULES_T list and add modules to current list 1122 * of modules, if they're not present already 1123 */ 1124 /* LINTED: pointer cast */ 1125 newmodp = (ProfModule *)((char *)modlp + modlp->modules); 1126 do { 1127 /* 1128 * Since the prog could've been renamed after its run, we 1129 * should see if this overlaps a.out. If it does, it is 1130 * probably the renamed aout. We should also skip any other 1131 * non-sharedobj's that we see (or should we report an error ?) 1132 */ 1133 so_path = (caddr_t)modlp + newmodp->path; 1134 if (does_overlap(newmodp, &modules) || 1135 is_same_as_aout(so_path, &so_statbuf) || 1136 (!is_shared_obj(so_path))) { 1137 1138 if (!newmodp->next) 1139 more_modules = FALSE; 1140 1141 /* LINTED: pointer cast */ 1142 newmodp = (ProfModule *) 1143 ((caddr_t)modlp + newmodp->next); 1144 #ifdef DEBUG 1145 if (debug & MODULEDEBUG) { 1146 (void) printf( 1147 "[process_modules] `%s'\n", so_path); 1148 (void) printf(" skipped\n"); 1149 } 1150 #endif /* DEBUG */ 1151 continue; 1152 } 1153 #ifdef DEBUG 1154 if (debug & MODULEDEBUG) 1155 (void) printf("[process_modules] `%s'...\n", so_path); 1156 #endif /* DEBUG */ 1157 1158 /* 1159 * Check all modules (leave the first one, 'cos that 1160 * is the program executable info). If this module is already 1161 * there in the list, update the load addresses and proceed. 1162 */ 1163 last = &modules; 1164 while ((mi = last->next) != NULL) { 1165 /* 1166 * We expect the full pathname for all shared objects 1167 * needed by the program executable. In this case, we 1168 * simply need to compare the paths to see if they are 1169 * the same file. 1170 */ 1171 if (strcmp(mi->name, so_path) == 0) 1172 break; 1173 1174 /* 1175 * Check if this new shared object will overlap 1176 * any existing module. If yes, remove the old one 1177 * from the linked list (but don't free it, 'cos 1178 * there may be symbols referring to this module 1179 * still) 1180 */ 1181 if (does_overlap(newmodp, mi)) { 1182 #ifdef DEBUG 1183 if (debug & MODULEDEBUG) { 1184 (void) printf( 1185 "[process_modules] `%s'\n", 1186 so_path); 1187 (void) printf( 1188 " overlaps\n"); 1189 (void) printf( 1190 " `%s'\n", 1191 mi->name); 1192 } 1193 #endif /* DEBUG */ 1194 mi->active = FALSE; 1195 } 1196 1197 last = mi; 1198 } 1199 1200 /* Module already there, skip it */ 1201 if (mi != NULL) { 1202 mi->load_base = newmodp->startaddr; 1203 mi->load_end = newmodp->endaddr; 1204 mi->active = TRUE; 1205 if (!newmodp->next) 1206 more_modules = FALSE; 1207 1208 /* LINTED: pointer cast */ 1209 newmodp = (ProfModule *) 1210 ((caddr_t)modlp + newmodp->next); 1211 1212 #ifdef DEBUG 1213 if (debug & MODULEDEBUG) { 1214 (void) printf("[process_modules] base=%#llx, " 1215 "end=%#llx\n", mi->load_base, mi->load_end); 1216 } 1217 #endif /* DEBUG */ 1218 continue; 1219 } 1220 1221 /* 1222 * Check if gmon.out is outdated with respect to the new 1223 * module we want to add 1224 */ 1225 if (gmonout_info.mtime < so_statbuf.st_mtime) { 1226 (void) fprintf(stderr, 1227 "%s: shared obj outdates prof info\n", whoami); 1228 (void) fprintf(stderr, "\t(newer %s)\n", so_path); 1229 exit(EX_NOINPUT); 1230 } 1231 1232 /* Create a new module element */ 1233 new_module = malloc(sizeof (mod_info_t)); 1234 if (new_module == NULL) { 1235 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1236 whoami, sizeof (mod_info_t)); 1237 exit(EX_OSERR); 1238 } 1239 1240 /* and fill in info... */ 1241 new_module->id = n_modules + 1; 1242 new_module->load_base = newmodp->startaddr; 1243 new_module->load_end = newmodp->endaddr; 1244 new_module->name = malloc(strlen(so_path) + 1); 1245 if (new_module->name == NULL) { 1246 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1247 whoami, strlen(so_path) + 1); 1248 exit(EX_OSERR); 1249 } 1250 (void) strcpy(new_module->name, so_path); 1251 #ifdef DEBUG 1252 if (debug & MODULEDEBUG) { 1253 (void) printf( 1254 "[process_modules] base=%#llx, end=%#llx\n", 1255 new_module->load_base, new_module->load_end); 1256 } 1257 #endif /* DEBUG */ 1258 1259 /* Create this module's nameslist */ 1260 process_namelist(new_module); 1261 1262 /* Add it to the tail of active module list */ 1263 last->next = new_module; 1264 n_modules++; 1265 1266 #ifdef DEBUG 1267 if (debug & MODULEDEBUG) { 1268 (void) printf( 1269 "[process_modules] total shared objects = %ld\n", 1270 n_modules - 1); 1271 } 1272 #endif /* DEBUG */ 1273 /* 1274 * Move to the next module in the PROF_MODULES_T list 1275 * (if present) 1276 */ 1277 if (!newmodp->next) 1278 more_modules = FALSE; 1279 1280 /* LINTED: pointer cast */ 1281 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next); 1282 1283 } while (more_modules); 1284 } 1285 1286 static void 1287 reset_active_modules(void) 1288 { 1289 mod_info_t *mi; 1290 1291 /* Except the executable, no other module should remain active */ 1292 for (mi = modules.next; mi; mi = mi->next) 1293 mi->active = FALSE; 1294 } 1295 1296 static void 1297 getpfiledata(caddr_t memp, size_t fsz) 1298 { 1299 ProfObject *objp; 1300 caddr_t file_end; 1301 bool found_pcsamples = FALSE, found_cgraph = FALSE; 1302 1303 /* 1304 * Before processing a new gmon.out, all modules except the 1305 * program executable must be made inactive, so that symbols 1306 * are searched only in the program executable, if we don't 1307 * find a MODULES_T object. Don't do it *after* we read a gmon.out, 1308 * because we need the active module data after we're done with 1309 * the last gmon.out, if we're doing summing. 1310 */ 1311 reset_active_modules(); 1312 1313 file_end = memp + fsz; 1314 /* LINTED: pointer cast */ 1315 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size); 1316 while ((caddr_t)objp < file_end) { 1317 #ifdef DEBUG 1318 { 1319 unsigned int type = 0; 1320 1321 if (debug & MONOUTDEBUG) { 1322 if (objp->type <= MAX_OBJTYPES) 1323 type = objp->type; 1324 1325 (void) printf( 1326 "\n[getpfiledata] object %s [%#lx]\n", 1327 objname[type], objp->type); 1328 } 1329 } 1330 #endif /* DEBUG */ 1331 switch (objp->type) { 1332 case PROF_MODULES_T : 1333 process_modules((ProfModuleList *) objp); 1334 break; 1335 1336 case PROF_CALLGRAPH_T : 1337 process_cgraph((ProfCallGraph *) objp); 1338 found_cgraph = TRUE; 1339 break; 1340 1341 case PROF_BUFFER_T : 1342 process_pcsamples((ProfBuffer *) objp); 1343 found_pcsamples = TRUE; 1344 break; 1345 1346 default : 1347 (void) fprintf(stderr, 1348 "%s: unknown prof object type=%d\n", 1349 whoami, objp->type); 1350 exit(EX_SOFTWARE); 1351 } 1352 /* LINTED: pointer cast */ 1353 objp = (ProfObject *)((caddr_t)objp + objp->size); 1354 } 1355 1356 if (!found_cgraph || !found_pcsamples) { 1357 (void) fprintf(stderr, 1358 "%s: missing callgraph/pcsamples object\n", whoami); 1359 exit(EX_SOFTWARE); 1360 } 1361 1362 if ((caddr_t)objp > file_end) { 1363 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami); 1364 exit(EX_SOFTWARE); 1365 } 1366 1367 if (first_file) 1368 first_file = FALSE; 1369 } 1370 1371 static void 1372 readarcs(FILE *pfile) 1373 { 1374 /* 1375 * the rest of the file consists of 1376 * a bunch of <from,self,count> tuples. 1377 */ 1378 /* CONSTCOND */ 1379 while (1) { 1380 struct rawarc arc; 1381 1382 if (rflag) { 1383 if (Bflag) { 1384 L_cgarc64 rtld_arc64; 1385 1386 /* 1387 * If rflag is set then this is an profiled 1388 * image generated by rtld. It needs to be 1389 * 'converted' to the standard data format. 1390 */ 1391 if (fread(&rtld_arc64, 1392 sizeof (L_cgarc64), 1, pfile) != 1) 1393 break; 1394 1395 if (rtld_arc64.cg_from == PRF_OUTADDR64) 1396 arc.raw_frompc = s_highpc + 0x10; 1397 else 1398 arc.raw_frompc = 1399 (pctype)rtld_arc64.cg_from; 1400 arc.raw_selfpc = (pctype)rtld_arc64.cg_to; 1401 arc.raw_count = (actype)rtld_arc64.cg_count; 1402 } else { 1403 L_cgarc rtld_arc; 1404 1405 /* 1406 * If rflag is set then this is an profiled 1407 * image generated by rtld. It needs to be 1408 * 'converted' to the standard data format. 1409 */ 1410 if (fread(&rtld_arc, 1411 sizeof (L_cgarc), 1, pfile) != 1) 1412 break; 1413 1414 if (rtld_arc.cg_from == PRF_OUTADDR) 1415 arc.raw_frompc = s_highpc + 0x10; 1416 else 1417 arc.raw_frompc = (pctype) 1418 (uintptr_t)rtld_arc.cg_from; 1419 arc.raw_selfpc = (pctype) 1420 (uintptr_t)rtld_arc.cg_to; 1421 arc.raw_count = (actype)rtld_arc.cg_count; 1422 } 1423 } else { 1424 if (Bflag) { 1425 if (fread(&arc, sizeof (struct rawarc), 1, 1426 pfile) != 1) { 1427 break; 1428 } 1429 } else { 1430 /* 1431 * If these aren't big %pc's, we need to read 1432 * into the 32-bit raw arc structure, and 1433 * assign the members into the actual arc. 1434 */ 1435 struct rawarc32 arc32; 1436 if (fread(&arc32, sizeof (struct rawarc32), 1437 1, pfile) != 1) 1438 break; 1439 arc.raw_frompc = (pctype)arc32.raw_frompc; 1440 arc.raw_selfpc = (pctype)arc32.raw_selfpc; 1441 arc.raw_count = (actype)arc32.raw_count; 1442 } 1443 } 1444 1445 #ifdef DEBUG 1446 if (debug & SAMPLEDEBUG) { 1447 (void) printf("[getpfile] frompc 0x%llx selfpc " 1448 "0x%llx count %lld\n", arc.raw_frompc, 1449 arc.raw_selfpc, arc.raw_count); 1450 } 1451 #endif /* DEBUG */ 1452 /* 1453 * add this arc 1454 */ 1455 tally(&modules, &modules, &arc); 1456 } 1457 if (first_file) 1458 first_file = FALSE; 1459 } 1460 1461 static void 1462 readsamples(FILE *pfile) 1463 { 1464 sztype i; 1465 unsigned_UNIT sample; 1466 1467 if (samples == 0) { 1468 samples = (unsigned_UNIT *) calloc(nsamples, 1469 sizeof (unsigned_UNIT)); 1470 if (samples == 0) { 1471 (void) fprintf(stderr, 1472 "%s: No room for %d sample pc's\n", 1473 whoami, sampbytes / sizeof (unsigned_UNIT)); 1474 exit(EX_OSERR); 1475 } 1476 } 1477 1478 for (i = 0; i < nsamples; i++) { 1479 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile); 1480 if (feof(pfile)) 1481 break; 1482 samples[i] += sample; 1483 } 1484 if (i != nsamples) { 1485 (void) fprintf(stderr, 1486 "%s: unexpected EOF after reading %d/%d samples\n", 1487 whoami, --i, nsamples); 1488 exit(EX_IOERR); 1489 } 1490 } 1491 1492 static void * 1493 handle_versioned(FILE *pfile, char *filename, size_t *fsz) 1494 { 1495 int fd; 1496 bool invalid_version; 1497 caddr_t fmem; 1498 struct stat buf; 1499 ProfHeader prof_hdr; 1500 off_t lret; 1501 1502 /* 1503 * Check versioning info. For now, let's say we provide 1504 * backward compatibility, so we accept all older versions. 1505 */ 1506 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) { 1507 perror("fread()"); 1508 exit(EX_IOERR); 1509 } 1510 1511 invalid_version = FALSE; 1512 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION) 1513 invalid_version = TRUE; 1514 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) { 1515 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION) 1516 invalid_version = FALSE; 1517 } 1518 1519 if (invalid_version) { 1520 (void) fprintf(stderr, "%s: version %d.%d not supported\n", 1521 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver); 1522 exit(EX_SOFTWARE); 1523 } 1524 1525 /* 1526 * Map gmon.out onto memory. 1527 */ 1528 (void) fclose(pfile); 1529 if ((fd = open(filename, O_RDONLY)) == -1) { 1530 perror(filename); 1531 exit(EX_IOERR); 1532 } 1533 1534 if ((lret = lseek(fd, 0, SEEK_END)) == -1) { 1535 perror(filename); 1536 exit(EX_IOERR); 1537 } 1538 *fsz = lret; 1539 1540 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0); 1541 if (fmem == MAP_FAILED) { 1542 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename); 1543 exit(EX_IOERR); 1544 } 1545 1546 /* 1547 * Before we close this fd, save this gmon.out's info to later verify 1548 * if the shared objects it references have changed since the time 1549 * they were used to generate this gmon.out 1550 */ 1551 if (fstat(fd, &buf) == -1) { 1552 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1553 whoami, filename); 1554 exit(EX_NOINPUT); 1555 } 1556 gmonout_info.dev = buf.st_dev; 1557 gmonout_info.ino = buf.st_ino; 1558 gmonout_info.mtime = buf.st_mtime; 1559 gmonout_info.size = buf.st_size; 1560 1561 (void) close(fd); 1562 1563 return ((void *) fmem); 1564 } 1565 1566 static void * 1567 openpfile(char *filename, size_t *fsz) 1568 { 1569 struct hdr tmp; 1570 FILE *pfile; 1571 unsigned long magic_num; 1572 size_t hdrsize; 1573 static bool first_time = TRUE; 1574 extern bool old_style; 1575 1576 if ((pfile = fopen(filename, "r")) == NULL) { 1577 perror(filename); 1578 exit(EX_IOERR); 1579 } 1580 1581 /* 1582 * Read in the magic. Note that we changed the cast "unsigned long" 1583 * to "unsigned int" because that's how h_magic is defined in the 1584 * new format ProfHeader. 1585 */ 1586 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) { 1587 perror("fread()"); 1588 exit(EX_IOERR); 1589 } 1590 1591 rewind(pfile); 1592 1593 /* 1594 * First check if this is versioned or *old-style* gmon.out 1595 */ 1596 if (magic_num == (unsigned int)PROF_MAGIC) { 1597 if ((!first_time) && (old_style == TRUE)) { 1598 (void) fprintf(stderr, "%s: can't mix old & new format " 1599 "profiled files\n", whoami); 1600 exit(EX_SOFTWARE); 1601 } 1602 first_time = FALSE; 1603 old_style = FALSE; 1604 return (handle_versioned(pfile, filename, fsz)); 1605 } 1606 1607 if ((!first_time) && (old_style == FALSE)) { 1608 (void) fprintf(stderr, "%s: can't mix old & new format " 1609 "profiled files\n", whoami); 1610 exit(EX_SOFTWARE); 1611 } 1612 1613 first_time = FALSE; 1614 old_style = TRUE; 1615 fsz = 0; 1616 1617 /* 1618 * Now, we need to determine if this is a run-time linker 1619 * profiled file or if it is a standard gmon.out. 1620 * 1621 * We do this by checking if magic matches PRF_MAGIC. If it 1622 * does, then this is a run-time linker profiled file, if it 1623 * doesn't, it must be a gmon.out file. 1624 */ 1625 if (magic_num == (unsigned long)PRF_MAGIC) 1626 rflag = TRUE; 1627 else 1628 rflag = FALSE; 1629 1630 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32); 1631 1632 if (rflag) { 1633 if (Bflag) { 1634 L_hdr64 l_hdr64; 1635 1636 /* 1637 * If the rflag is set then the input file is 1638 * rtld profiled data, we'll read it in and convert 1639 * it to the standard format (ie: make it look like 1640 * a gmon.out file). 1641 */ 1642 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) { 1643 perror("fread()"); 1644 exit(EX_IOERR); 1645 } 1646 if (l_hdr64.hd_version != PRF_VERSION_64) { 1647 (void) fprintf(stderr, 1648 "%s: expected version %d, " 1649 "got version %d when processing 64-bit " 1650 "run-time linker profiled file.\n", 1651 whoami, PRF_VERSION_64, l_hdr64.hd_version); 1652 exit(EX_SOFTWARE); 1653 } 1654 tmp.lowpc = 0; 1655 tmp.highpc = (pctype)l_hdr64.hd_hpc; 1656 tmp.ncnt = hdrsize + l_hdr64.hd_psize; 1657 } else { 1658 L_hdr l_hdr; 1659 1660 /* 1661 * If the rflag is set then the input file is 1662 * rtld profiled data, we'll read it in and convert 1663 * it to the standard format (ie: make it look like 1664 * a gmon.out file). 1665 */ 1666 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) { 1667 perror("fread()"); 1668 exit(EX_IOERR); 1669 } 1670 if (l_hdr.hd_version != PRF_VERSION) { 1671 (void) fprintf(stderr, 1672 "%s: expected version %d, " 1673 "got version %d when processing " 1674 "run-time linker profiled file.\n", 1675 whoami, PRF_VERSION, l_hdr.hd_version); 1676 exit(EX_SOFTWARE); 1677 } 1678 tmp.lowpc = 0; 1679 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc; 1680 tmp.ncnt = hdrsize + l_hdr.hd_psize; 1681 } 1682 } else { 1683 if (Bflag) { 1684 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) { 1685 perror("fread()"); 1686 exit(EX_IOERR); 1687 } 1688 } else { 1689 /* 1690 * If we're not reading big %pc's, we need to read 1691 * the 32-bit header, and assign the members to 1692 * the actual header. 1693 */ 1694 struct hdr32 hdr32; 1695 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) { 1696 perror("fread()"); 1697 exit(EX_IOERR); 1698 } 1699 tmp.lowpc = hdr32.lowpc; 1700 tmp.highpc = hdr32.highpc; 1701 tmp.ncnt = hdr32.ncnt; 1702 } 1703 } 1704 1705 /* 1706 * perform sanity check on profiled file we've opened. 1707 */ 1708 if (tmp.lowpc >= tmp.highpc) { 1709 if (rflag) 1710 (void) fprintf(stderr, 1711 "%s: badly formed profiled data.\n", 1712 filename); 1713 else 1714 (void) fprintf(stderr, 1715 "%s: badly formed gmon.out file.\n", 1716 filename); 1717 exit(EX_SOFTWARE); 1718 } 1719 1720 if (s_highpc != 0 && (tmp.lowpc != h.lowpc || 1721 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) { 1722 (void) fprintf(stderr, 1723 "%s: incompatible with first gmon file\n", 1724 filename); 1725 exit(EX_IOERR); 1726 } 1727 h = tmp; 1728 s_lowpc = h.lowpc; 1729 s_highpc = h.highpc; 1730 lowpc = h.lowpc / sizeof (UNIT); 1731 highpc = h.highpc / sizeof (UNIT); 1732 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0; 1733 nsamples = sampbytes / sizeof (unsigned_UNIT); 1734 1735 #ifdef DEBUG 1736 if (debug & SAMPLEDEBUG) { 1737 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc " 1738 "0x%llx hdr.ncnt %lld\n", 1739 h.lowpc, h.highpc, h.ncnt); 1740 (void) printf( 1741 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n", 1742 s_lowpc, s_highpc); 1743 (void) printf( 1744 "[openpfile] lowpc 0x%llx highpc 0x%llx\n", 1745 lowpc, highpc); 1746 (void) printf("[openpfile] sampbytes %d nsamples %d\n", 1747 sampbytes, nsamples); 1748 } 1749 #endif /* DEBUG */ 1750 1751 return ((void *) pfile); 1752 } 1753 1754 /* 1755 * Information from a gmon.out file depends on whether it's versioned 1756 * or non-versioned, *old style* gmon.out. If old-style, it is in two 1757 * parts : an array of sampling hits within pc ranges, and the arcs. If 1758 * versioned, it contains a header, followed by any number of 1759 * modules/callgraph/pcsample_buffer objects. 1760 */ 1761 static void 1762 getpfile(char *filename) 1763 { 1764 void *handle; 1765 size_t fsz; 1766 1767 handle = openpfile(filename, &fsz); 1768 1769 if (old_style) { 1770 readsamples((FILE *)handle); 1771 readarcs((FILE *)handle); 1772 (void) fclose((FILE *)handle); 1773 return; 1774 } 1775 1776 getpfiledata((caddr_t)handle, fsz); 1777 (void) munmap(handle, fsz); 1778 } 1779 1780 int 1781 main(int argc, char **argv) 1782 { 1783 char **sp; 1784 nltype **timesortnlp; 1785 int c; 1786 int errflg; 1787 1788 prog_name = *argv; /* preserve program name */ 1789 debug = 0; 1790 nflag = FALSE; 1791 bflag = TRUE; 1792 lflag = FALSE; 1793 Cflag = FALSE; 1794 first_file = TRUE; 1795 rflag = FALSE; 1796 Bflag = FALSE; 1797 errflg = FALSE; 1798 1799 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF) 1800 switch (c) { 1801 case 'a': 1802 aflag = TRUE; 1803 break; 1804 case 'b': 1805 bflag = FALSE; 1806 break; 1807 case 'c': 1808 cflag = TRUE; 1809 break; 1810 case 'C': 1811 Cflag = TRUE; 1812 break; 1813 case 'd': 1814 dflag = TRUE; 1815 debug |= atoi(optarg); 1816 (void) printf("[main] debug = 0x%x\n", debug); 1817 break; 1818 case 'D': 1819 Dflag = TRUE; 1820 break; 1821 case 'E': 1822 addlist(Elist, optarg); 1823 Eflag = TRUE; 1824 addlist(elist, optarg); 1825 eflag = TRUE; 1826 break; 1827 case 'e': 1828 addlist(elist, optarg); 1829 eflag = TRUE; 1830 break; 1831 case 'F': 1832 addlist(Flist, optarg); 1833 Fflag = TRUE; 1834 addlist(flist, optarg); 1835 fflag = TRUE; 1836 break; 1837 case 'f': 1838 addlist(flist, optarg); 1839 fflag = TRUE; 1840 break; 1841 case 'l': 1842 lflag = TRUE; 1843 break; 1844 case 'n': 1845 nflag = TRUE; 1846 number_funcs_toprint = atoi(optarg); 1847 break; 1848 case 's': 1849 sflag = TRUE; 1850 break; 1851 case 'z': 1852 zflag = TRUE; 1853 break; 1854 case '?': 1855 errflg++; 1856 1857 } 1858 1859 if (errflg) { 1860 (void) fprintf(stderr, 1861 "usage: gprof [ -abcCDlsz ] [ -e function-name ] " 1862 "[ -E function-name ]\n\t[ -f function-name ] " 1863 "[ -F function-name ]\n\t[ image-file " 1864 "[ profile-file ... ] ]\n"); 1865 exit(EX_USAGE); 1866 } 1867 1868 if (optind < argc) { 1869 a_outname = argv[optind++]; 1870 } else { 1871 a_outname = A_OUTNAME; 1872 } 1873 if (optind < argc) { 1874 gmonname = argv[optind++]; 1875 } else { 1876 gmonname = GMONNAME; 1877 } 1878 /* 1879 * turn off default functions 1880 */ 1881 for (sp = &defaultEs[0]; *sp; sp++) { 1882 Eflag = TRUE; 1883 addlist(Elist, *sp); 1884 eflag = TRUE; 1885 addlist(elist, *sp); 1886 } 1887 /* 1888 * how many ticks per second? 1889 * if we can't tell, report time in ticks. 1890 */ 1891 hz = sysconf(_SC_CLK_TCK); 1892 if (hz == -1) { 1893 hz = 1; 1894 (void) fprintf(stderr, "time is in ticks, not seconds\n"); 1895 } 1896 1897 getnfile(a_outname); 1898 1899 /* 1900 * get information about mon.out file(s). 1901 */ 1902 do { 1903 getpfile(gmonname); 1904 if (optind < argc) 1905 gmonname = argv[optind++]; 1906 else 1907 optind++; 1908 } while (optind <= argc); 1909 /* 1910 * dump out a gmon.sum file if requested 1911 */ 1912 if (sflag || Dflag) 1913 dumpsum(GMONSUM); 1914 1915 if (old_style) { 1916 /* 1917 * assign samples to procedures 1918 */ 1919 asgnsamples(); 1920 } 1921 1922 /* 1923 * assemble the dynamic profile 1924 */ 1925 timesortnlp = doarcs(); 1926 1927 /* 1928 * print the dynamic profile 1929 */ 1930 #ifdef DEBUG 1931 if (debug & ANYDEBUG) { 1932 /* raw output of all symbols in all their glory */ 1933 int i; 1934 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, " 1935 "#calls, selfcalls, index \n"); 1936 for (i = 0; i < modules.nname; i++) { /* Print each symbol */ 1937 if (timesortnlp[i]->name) 1938 (void) printf(" %s ", timesortnlp[i]->name); 1939 else 1940 (void) printf(" <cycle> "); 1941 (void) printf(" %lld ", timesortnlp[i]->value); 1942 (void) printf(" %lld ", timesortnlp[i]->svalue); 1943 (void) printf(" %f ", timesortnlp[i]->time); 1944 (void) printf(" %lld ", timesortnlp[i]->ncall); 1945 (void) printf(" %lld ", timesortnlp[i]->selfcalls); 1946 (void) printf(" %d ", timesortnlp[i]->index); 1947 (void) printf(" \n"); 1948 } 1949 } 1950 #endif /* DEBUG */ 1951 1952 printgprof(timesortnlp); 1953 /* 1954 * print the flat profile 1955 */ 1956 printprof(); 1957 /* 1958 * print the index 1959 */ 1960 printindex(); 1961 1962 /* 1963 * print the modules 1964 */ 1965 printmodules(); 1966 1967 done(); 1968 /* NOTREACHED */ 1969 return (0); 1970 } 1971