1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sysexits.h> 27 #include <stdlib.h> 28 #include <stdio.h> 29 #include <unistd.h> 30 #include "gprof.h" 31 #include "profile.h" 32 33 bool aflag; 34 bool bflag; 35 bool Bflag; 36 bool cflag; 37 bool Cflag; 38 bool dflag; 39 bool Dflag; 40 bool eflag; 41 bool Eflag; 42 bool fflag; 43 bool Fflag; 44 bool lflag; 45 bool sflag; 46 bool zflag; 47 bool nflag; 48 bool rflag; 49 bool first_file; 50 bool old_style; 51 double scale; 52 double totime; 53 Size n_pcsamples; 54 mod_info_t modules; 55 pctype s_lowpc; 56 pctype s_highpc; 57 sztype n_modules; 58 sztype sampbytes; 59 sztype nsamples; 60 unsigned short *samples; 61 fl_info_t aout_info; 62 fl_info_t gmonout_info; 63 long hz; 64 struct hdr h; 65 unsigned char *textspace; 66 int debug; 67 int number_funcs_toprint; 68 char *a_outname; 69 char *prog_name; 70 char *gmonname; 71 char *whoami = "gprof"; 72 static pctype lowpc, highpc; /* range profiled, in UNIT's */ 73 74 /* 75 * things which get -E excluded by default. 
76 */ 77 static char *defaultEs[] = { 78 "mcount", 79 "__mcleanup", 80 NULL 81 }; 82 83 #ifdef DEBUG 84 85 static char *objname[] = { 86 "<invalid object>", 87 "PROF_BUFFER_T", 88 "PROF_CALLGRAPH_T", 89 "PROF_MODULES_T", 90 NULL 91 }; 92 #define MAX_OBJTYPES 3 93 94 #endif /* DEBUG */ 95 96 void 97 done(void) 98 { 99 100 exit(EX_OK); 101 } 102 103 static pctype 104 max(pctype a, pctype b) 105 { 106 if (a > b) 107 return (a); 108 return (b); 109 } 110 111 static pctype 112 min(pctype a, pctype b) 113 { 114 if (a < b) 115 return (a); 116 return (b); 117 } 118 119 /* 120 * calculate scaled entry point addresses (to save time in asgnsamples), 121 * and possibly push the scaled entry points over the entry mask, 122 * if it turns out that the entry point is in one bucket and the code 123 * for a routine is in the next bucket. 124 * 125 */ 126 static void 127 alignentries(void) 128 { 129 struct nl *nlp; 130 #ifdef DEBUG 131 pctype bucket_of_entry; 132 pctype bucket_of_code; 133 #endif /* DEBUG */ 134 135 /* for old-style gmon.out, nameslist is only in modules.nl */ 136 137 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 138 nlp->svalue = nlp->value / sizeof (UNIT); 139 #ifdef DEBUG 140 bucket_of_entry = (nlp->svalue - lowpc) / scale; 141 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 142 if (bucket_of_entry < bucket_of_code) { 143 if (debug & SAMPLEDEBUG) { 144 (void) printf( 145 "[alignentries] pushing svalue 0x%llx " 146 "to 0x%llx\n", nlp->svalue, 147 nlp->svalue + UNITS_TO_CODE); 148 } 149 } 150 #endif /* DEBUG */ 151 } 152 } 153 154 /* 155 * old-style gmon.out 156 * ------------------ 157 * 158 * Assign samples to the procedures to which they belong. 159 * 160 * There are three cases as to where pcl and pch can be 161 * with respect to the routine entry addresses svalue0 and svalue1 162 * as shown in the following diagram. 
/*
 *	old-style gmon.out
 *	------------------
 *
 *	Assign samples to the procedures to which they belong.
 *
 *	There are three cases as to where pcl and pch can be
 *	with respect to the routine entry addresses svalue0 and svalue1
 *	as shown in the following diagram.  overlap computes the
 *	distance between the arrows, the fraction of the sample
 *	that is to be credited to the routine which starts at svalue0.
 *
 *	    svalue0                                         svalue1
 *	       |                                               |
 *	       v                                               v
 *
 *	       +-----------------------------------------------+
 *	       |                                               |
 *	  |  ->|    |<-         ->|         |<-         ->|    |<-  |
 *	  |         |             |         |             |         |
 *	  +---------+             +---------+             +---------+
 *
 *	  ^         ^             ^         ^             ^         ^
 *	  |         |             |         |             |         |
 *	 pcl       pch           pcl       pch           pcl       pch
 *
 *	For the vax we assert that samples will never fall in the first
 *	two bytes of any routine, since that is the entry mask,
 *	thus we call alignentries() to adjust the entry points if
 *	the entry mask falls in one bucket but the code for the routine
 *	doesn't start until the next bucket.  In conjunction with the
 *	alignment of routine addresses, this should allow us to have
 *	only one sample for every four bytes of text space and never
 *	have any overlap (the two end cases, above).
 *
 *	Side effects: sets the global `scale' (pc units per sample
 *	bucket), adds every non-zero sample count to the global `totime',
 *	and adds each routine's share to its nl[].time.
 */
static void
asgnsamples(void)
{
	sztype i, j;
	unsigned_UNIT ccnt;
	double time;
	pctype pcl, pch;
	pctype overlap;
	pctype svalue0, svalue1;

	extern mod_info_t modules;
	nltype *nl = modules.nl;
	sztype nname = modules.nname;

	/* read samples and assign to namelist symbols */
	/* scale = width of one sample bucket, in the units of lowpc/highpc */
	scale = highpc - lowpc;
	scale /= nsamples;
	alignentries();
	/*
	 * i walks the sample buckets; j walks the namelist.  j is NOT reset
	 * for each bucket: the inner loop resumes one symbol before the
	 * place where it stopped for the previous bucket.
	 */
	for (i = 0, j = 1; i < nsamples; i++) {
		ccnt = samples[i];
		if (ccnt == 0)
			continue;
		/* [pcl, pch) is the pc range covered by bucket i */
		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
		pcl = lowpc + scale * i;
		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
		pch = lowpc + scale * (i + 1);
		time = ccnt;
#ifdef DEBUG
		if (debug & SAMPLEDEBUG) {
			(void) printf(
			    "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
			    pcl, pch, ccnt);
		}
#endif /* DEBUG */
		totime += time;
		/* back up one symbol (but never below index 0) and rescan */
		for (j = (j ? j - 1 : 0); j < nname; j++) {
			svalue0 = nl[j].svalue;
			/*
			 * NOTE(review): for j == nname - 1 this reads
			 * nl[nname]; presumably the namelist carries one
			 * extra trailing entry -- confirm where it is built.
			 */
			svalue1 = nl[j+1].svalue;
			/*
			 * if high end of tick is below entry address,
			 * go for next tick.
			 */
			if (pch < svalue0)
				break;
			/*
			 * if low end of tick into next routine,
			 * go for next routine.
			 */
			if (pcl >= svalue1)
				continue;
			/* part of the bucket lying inside [svalue0, svalue1) */
			overlap = min(pch, svalue1) - max(pcl, svalue0);
			if (overlap != 0) {
#ifdef DEBUG
				if (debug & SAMPLEDEBUG) {
					(void) printf("[asgnsamples] "
					    "(0x%llx->0x%llx-0x%llx) %s gets "
					    "%f ticks %lld overlap\n",
					    nl[j].value/sizeof (UNIT), svalue0,
					    svalue1, nl[j].name,
					    overlap * time / scale, overlap);
				}
#endif /* DEBUG */
				/* credit this routine pro rata */
				nl[j].time += overlap * time / scale;
			}
		}
	}
#ifdef DEBUG
	if (debug & SAMPLEDEBUG) {
		(void) printf("[asgnsamples] totime %f\n", totime);
	}
#endif /* DEBUG */
}
prof_func.next_to = 0; 302 else { 303 prof_func.next_to = cur_offset + 304 nlp->ncallers * PROFFUNC_SZ; 305 } 306 307 /* 308 * Dump this callee's raw arc information with all 309 * its callers 310 */ 311 caller_id = 1; 312 for (arcp = nlp->parents; arcp; 313 arcp = arcp->arc_parentlist) { 314 /* 315 * If no more callers for this callee, set 316 * next_from to 0 317 */ 318 if (caller_id == nlp->ncallers) 319 prof_func.next_from = 0; 320 else { 321 prof_func.next_from = cur_offset + 322 PROFFUNC_SZ; 323 } 324 325 prof_func.frompc = 326 arcp->arc_parentp->module->load_base + 327 (arcp->arc_parentp->value - 328 arcp->arc_parentp->module->txt_origin); 329 prof_func.topc = mi->load_base + 330 (nlp->value - mi->txt_origin); 331 prof_func.count = arcp->arc_count; 332 333 334 if (fwrite(&prof_func, sizeof (ProfFunction), 335 1, fp) != 1) { 336 perror(filename); 337 exit(EX_IOERR); 338 } 339 /* CONSTCOND */ 340 if (FUNC_FILLER) 341 (void) fseek(fp, FUNC_FILLER, SEEK_CUR); 342 343 cur_offset += PROFFUNC_SZ; 344 caller_id++; 345 } 346 } /* for nlp... */ 347 } /* for mi... */ 348 } 349 350 /* 351 * To save all pc-hits in all the gmon.out's is infeasible, as this 352 * may become quite huge even with a small number of files to sum. 353 * Instead, we'll dump *fictitious hits* to correct functions 354 * by scanning module namelists. Again, since this is summing 355 * pc-hits, we may have to dump the pcsamples out in chunks if the 356 * number of pc-hits is high. 
357 */ 358 static void 359 dump_hits(FILE *fp, char *filename, nltype *nlp) 360 { 361 Address *p, hitpc; 362 size_t i, nelem, ntowrite; 363 364 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE) 365 nelem = PROF_BUFFER_SIZE; 366 367 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) { 368 (void) fprintf(stderr, "%s: no room for %d pcsamples\n", 369 whoami, nelem); 370 exit(EX_OSERR); 371 } 372 373 /* 374 * Set up *fictitious* hits (to function entry) buffer 375 */ 376 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin); 377 for (i = 0; i < nelem; i++) 378 p[i] = hitpc; 379 380 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) { 381 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) { 382 perror(filename); 383 exit(EX_IOERR); 384 } 385 } 386 387 if (ntowrite) { 388 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) { 389 perror(filename); 390 exit(EX_IOERR); 391 } 392 } 393 394 free(p); 395 } 396 397 static void 398 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs, 399 unsigned long *ncallees) 400 { 401 ProfBuffer prof_buffer; 402 arctype *arcp; 403 mod_info_t *mi; 404 nltype *nlp; 405 406 prof_buffer.type = PROF_BUFFER_T; 407 prof_buffer.version = PROF_BUFFER_VER; 408 prof_buffer.buffer = PROFBUF_SZ; 409 prof_buffer.bufsize = n_pcsamples; 410 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address); 411 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) { 412 perror(filename); 413 exit(EX_IOERR); 414 } 415 /* CONSTCOND */ 416 if (BUF_FILLER) 417 (void) fseek(fp, BUF_FILLER, SEEK_CUR); 418 419 *tarcs = 0; 420 *ncallees = 0; 421 for (mi = &modules; mi; mi = mi->next) { 422 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 423 if (nlp->nticks) 424 dump_hits(fp, filename, nlp); 425 426 nlp->ncallers = 0; 427 for (arcp = nlp->parents; arcp; 428 arcp = arcp->arc_parentlist) { 429 (nlp->ncallers)++; 430 } 431 432 if (nlp->ncallers) { 433 (*tarcs) += nlp->ncallers; 434 (*ncallees)++; 435 } 436 } 437 } 
438 } 439 440 static void 441 dump_modules(FILE *fp, char *filename, size_t pbuf_sz) 442 { 443 char *pbuf, *p; 444 size_t namelen; 445 Index off_nxt, off_path; 446 mod_info_t *mi; 447 448 ProfModuleList prof_modlist; 449 ProfModule prof_mod; 450 451 /* Allocate for path strings buffer */ 452 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN); 453 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) { 454 (void) fprintf(stderr, "%s: no room for %d bytes\n", 455 whoami, pbuf_sz * sizeof (char)); 456 exit(EX_OSERR); 457 } 458 459 /* Dump out PROF_MODULE_T info for all non-aout modules */ 460 prof_modlist.type = PROF_MODULES_T; 461 prof_modlist.version = PROF_MODULES_VER; 462 prof_modlist.modules = PROFMODLIST_SZ; 463 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ + 464 pbuf_sz; 465 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) { 466 perror(filename); 467 exit(EX_IOERR); 468 } 469 /* CONSTCOND */ 470 if (MODLIST_FILLER) 471 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR); 472 473 /* 474 * Initialize offsets for ProfModule elements. 
475 */ 476 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ; 477 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ; 478 479 for (mi = modules.next; mi; mi = mi->next) { 480 if (mi->next) 481 prof_mod.next = off_nxt; 482 else 483 prof_mod.next = 0; 484 prof_mod.path = off_path; 485 prof_mod.startaddr = mi->load_base; 486 prof_mod.endaddr = mi->load_end; 487 488 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) { 489 perror(filename); 490 exit(EX_IOERR); 491 } 492 493 /* CONSTCOND */ 494 if (MOD_FILLER) 495 (void) fseek(fp, MOD_FILLER, SEEK_CUR); 496 497 (void) strcpy(p, mi->name); 498 namelen = strlen(mi->name); 499 p += namelen + 1; 500 501 /* Note that offset to every path str need not be aligned */ 502 off_nxt += PROFMOD_SZ; 503 off_path += namelen + 1; 504 } 505 506 /* Write out the module path strings */ 507 if (pbuf_sz) { 508 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) { 509 perror(filename); 510 exit(EX_IOERR); 511 } 512 513 free(pbuf); 514 } 515 } 516 517 /* 518 * If we have inactive modules, their current load addresses may overlap with 519 * active ones, and so we've to assign fictitious, non-overlapping addresses 520 * to all modules before we dump them. 521 */ 522 static void 523 fixup_maps(size_t *pathsz) 524 { 525 unsigned int n_inactive = 0; 526 Address lbase = 0, lend; 527 mod_info_t *mi; 528 529 /* Pick the lowest load address among modules */ 530 *pathsz = 0; 531 for (mi = &modules; mi; mi = mi->next) { 532 533 if (mi->active == FALSE) 534 n_inactive++; 535 536 if (mi == &modules || mi->load_base < lbase) 537 lbase = mi->load_base; 538 539 /* 540 * Return total path size of non-aout modules only 541 */ 542 if (mi != &modules) 543 *pathsz = (*pathsz) + strlen(mi->name) + 1; 544 } 545 546 /* 547 * All module info is in fine shape already if there are no 548 * inactive modules 549 */ 550 if (n_inactive == 0) 551 return; 552 553 /* 554 * Assign fictitious load addresses to all (non-aout) modules so 555 * that sum info can be dumped out. 
556 */ 557 for (mi = modules.next; mi; mi = mi->next) { 558 lend = lbase + (mi->data_end - mi->txt_origin); 559 if ((lbase < modules.load_base && lend < modules.load_base) || 560 (lbase > modules.load_end && lend > modules.load_end)) { 561 562 mi->load_base = lbase; 563 mi->load_end = lend; 564 565 /* just to give an appearance of reality */ 566 lbase = CEIL(lend + PGSZ, PGSZ); 567 } else { 568 /* 569 * can't use this lbase & lend pair, as it 570 * overlaps with aout's addresses 571 */ 572 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ); 573 mi->load_end = mi->load_base + (lend - lbase); 574 575 lbase = CEIL(mi->load_end + PGSZ, PGSZ); 576 } 577 } 578 } 579 580 static void 581 dump_gprofhdr(FILE *fp, char *filename) 582 { 583 ProfHeader prof_hdr; 584 585 prof_hdr.h_magic = PROF_MAGIC; 586 prof_hdr.h_major_ver = PROF_MAJOR_VERSION; 587 prof_hdr.h_minor_ver = PROF_MINOR_VERSION; 588 prof_hdr.size = PROFHDR_SZ; 589 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) { 590 perror(filename); 591 exit(EX_IOERR); 592 } 593 594 /* CONSTCOND */ 595 if (HDR_FILLER) 596 (void) fseek(fp, HDR_FILLER, SEEK_CUR); 597 } 598 599 static void 600 dumpsum_ostyle(char *sumfile) 601 { 602 nltype *nlp; 603 arctype *arcp; 604 struct rawarc arc; 605 struct rawarc32 arc32; 606 FILE *sfile; 607 608 if ((sfile = fopen(sumfile, "w")) == NULL) { 609 perror(sumfile); 610 exit(EX_IOERR); 611 } 612 /* 613 * dump the header; use the last header read in 614 */ 615 if (Bflag) { 616 if (fwrite(&h, sizeof (h), 1, sfile) != 1) { 617 perror(sumfile); 618 exit(EX_IOERR); 619 } 620 } else { 621 struct hdr32 hdr; 622 hdr.lowpc = (pctype32)h.lowpc; 623 hdr.highpc = (pctype32)h.highpc; 624 hdr.ncnt = (pctype32)h.ncnt; 625 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) { 626 perror(sumfile); 627 exit(EX_IOERR); 628 } 629 } 630 /* 631 * dump the samples 632 */ 633 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) != 634 nsamples) { 635 perror(sumfile); 636 exit(EX_IOERR); 637 } 638 /* 639 * 
dump the normalized raw arc information. For old-style dumping, 640 * the only namelist is in modules.nl 641 */ 642 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 643 for (arcp = nlp->children; arcp; 644 arcp = arcp->arc_childlist) { 645 if (Bflag) { 646 arc.raw_frompc = arcp->arc_parentp->value; 647 arc.raw_selfpc = arcp->arc_childp->value; 648 arc.raw_count = arcp->arc_count; 649 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) { 650 perror(sumfile); 651 exit(EX_IOERR); 652 } 653 } else { 654 arc32.raw_frompc = 655 (pctype32)arcp->arc_parentp->value; 656 arc32.raw_selfpc = 657 (pctype32)arcp->arc_childp->value; 658 arc32.raw_count = (actype32)arcp->arc_count; 659 if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 660 1) { 661 perror(sumfile); 662 exit(EX_IOERR); 663 } 664 } 665 #ifdef DEBUG 666 if (debug & SAMPLEDEBUG) { 667 (void) printf( 668 "[dumpsum_ostyle] frompc 0x%llx selfpc " 669 "0x%llx count %lld\n", arc.raw_frompc, 670 arc.raw_selfpc, arc.raw_count); 671 } 672 #endif /* DEBUG */ 673 } 674 } 675 (void) fclose(sfile); 676 } 677 678 /* 679 * dump out the gmon.sum file 680 */ 681 static void 682 dumpsum(char *sumfile) 683 { 684 FILE *sfile; 685 size_t pathbuf_sz; 686 unsigned long total_arcs; /* total number of arcs in all */ 687 unsigned long ncallees; /* no. of callees with parents */ 688 689 if (old_style) { 690 dumpsum_ostyle(sumfile); 691 return; 692 } 693 694 if ((sfile = fopen(sumfile, "w")) == NULL) { 695 perror(sumfile); 696 exit(EX_IOERR); 697 } 698 699 /* 700 * Dump the new-style gprof header. Even if one of the original 701 * profiled-files was of a older version, the summed file is of 702 * current version only. 703 */ 704 dump_gprofhdr(sfile, sumfile); 705 706 /* 707 * Fix up load-maps and dump out modules info 708 * 709 * Fix up module load maps so inactive modules get *some* address 710 * (and btw, could you get the total size of non-aout module path 711 * strings please ?) 
712 */ 713 fixup_maps(&pathbuf_sz); 714 dump_modules(sfile, sumfile, pathbuf_sz); 715 716 717 /* 718 * Dump out the summ'd pcsamples 719 * 720 * For dumping call graph information later, we need certain 721 * statistics (like total arcs, number of callers for each node); 722 * collect these also while we are at it. 723 */ 724 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees); 725 726 /* 727 * Dump out the summ'd call graph information 728 */ 729 dump_callgraph(sfile, sumfile, total_arcs, ncallees); 730 731 732 (void) fclose(sfile); 733 } 734 735 static void 736 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp) 737 { 738 nltype *parentp; 739 nltype *childp; 740 741 /* 742 * if count == 0 this is a null arc and 743 * we don't need to tally it. 744 */ 745 if (rawp->raw_count == 0) 746 return; 747 748 /* 749 * Lookup the caller and callee pcs in namelists of 750 * appropriate modules 751 */ 752 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL); 753 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL); 754 if (childp && parentp) { 755 if (!Dflag) 756 childp->ncall += rawp->raw_count; 757 else { 758 if (first_file) 759 childp->ncall += rawp->raw_count; 760 else { 761 childp->ncall -= rawp->raw_count; 762 if (childp->ncall < 0) 763 childp->ncall = 0; 764 } 765 } 766 767 #ifdef DEBUG 768 if (debug & TALLYDEBUG) { 769 (void) printf("[tally] arc from %s to %s traversed " 770 "%lld times\n", parentp->name, 771 childp->name, rawp->raw_count); 772 } 773 #endif /* DEBUG */ 774 addarc(parentp, childp, rawp->raw_count); 775 } 776 } 777 778 /* 779 * Look up a module's base address in a sorted list of pc-hits. Unlike 780 * nllookup(), this deals with misses by mapping them to the next *higher* 781 * pc-hit. This is so that we get into the module's first pc-hit rightaway, 782 * even if the module's entry-point (load_base) itself is not a hit. 
783 */ 784 static Address * 785 locate(Address *pclist, size_t nelem, Address keypc) 786 { 787 size_t low = 0, middle, high = nelem - 1; 788 789 if (keypc <= pclist[low]) 790 return (pclist); 791 792 if (keypc > pclist[high]) 793 return (NULL); 794 795 while (low != high) { 796 middle = (high + low) >> 1; 797 798 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc)) 799 return (&pclist[middle + 1]); 800 801 if (pclist[middle] >= keypc) 802 high = middle; 803 else 804 low = middle + 1; 805 } 806 807 /* must never reach here! */ 808 return (NULL); 809 } 810 811 static void 812 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples) 813 { 814 Address *pcptr, *pcse = pcsmpl + n_samples; 815 pctype nxt_func; 816 nltype *fnl; 817 size_t func_nticks; 818 #ifdef DEBUG 819 size_t n_hits_in_module = 0; 820 #endif /* DEBUG */ 821 822 /* Locate the first pc-hit for this module */ 823 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) { 824 #ifdef DEBUG 825 if (debug & PCSMPLDEBUG) { 826 (void) printf("[assign_pcsamples] no pc-hits in\n"); 827 (void) printf( 828 " `%s'\n", module->name); 829 } 830 #endif /* DEBUG */ 831 return; /* no pc-hits in this module */ 832 } 833 834 /* Assign all pc-hits in this module to appropriate functions */ 835 while ((pcptr < pcse) && (*pcptr < module->load_end)) { 836 837 /* Update the corresponding function's time */ 838 if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) { 839 /* 840 * Collect all pc-hits in this function. Each 841 * pc-hit counts as 1 tick. 
842 */ 843 func_nticks = 0; 844 while ((pcptr < pcse) && (*pcptr < nxt_func)) { 845 func_nticks++; 846 pcptr++; 847 } 848 849 if (func_nticks == 0) 850 pcptr++; 851 else { 852 fnl->nticks += func_nticks; 853 fnl->time += func_nticks; 854 totime += func_nticks; 855 } 856 857 #ifdef DEBUG 858 n_hits_in_module += func_nticks; 859 #endif /* DEBUG */ 860 } else { 861 /* 862 * pc sample could not be assigned to function; 863 * probably in a PLT 864 */ 865 pcptr++; 866 } 867 } 868 869 #ifdef DEBUG 870 if (debug & PCSMPLDEBUG) { 871 (void) printf( 872 "[assign_pcsamples] %ld hits in\n", n_hits_in_module); 873 (void) printf(" `%s'\n", module->name); 874 } 875 #endif /* DEBUG */ 876 } 877 878 int 879 pc_cmp(const void *arg1, const void *arg2) 880 { 881 Address *pc1 = (Address *)arg1; 882 Address *pc2 = (Address *)arg2; 883 884 if (*pc1 > *pc2) 885 return (1); 886 887 if (*pc1 < *pc2) 888 return (-1); 889 890 return (0); 891 } 892 893 static void 894 process_pcsamples(ProfBuffer *bufp) 895 { 896 Address *pc_samples; 897 mod_info_t *mi; 898 caddr_t p; 899 size_t chunk_size, nelem_read, nelem_to_read; 900 901 #ifdef DEBUG 902 if (debug & PCSMPLDEBUG) { 903 (void) printf( 904 "[process_pcsamples] number of pcsamples = %lld\n", 905 bufp->bufsize); 906 } 907 #endif /* DEBUG */ 908 909 /* buffer with no pc samples ? */ 910 if (bufp->bufsize == 0) 911 return; 912 913 /* 914 * If we're processing pcsamples of a profile sum, we could have 915 * more than PROF_BUFFER_SIZE number of samples. In such a case, 916 * we must read the pcsamples in chunks. 
917 */ 918 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE) 919 chunk_size = PROF_BUFFER_SIZE; 920 921 /* Allocate for the pcsample chunk */ 922 pc_samples = (Address *) calloc(chunk_size, sizeof (Address)); 923 if (pc_samples == NULL) { 924 (void) fprintf(stderr, "%s: no room for %d sample pc's\n", 925 whoami, chunk_size); 926 exit(EX_OSERR); 927 } 928 929 /* Copy the current set of pcsamples */ 930 nelem_read = 0; 931 nelem_to_read = bufp->bufsize; 932 p = (char *)bufp + bufp->buffer; 933 934 while (nelem_read < nelem_to_read) { 935 (void) memcpy((void *) pc_samples, p, 936 chunk_size * sizeof (Address)); 937 938 /* Sort the pc samples */ 939 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp); 940 941 /* 942 * Assign pcsamples to functions in the currently active 943 * module list 944 */ 945 for (mi = &modules; mi; mi = mi->next) { 946 if (mi->active == FALSE) 947 continue; 948 assign_pcsamples(mi, pc_samples, chunk_size); 949 } 950 951 p += (chunk_size * sizeof (Address)); 952 nelem_read += chunk_size; 953 954 if ((nelem_to_read - nelem_read) < chunk_size) 955 chunk_size = nelem_to_read - nelem_read; 956 } 957 958 free(pc_samples); 959 960 /* Update total number of pcsamples read so far */ 961 n_pcsamples += bufp->bufsize; 962 } 963 964 static mod_info_t * 965 find_module(Address addr) 966 { 967 mod_info_t *mi; 968 969 for (mi = &modules; mi; mi = mi->next) { 970 if (mi->active == FALSE) 971 continue; 972 973 if (addr >= mi->load_base && addr < mi->load_end) 974 return (mi); 975 } 976 977 return (NULL); 978 } 979 980 static void 981 process_cgraph(ProfCallGraph *cgp) 982 { 983 struct rawarc arc; 984 mod_info_t *callee_mi, *caller_mi; 985 ProfFunction *calleep, *callerp; 986 Index caller_off, callee_off; 987 988 /* 989 * Note that *callee_off* increment in the for loop below 990 * uses *calleep* and *calleep* doesn't get set until the for loop 991 * is entered. 
We don't expect the increment to be executed before 992 * the loop body is executed atleast once, so this should be ok. 993 */ 994 for (callee_off = cgp->functions; callee_off; 995 callee_off = calleep->next_to) { 996 997 /* LINTED: pointer cast */ 998 calleep = (ProfFunction *)((char *)cgp + callee_off); 999 1000 /* 1001 * We could choose either to sort the {caller, callee} 1002 * list twice and assign callee/caller to modules or inspect 1003 * each callee/caller in the active modules list. Since 1004 * the modules list is usually very small, we'l choose the 1005 * latter. 1006 */ 1007 1008 /* 1009 * If we cannot identify a callee with a module, there's 1010 * no use worrying about who called it. 1011 */ 1012 if ((callee_mi = find_module(calleep->topc)) == NULL) { 1013 #ifdef DEBUG 1014 if (debug & CGRAPHDEBUG) { 1015 (void) printf( 1016 "[process_cgraph] callee %#llx missed\n", 1017 calleep->topc); 1018 } 1019 #endif /* DEBUG */ 1020 continue; 1021 } else 1022 arc.raw_selfpc = calleep->topc; 1023 1024 for (caller_off = callee_off; caller_off; 1025 caller_off = callerp->next_from) { 1026 1027 /* LINTED: pointer cast */ 1028 callerp = (ProfFunction *)((char *)cgp + caller_off); 1029 if ((caller_mi = find_module(callerp->frompc)) == 1030 NULL) { 1031 #ifdef DEBUG 1032 if (debug & CGRAPHDEBUG) { 1033 (void) printf( 1034 "[process_cgraph] caller %#llx " 1035 "missed\n", callerp->frompc); 1036 } 1037 #endif /* DEBUG */ 1038 continue; 1039 } 1040 1041 arc.raw_frompc = callerp->frompc; 1042 arc.raw_count = callerp->count; 1043 1044 #ifdef DEBUG 1045 if (debug & CGRAPHDEBUG) { 1046 (void) printf( 1047 "[process_cgraph] arc <%#llx, %#llx, " 1048 "%lld>\n", arc.raw_frompc, arc.raw_selfpc, 1049 arc.raw_count); 1050 } 1051 #endif /* DEBUG */ 1052 tally(caller_mi, callee_mi, &arc); 1053 } 1054 } 1055 1056 #ifdef DEBUG 1057 puts("\n"); 1058 #endif /* DEBUG */ 1059 } 1060 1061 /* 1062 * Two modules overlap each other if they don't lie completely *outside* 1063 * each other. 
1064 */ 1065 static bool 1066 does_overlap(ProfModule *new, mod_info_t *old) 1067 { 1068 /* case 1: new module lies completely *before* the old one */ 1069 if (new->startaddr < old->load_base && new->endaddr <= old->load_base) 1070 return (FALSE); 1071 1072 /* case 2: new module lies completely *after* the old one */ 1073 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end) 1074 return (FALSE); 1075 1076 /* probably a dlopen: the modules overlap each other */ 1077 return (TRUE); 1078 } 1079 1080 static bool 1081 is_same_as_aout(char *modpath, struct stat *buf) 1082 { 1083 if (stat(modpath, buf) == -1) { 1084 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1085 whoami, modpath); 1086 exit(EX_NOINPUT); 1087 } 1088 1089 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino)) 1090 return (TRUE); 1091 else 1092 return (FALSE); 1093 } 1094 1095 static void 1096 process_modules(ProfModuleList *modlp) 1097 { 1098 ProfModule *newmodp; 1099 mod_info_t *mi, *last, *new_module; 1100 char *so_path; 1101 bool more_modules = TRUE; 1102 struct stat so_statbuf; 1103 1104 #ifdef DEBUG 1105 if (debug & MODULEDEBUG) { 1106 (void) printf("[process_modules] module obj version %u\n", 1107 modlp->version); 1108 } 1109 #endif /* DEBUG */ 1110 1111 /* Check version of module type object */ 1112 if (modlp->version > PROF_MODULES_VER) { 1113 (void) fprintf(stderr, "%s: version %d for module type objects" 1114 "is not supported\n", whoami, modlp->version); 1115 exit(EX_SOFTWARE); 1116 } 1117 1118 1119 /* 1120 * Scan the PROF_MODULES_T list and add modules to current list 1121 * of modules, if they're not present already 1122 */ 1123 /* LINTED: pointer cast */ 1124 newmodp = (ProfModule *)((char *)modlp + modlp->modules); 1125 do { 1126 /* 1127 * Since the prog could've been renamed after its run, we 1128 * should see if this overlaps a.out. If it does, it is 1129 * probably the renamed aout. 
We should also skip any other 1130 * non-sharedobj's that we see (or should we report an error ?) 1131 */ 1132 so_path = (caddr_t)modlp + newmodp->path; 1133 if (does_overlap(newmodp, &modules) || 1134 is_same_as_aout(so_path, &so_statbuf) || 1135 (!is_shared_obj(so_path))) { 1136 1137 if (!newmodp->next) 1138 more_modules = FALSE; 1139 1140 /* LINTED: pointer cast */ 1141 newmodp = (ProfModule *) 1142 ((caddr_t)modlp + newmodp->next); 1143 #ifdef DEBUG 1144 if (debug & MODULEDEBUG) { 1145 (void) printf( 1146 "[process_modules] `%s'\n", so_path); 1147 (void) printf(" skipped\n"); 1148 } 1149 #endif /* DEBUG */ 1150 continue; 1151 } 1152 #ifdef DEBUG 1153 if (debug & MODULEDEBUG) 1154 (void) printf("[process_modules] `%s'...\n", so_path); 1155 #endif /* DEBUG */ 1156 1157 /* 1158 * Check all modules (leave the first one, 'cos that 1159 * is the program executable info). If this module is already 1160 * there in the list, update the load addresses and proceed. 1161 */ 1162 last = &modules; 1163 while ((mi = last->next) != NULL) { 1164 /* 1165 * We expect the full pathname for all shared objects 1166 * needed by the program executable. In this case, we 1167 * simply need to compare the paths to see if they are 1168 * the same file. 1169 */ 1170 if (strcmp(mi->name, so_path) == 0) 1171 break; 1172 1173 /* 1174 * Check if this new shared object will overlap 1175 * any existing module. 
If yes, remove the old one 1176 * from the linked list (but don't free it, 'cos 1177 * there may be symbols referring to this module 1178 * still) 1179 */ 1180 if (does_overlap(newmodp, mi)) { 1181 #ifdef DEBUG 1182 if (debug & MODULEDEBUG) { 1183 (void) printf( 1184 "[process_modules] `%s'\n", 1185 so_path); 1186 (void) printf( 1187 " overlaps\n"); 1188 (void) printf( 1189 " `%s'\n", 1190 mi->name); 1191 } 1192 #endif /* DEBUG */ 1193 mi->active = FALSE; 1194 } 1195 1196 last = mi; 1197 } 1198 1199 /* Module already there, skip it */ 1200 if (mi != NULL) { 1201 mi->load_base = newmodp->startaddr; 1202 mi->load_end = newmodp->endaddr; 1203 mi->active = TRUE; 1204 if (!newmodp->next) 1205 more_modules = FALSE; 1206 1207 /* LINTED: pointer cast */ 1208 newmodp = (ProfModule *) 1209 ((caddr_t)modlp + newmodp->next); 1210 1211 #ifdef DEBUG 1212 if (debug & MODULEDEBUG) { 1213 (void) printf("[process_modules] base=%#llx, " 1214 "end=%#llx\n", mi->load_base, mi->load_end); 1215 } 1216 #endif /* DEBUG */ 1217 continue; 1218 } 1219 1220 /* 1221 * Check if gmon.out is outdated with respect to the new 1222 * module we want to add 1223 */ 1224 if (gmonout_info.mtime < so_statbuf.st_mtime) { 1225 (void) fprintf(stderr, 1226 "%s: shared obj outdates prof info\n", whoami); 1227 (void) fprintf(stderr, "\t(newer %s)\n", so_path); 1228 exit(EX_NOINPUT); 1229 } 1230 1231 /* Create a new module element */ 1232 new_module = malloc(sizeof (mod_info_t)); 1233 if (new_module == NULL) { 1234 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1235 whoami, sizeof (mod_info_t)); 1236 exit(EX_OSERR); 1237 } 1238 1239 /* and fill in info... 
*/ 1240 new_module->id = n_modules + 1; 1241 new_module->load_base = newmodp->startaddr; 1242 new_module->load_end = newmodp->endaddr; 1243 new_module->name = malloc(strlen(so_path) + 1); 1244 if (new_module->name == NULL) { 1245 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1246 whoami, strlen(so_path) + 1); 1247 exit(EX_OSERR); 1248 } 1249 (void) strcpy(new_module->name, so_path); 1250 #ifdef DEBUG 1251 if (debug & MODULEDEBUG) { 1252 (void) printf( 1253 "[process_modules] base=%#llx, end=%#llx\n", 1254 new_module->load_base, new_module->load_end); 1255 } 1256 #endif /* DEBUG */ 1257 1258 /* Create this module's nameslist */ 1259 process_namelist(new_module); 1260 1261 /* Add it to the tail of active module list */ 1262 last->next = new_module; 1263 n_modules++; 1264 1265 #ifdef DEBUG 1266 if (debug & MODULEDEBUG) { 1267 (void) printf( 1268 "[process_modules] total shared objects = %ld\n", 1269 n_modules - 1); 1270 } 1271 #endif /* DEBUG */ 1272 /* 1273 * Move to the next module in the PROF_MODULES_T list 1274 * (if present) 1275 */ 1276 if (!newmodp->next) 1277 more_modules = FALSE; 1278 1279 /* LINTED: pointer cast */ 1280 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next); 1281 1282 } while (more_modules); 1283 } 1284 1285 static void 1286 reset_active_modules(void) 1287 { 1288 mod_info_t *mi; 1289 1290 /* Except the executable, no other module should remain active */ 1291 for (mi = modules.next; mi; mi = mi->next) 1292 mi->active = FALSE; 1293 } 1294 1295 static void 1296 getpfiledata(caddr_t memp, size_t fsz) 1297 { 1298 ProfObject *objp; 1299 caddr_t file_end; 1300 bool found_pcsamples = FALSE, found_cgraph = FALSE; 1301 1302 /* 1303 * Before processing a new gmon.out, all modules except the 1304 * program executable must be made inactive, so that symbols 1305 * are searched only in the program executable, if we don't 1306 * find a MODULES_T object. 
Don't do it *after* we read a gmon.out, 1307 * because we need the active module data after we're done with 1308 * the last gmon.out, if we're doing summing. 1309 */ 1310 reset_active_modules(); 1311 1312 file_end = memp + fsz; 1313 /* LINTED: pointer cast */ 1314 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size); 1315 while ((caddr_t)objp < file_end) { 1316 #ifdef DEBUG 1317 { 1318 unsigned int type = 0; 1319 1320 if (debug & MONOUTDEBUG) { 1321 if (objp->type <= MAX_OBJTYPES) 1322 type = objp->type; 1323 1324 (void) printf( 1325 "\n[getpfiledata] object %s [%#lx]\n", 1326 objname[type], objp->type); 1327 } 1328 } 1329 #endif /* DEBUG */ 1330 switch (objp->type) { 1331 case PROF_MODULES_T : 1332 process_modules((ProfModuleList *) objp); 1333 break; 1334 1335 case PROF_CALLGRAPH_T : 1336 process_cgraph((ProfCallGraph *) objp); 1337 found_cgraph = TRUE; 1338 break; 1339 1340 case PROF_BUFFER_T : 1341 process_pcsamples((ProfBuffer *) objp); 1342 found_pcsamples = TRUE; 1343 break; 1344 1345 default : 1346 (void) fprintf(stderr, 1347 "%s: unknown prof object type=%d\n", 1348 whoami, objp->type); 1349 exit(EX_SOFTWARE); 1350 } 1351 /* LINTED: pointer cast */ 1352 objp = (ProfObject *)((caddr_t)objp + objp->size); 1353 } 1354 1355 if (!found_cgraph || !found_pcsamples) { 1356 (void) fprintf(stderr, 1357 "%s: missing callgraph/pcsamples object\n", whoami); 1358 exit(EX_SOFTWARE); 1359 } 1360 1361 if ((caddr_t)objp > file_end) { 1362 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami); 1363 exit(EX_SOFTWARE); 1364 } 1365 1366 if (first_file) 1367 first_file = FALSE; 1368 } 1369 1370 static void 1371 readarcs(FILE *pfile) 1372 { 1373 /* 1374 * the rest of the file consists of 1375 * a bunch of <from,self,count> tuples. 1376 */ 1377 /* CONSTCOND */ 1378 while (1) { 1379 struct rawarc arc; 1380 1381 if (rflag) { 1382 if (Bflag) { 1383 L_cgarc64 rtld_arc64; 1384 1385 /* 1386 * If rflag is set then this is an profiled 1387 * image generated by rtld. 
It needs to be 1388 * 'converted' to the standard data format. 1389 */ 1390 if (fread(&rtld_arc64, 1391 sizeof (L_cgarc64), 1, pfile) != 1) 1392 break; 1393 1394 if (rtld_arc64.cg_from == PRF_OUTADDR64) 1395 arc.raw_frompc = s_highpc + 0x10; 1396 else 1397 arc.raw_frompc = 1398 (pctype)rtld_arc64.cg_from; 1399 arc.raw_selfpc = (pctype)rtld_arc64.cg_to; 1400 arc.raw_count = (actype)rtld_arc64.cg_count; 1401 } else { 1402 L_cgarc rtld_arc; 1403 1404 /* 1405 * If rflag is set then this is an profiled 1406 * image generated by rtld. It needs to be 1407 * 'converted' to the standard data format. 1408 */ 1409 if (fread(&rtld_arc, 1410 sizeof (L_cgarc), 1, pfile) != 1) 1411 break; 1412 1413 if (rtld_arc.cg_from == PRF_OUTADDR) 1414 arc.raw_frompc = s_highpc + 0x10; 1415 else 1416 arc.raw_frompc = (pctype) 1417 (uintptr_t)rtld_arc.cg_from; 1418 arc.raw_selfpc = (pctype) 1419 (uintptr_t)rtld_arc.cg_to; 1420 arc.raw_count = (actype)rtld_arc.cg_count; 1421 } 1422 } else { 1423 if (Bflag) { 1424 if (fread(&arc, sizeof (struct rawarc), 1, 1425 pfile) != 1) { 1426 break; 1427 } 1428 } else { 1429 /* 1430 * If these aren't big %pc's, we need to read 1431 * into the 32-bit raw arc structure, and 1432 * assign the members into the actual arc. 
1433 */ 1434 struct rawarc32 arc32; 1435 if (fread(&arc32, sizeof (struct rawarc32), 1436 1, pfile) != 1) 1437 break; 1438 arc.raw_frompc = (pctype)arc32.raw_frompc; 1439 arc.raw_selfpc = (pctype)arc32.raw_selfpc; 1440 arc.raw_count = (actype)arc32.raw_count; 1441 } 1442 } 1443 1444 #ifdef DEBUG 1445 if (debug & SAMPLEDEBUG) { 1446 (void) printf("[getpfile] frompc 0x%llx selfpc " 1447 "0x%llx count %lld\n", arc.raw_frompc, 1448 arc.raw_selfpc, arc.raw_count); 1449 } 1450 #endif /* DEBUG */ 1451 /* 1452 * add this arc 1453 */ 1454 tally(&modules, &modules, &arc); 1455 } 1456 if (first_file) 1457 first_file = FALSE; 1458 } 1459 1460 static void 1461 readsamples(FILE *pfile) 1462 { 1463 sztype i; 1464 unsigned_UNIT sample; 1465 1466 if (samples == 0) { 1467 samples = (unsigned_UNIT *) calloc(nsamples, 1468 sizeof (unsigned_UNIT)); 1469 if (samples == 0) { 1470 (void) fprintf(stderr, 1471 "%s: No room for %d sample pc's\n", 1472 whoami, sampbytes / sizeof (unsigned_UNIT)); 1473 exit(EX_OSERR); 1474 } 1475 } 1476 1477 for (i = 0; i < nsamples; i++) { 1478 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile); 1479 if (feof(pfile)) 1480 break; 1481 samples[i] += sample; 1482 } 1483 if (i != nsamples) { 1484 (void) fprintf(stderr, 1485 "%s: unexpected EOF after reading %d/%d samples\n", 1486 whoami, --i, nsamples); 1487 exit(EX_IOERR); 1488 } 1489 } 1490 1491 static void * 1492 handle_versioned(FILE *pfile, char *filename, size_t *fsz) 1493 { 1494 int fd; 1495 bool invalid_version; 1496 caddr_t fmem; 1497 struct stat buf; 1498 ProfHeader prof_hdr; 1499 off_t lret; 1500 1501 /* 1502 * Check versioning info. For now, let's say we provide 1503 * backward compatibility, so we accept all older versions. 
1504 */ 1505 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) { 1506 perror("fread()"); 1507 exit(EX_IOERR); 1508 } 1509 1510 invalid_version = FALSE; 1511 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION) 1512 invalid_version = TRUE; 1513 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) { 1514 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION) 1515 invalid_version = FALSE; 1516 } 1517 1518 if (invalid_version) { 1519 (void) fprintf(stderr, "%s: version %d.%d not supported\n", 1520 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver); 1521 exit(EX_SOFTWARE); 1522 } 1523 1524 /* 1525 * Map gmon.out onto memory. 1526 */ 1527 (void) fclose(pfile); 1528 if ((fd = open(filename, O_RDONLY)) == -1) { 1529 perror(filename); 1530 exit(EX_IOERR); 1531 } 1532 1533 if ((lret = lseek(fd, 0, SEEK_END)) == -1) { 1534 perror(filename); 1535 exit(EX_IOERR); 1536 } 1537 *fsz = lret; 1538 1539 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0); 1540 if (fmem == MAP_FAILED) { 1541 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename); 1542 exit(EX_IOERR); 1543 } 1544 1545 /* 1546 * Before we close this fd, save this gmon.out's info to later verify 1547 * if the shared objects it references have changed since the time 1548 * they were used to generate this gmon.out 1549 */ 1550 if (fstat(fd, &buf) == -1) { 1551 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1552 whoami, filename); 1553 exit(EX_NOINPUT); 1554 } 1555 gmonout_info.dev = buf.st_dev; 1556 gmonout_info.ino = buf.st_ino; 1557 gmonout_info.mtime = buf.st_mtime; 1558 gmonout_info.size = buf.st_size; 1559 1560 (void) close(fd); 1561 1562 return ((void *) fmem); 1563 } 1564 1565 static void * 1566 openpfile(char *filename, size_t *fsz) 1567 { 1568 struct hdr tmp; 1569 FILE *pfile; 1570 unsigned long magic_num; 1571 size_t hdrsize; 1572 static bool first_time = TRUE; 1573 extern bool old_style; 1574 1575 if ((pfile = fopen(filename, "r")) == NULL) { 1576 perror(filename); 1577 exit(EX_IOERR); 1578 } 1579 
1580 /* 1581 * Read in the magic. Note that we changed the cast "unsigned long" 1582 * to "unsigned int" because that's how h_magic is defined in the 1583 * new format ProfHeader. 1584 */ 1585 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) { 1586 perror("fread()"); 1587 exit(EX_IOERR); 1588 } 1589 1590 rewind(pfile); 1591 1592 /* 1593 * First check if this is versioned or *old-style* gmon.out 1594 */ 1595 if (magic_num == (unsigned int)PROF_MAGIC) { 1596 if ((!first_time) && (old_style == TRUE)) { 1597 (void) fprintf(stderr, "%s: can't mix old & new format " 1598 "profiled files\n", whoami); 1599 exit(EX_SOFTWARE); 1600 } 1601 first_time = FALSE; 1602 old_style = FALSE; 1603 return (handle_versioned(pfile, filename, fsz)); 1604 } 1605 1606 if ((!first_time) && (old_style == FALSE)) { 1607 (void) fprintf(stderr, "%s: can't mix old & new format " 1608 "profiled files\n", whoami); 1609 exit(EX_SOFTWARE); 1610 } 1611 1612 first_time = FALSE; 1613 old_style = TRUE; 1614 fsz = 0; 1615 1616 /* 1617 * Now, we need to determine if this is a run-time linker 1618 * profiled file or if it is a standard gmon.out. 1619 * 1620 * We do this by checking if magic matches PRF_MAGIC. If it 1621 * does, then this is a run-time linker profiled file, if it 1622 * doesn't, it must be a gmon.out file. 1623 */ 1624 if (magic_num == (unsigned long)PRF_MAGIC) 1625 rflag = TRUE; 1626 else 1627 rflag = FALSE; 1628 1629 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32); 1630 1631 if (rflag) { 1632 if (Bflag) { 1633 L_hdr64 l_hdr64; 1634 1635 /* 1636 * If the rflag is set then the input file is 1637 * rtld profiled data, we'll read it in and convert 1638 * it to the standard format (ie: make it look like 1639 * a gmon.out file). 
1640 */ 1641 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) { 1642 perror("fread()"); 1643 exit(EX_IOERR); 1644 } 1645 if (l_hdr64.hd_version != PRF_VERSION_64) { 1646 (void) fprintf(stderr, 1647 "%s: expected version %d, " 1648 "got version %d when processing 64-bit " 1649 "run-time linker profiled file.\n", 1650 whoami, PRF_VERSION_64, l_hdr64.hd_version); 1651 exit(EX_SOFTWARE); 1652 } 1653 tmp.lowpc = 0; 1654 tmp.highpc = (pctype)l_hdr64.hd_hpc; 1655 tmp.ncnt = hdrsize + l_hdr64.hd_psize; 1656 } else { 1657 L_hdr l_hdr; 1658 1659 /* 1660 * If the rflag is set then the input file is 1661 * rtld profiled data, we'll read it in and convert 1662 * it to the standard format (ie: make it look like 1663 * a gmon.out file). 1664 */ 1665 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) { 1666 perror("fread()"); 1667 exit(EX_IOERR); 1668 } 1669 if (l_hdr.hd_version != PRF_VERSION) { 1670 (void) fprintf(stderr, 1671 "%s: expected version %d, " 1672 "got version %d when processing " 1673 "run-time linker profiled file.\n", 1674 whoami, PRF_VERSION, l_hdr.hd_version); 1675 exit(EX_SOFTWARE); 1676 } 1677 tmp.lowpc = 0; 1678 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc; 1679 tmp.ncnt = hdrsize + l_hdr.hd_psize; 1680 } 1681 } else { 1682 if (Bflag) { 1683 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) { 1684 perror("fread()"); 1685 exit(EX_IOERR); 1686 } 1687 } else { 1688 /* 1689 * If we're not reading big %pc's, we need to read 1690 * the 32-bit header, and assign the members to 1691 * the actual header. 1692 */ 1693 struct hdr32 hdr32; 1694 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) { 1695 perror("fread()"); 1696 exit(EX_IOERR); 1697 } 1698 tmp.lowpc = hdr32.lowpc; 1699 tmp.highpc = hdr32.highpc; 1700 tmp.ncnt = hdr32.ncnt; 1701 } 1702 } 1703 1704 /* 1705 * perform sanity check on profiled file we've opened. 
1706 */ 1707 if (tmp.lowpc >= tmp.highpc) { 1708 if (rflag) 1709 (void) fprintf(stderr, 1710 "%s: badly formed profiled data.\n", 1711 filename); 1712 else 1713 (void) fprintf(stderr, 1714 "%s: badly formed gmon.out file.\n", 1715 filename); 1716 exit(EX_SOFTWARE); 1717 } 1718 1719 if (s_highpc != 0 && (tmp.lowpc != h.lowpc || 1720 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) { 1721 (void) fprintf(stderr, 1722 "%s: incompatible with first gmon file\n", 1723 filename); 1724 exit(EX_IOERR); 1725 } 1726 h = tmp; 1727 s_lowpc = h.lowpc; 1728 s_highpc = h.highpc; 1729 lowpc = h.lowpc / sizeof (UNIT); 1730 highpc = h.highpc / sizeof (UNIT); 1731 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0; 1732 nsamples = sampbytes / sizeof (unsigned_UNIT); 1733 1734 #ifdef DEBUG 1735 if (debug & SAMPLEDEBUG) { 1736 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc " 1737 "0x%llx hdr.ncnt %lld\n", 1738 h.lowpc, h.highpc, h.ncnt); 1739 (void) printf( 1740 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n", 1741 s_lowpc, s_highpc); 1742 (void) printf( 1743 "[openpfile] lowpc 0x%llx highpc 0x%llx\n", 1744 lowpc, highpc); 1745 (void) printf("[openpfile] sampbytes %d nsamples %d\n", 1746 sampbytes, nsamples); 1747 } 1748 #endif /* DEBUG */ 1749 1750 return ((void *) pfile); 1751 } 1752 1753 /* 1754 * Information from a gmon.out file depends on whether it's versioned 1755 * or non-versioned, *old style* gmon.out. If old-style, it is in two 1756 * parts : an array of sampling hits within pc ranges, and the arcs. If 1757 * versioned, it contains a header, followed by any number of 1758 * modules/callgraph/pcsample_buffer objects. 
1759 */ 1760 static void 1761 getpfile(char *filename) 1762 { 1763 void *handle; 1764 size_t fsz; 1765 1766 handle = openpfile(filename, &fsz); 1767 1768 if (old_style) { 1769 readsamples((FILE *)handle); 1770 readarcs((FILE *)handle); 1771 (void) fclose((FILE *)handle); 1772 return; 1773 } 1774 1775 getpfiledata((caddr_t)handle, fsz); 1776 (void) munmap(handle, fsz); 1777 } 1778 1779 int 1780 main(int argc, char **argv) 1781 { 1782 char **sp; 1783 nltype **timesortnlp; 1784 int c; 1785 int errflg; 1786 1787 prog_name = *argv; /* preserve program name */ 1788 debug = 0; 1789 nflag = FALSE; 1790 bflag = TRUE; 1791 lflag = FALSE; 1792 Cflag = FALSE; 1793 first_file = TRUE; 1794 rflag = FALSE; 1795 Bflag = FALSE; 1796 errflg = FALSE; 1797 1798 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF) 1799 switch (c) { 1800 case 'a': 1801 aflag = TRUE; 1802 break; 1803 case 'b': 1804 bflag = FALSE; 1805 break; 1806 case 'c': 1807 cflag = TRUE; 1808 break; 1809 case 'C': 1810 Cflag = TRUE; 1811 break; 1812 case 'd': 1813 dflag = TRUE; 1814 debug |= atoi(optarg); 1815 (void) printf("[main] debug = 0x%x\n", debug); 1816 break; 1817 case 'D': 1818 Dflag = TRUE; 1819 break; 1820 case 'E': 1821 addlist(Elist, optarg); 1822 Eflag = TRUE; 1823 addlist(elist, optarg); 1824 eflag = TRUE; 1825 break; 1826 case 'e': 1827 addlist(elist, optarg); 1828 eflag = TRUE; 1829 break; 1830 case 'F': 1831 addlist(Flist, optarg); 1832 Fflag = TRUE; 1833 addlist(flist, optarg); 1834 fflag = TRUE; 1835 break; 1836 case 'f': 1837 addlist(flist, optarg); 1838 fflag = TRUE; 1839 break; 1840 case 'l': 1841 lflag = TRUE; 1842 break; 1843 case 'n': 1844 nflag = TRUE; 1845 number_funcs_toprint = atoi(optarg); 1846 break; 1847 case 's': 1848 sflag = TRUE; 1849 break; 1850 case 'z': 1851 zflag = TRUE; 1852 break; 1853 case '?': 1854 errflg++; 1855 1856 } 1857 1858 if (errflg) { 1859 (void) fprintf(stderr, 1860 "usage: gprof [ -abcCDlsz ] [ -e function-name ] " 1861 "[ -E function-name ]\n\t[ -f 
function-name ] " 1862 "[ -F function-name ]\n\t[ image-file " 1863 "[ profile-file ... ] ]\n"); 1864 exit(EX_USAGE); 1865 } 1866 1867 if (optind < argc) { 1868 a_outname = argv[optind++]; 1869 } else { 1870 a_outname = A_OUTNAME; 1871 } 1872 if (optind < argc) { 1873 gmonname = argv[optind++]; 1874 } else { 1875 gmonname = GMONNAME; 1876 } 1877 /* 1878 * turn off default functions 1879 */ 1880 for (sp = &defaultEs[0]; *sp; sp++) { 1881 Eflag = TRUE; 1882 addlist(Elist, *sp); 1883 eflag = TRUE; 1884 addlist(elist, *sp); 1885 } 1886 /* 1887 * how many ticks per second? 1888 * if we can't tell, report time in ticks. 1889 */ 1890 hz = sysconf(_SC_CLK_TCK); 1891 if (hz == -1) { 1892 hz = 1; 1893 (void) fprintf(stderr, "time is in ticks, not seconds\n"); 1894 } 1895 1896 getnfile(a_outname); 1897 1898 /* 1899 * get information about mon.out file(s). 1900 */ 1901 do { 1902 getpfile(gmonname); 1903 if (optind < argc) 1904 gmonname = argv[optind++]; 1905 else 1906 optind++; 1907 } while (optind <= argc); 1908 /* 1909 * dump out a gmon.sum file if requested 1910 */ 1911 if (sflag || Dflag) 1912 dumpsum(GMONSUM); 1913 1914 if (old_style) { 1915 /* 1916 * assign samples to procedures 1917 */ 1918 asgnsamples(); 1919 } 1920 1921 /* 1922 * assemble the dynamic profile 1923 */ 1924 timesortnlp = doarcs(); 1925 1926 /* 1927 * print the dynamic profile 1928 */ 1929 #ifdef DEBUG 1930 if (debug & ANYDEBUG) { 1931 /* raw output of all symbols in all their glory */ 1932 int i; 1933 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, " 1934 "#calls, selfcalls, index \n"); 1935 for (i = 0; i < modules.nname; i++) { /* Print each symbol */ 1936 if (timesortnlp[i]->name) 1937 (void) printf(" %s ", timesortnlp[i]->name); 1938 else 1939 (void) printf(" <cycle> "); 1940 (void) printf(" %lld ", timesortnlp[i]->value); 1941 (void) printf(" %lld ", timesortnlp[i]->svalue); 1942 (void) printf(" %f ", timesortnlp[i]->time); 1943 (void) printf(" %lld ", timesortnlp[i]->ncall); 1944 (void) 
printf(" %lld ", timesortnlp[i]->selfcalls); 1945 (void) printf(" %d ", timesortnlp[i]->index); 1946 (void) printf(" \n"); 1947 } 1948 } 1949 #endif /* DEBUG */ 1950 1951 printgprof(timesortnlp); 1952 /* 1953 * print the flat profile 1954 */ 1955 printprof(); 1956 /* 1957 * print the index 1958 */ 1959 printindex(); 1960 1961 /* 1962 * print the modules 1963 */ 1964 printmodules(); 1965 1966 done(); 1967 /* NOTREACHED */ 1968 return (0); 1969 } 1970