/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sysexits.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include "gprof.h"
#include "profile.h"

/* Program name; used as the "%s:" prefix of the diagnostics in this file. */
char *whoami = "gprof";
static pctype lowpc, highpc; /* range profiled, in UNIT's */

/*
 * things which get -E excluded by default.
41 */ 42 static char *defaultEs[] = { 43 "mcount", 44 "__mcleanup", 45 NULL 46 }; 47 48 #ifdef DEBUG 49 50 static char *objname[] = { 51 "<invalid object>", 52 "PROF_BUFFER_T", 53 "PROF_CALLGRAPH_T", 54 "PROF_MODULES_T", 55 NULL 56 }; 57 #define MAX_OBJTYPES 3 58 59 #endif /* DEBUG */ 60 61 void 62 done(void) 63 { 64 65 exit(EX_OK); 66 } 67 68 static pctype 69 max(pctype a, pctype b) 70 { 71 if (a > b) 72 return (a); 73 return (b); 74 } 75 76 static pctype 77 min(pctype a, pctype b) 78 { 79 if (a < b) 80 return (a); 81 return (b); 82 } 83 84 /* 85 * calculate scaled entry point addresses (to save time in asgnsamples), 86 * and possibly push the scaled entry points over the entry mask, 87 * if it turns out that the entry point is in one bucket and the code 88 * for a routine is in the next bucket. 89 * 90 */ 91 static void 92 alignentries(void) 93 { 94 struct nl *nlp; 95 #ifdef DEBUG 96 pctype bucket_of_entry; 97 pctype bucket_of_code; 98 #endif /* DEBUG */ 99 100 /* for old-style gmon.out, nameslist is only in modules.nl */ 101 102 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 103 nlp->svalue = nlp->value / sizeof (UNIT); 104 #ifdef DEBUG 105 bucket_of_entry = (nlp->svalue - lowpc) / scale; 106 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 107 if (bucket_of_entry < bucket_of_code) { 108 if (debug & SAMPLEDEBUG) { 109 (void) printf( 110 "[alignentries] pushing svalue 0x%llx " 111 "to 0x%llx\n", nlp->svalue, 112 nlp->svalue + UNITS_TO_CODE); 113 } 114 } 115 #endif /* DEBUG */ 116 } 117 } 118 119 /* 120 * old-style gmon.out 121 * ------------------ 122 * 123 * Assign samples to the procedures to which they belong. 124 * 125 * There are three cases as to where pcl and pch can be 126 * with respect to the routine entry addresses svalue0 and svalue1 127 * as shown in the following diagram. overlap computes the 128 * distance between the arrows, the fraction of the sample 129 * that is to be credited to the routine which starts at svalue0. 
130 * 131 * svalue0 svalue1 132 * | | 133 * v v 134 * 135 * +-----------------------------------------------+ 136 * | | 137 * | ->| |<- ->| |<- ->| |<- | 138 * | | | | | | 139 * +---------+ +---------+ +---------+ 140 * 141 * ^ ^ ^ ^ ^ ^ 142 * | | | | | | 143 * pcl pch pcl pch pcl pch 144 * 145 * For the vax we assert that samples will never fall in the first 146 * two bytes of any routine, since that is the entry mask, 147 * thus we give call alignentries() to adjust the entry points if 148 * the entry mask falls in one bucket but the code for the routine 149 * doesn't start until the next bucket. In conjunction with the 150 * alignment of routine addresses, this should allow us to have 151 * only one sample for every four bytes of text space and never 152 * have any overlap (the two end cases, above). 153 */ 154 static void 155 asgnsamples(void) 156 { 157 sztype i, j; 158 unsigned_UNIT ccnt; 159 double time; 160 pctype pcl, pch; 161 pctype overlap; 162 pctype svalue0, svalue1; 163 164 extern mod_info_t modules; 165 nltype *nl = modules.nl; 166 sztype nname = modules.nname; 167 168 /* read samples and assign to namelist symbols */ 169 scale = highpc - lowpc; 170 scale /= nsamples; 171 alignentries(); 172 for (i = 0, j = 1; i < nsamples; i++) { 173 ccnt = samples[i]; 174 if (ccnt == 0) 175 continue; 176 pcl = lowpc + scale * i; 177 pch = lowpc + scale * (i + 1); 178 time = ccnt; 179 #ifdef DEBUG 180 if (debug & SAMPLEDEBUG) { 181 (void) printf( 182 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n", 183 pcl, pch, ccnt); 184 } 185 #endif /* DEBUG */ 186 totime += time; 187 for (j = (j ? j - 1 : 0); j < nname; j++) { 188 svalue0 = nl[j].svalue; 189 svalue1 = nl[j+1].svalue; 190 /* 191 * if high end of tick is below entry address, 192 * go for next tick. 193 */ 194 if (pch < svalue0) 195 break; 196 /* 197 * if low end of tick into next routine, 198 * go for next routine. 
199 */ 200 if (pcl >= svalue1) 201 continue; 202 overlap = min(pch, svalue1) - max(pcl, svalue0); 203 if (overlap != 0) { 204 #ifdef DEBUG 205 if (debug & SAMPLEDEBUG) { 206 (void) printf("[asgnsamples] " 207 "(0x%llx->0x%llx-0x%llx) %s gets " 208 "%f ticks %lld overlap\n", 209 nl[j].value/sizeof (UNIT), svalue0, 210 svalue1, nl[j].name, 211 overlap * time / scale, overlap); 212 } 213 #endif /* DEBUG */ 214 nl[j].time += overlap * time / scale; 215 } 216 } 217 } 218 #ifdef DEBUG 219 if (debug & SAMPLEDEBUG) { 220 (void) printf("[asgnsamples] totime %f\n", totime); 221 } 222 #endif /* DEBUG */ 223 } 224 225 226 static void 227 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs, 228 unsigned long ncallees) 229 { 230 ProfCallGraph prof_cgraph; 231 ProfFunction prof_func; 232 arctype *arcp; 233 mod_info_t *mi; 234 nltype *nlp; 235 size_t cur_offset; 236 unsigned long caller_id = 0, callee_id = 0; 237 238 /* 239 * Write the callgraph header 240 */ 241 prof_cgraph.type = PROF_CALLGRAPH_T; 242 prof_cgraph.version = PROF_CALLGRAPH_VER; 243 prof_cgraph.functions = PROFCGRAPH_SZ; 244 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ; 245 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) { 246 perror(filename); 247 exit(EX_IOERR); 248 } 249 /* CONSTCOND */ 250 if (CGRAPH_FILLER) 251 (void) fseek(fp, CGRAPH_FILLER, SEEK_CUR); 252 253 /* Current offset inside the callgraph object */ 254 cur_offset = prof_cgraph.functions; 255 256 for (mi = &modules; mi; mi = mi->next) { 257 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 258 if (nlp->ncallers == 0) 259 continue; 260 261 /* If this is the last callee, set next_to to 0 */ 262 callee_id++; 263 if (callee_id == ncallees) 264 prof_func.next_to = 0; 265 else { 266 prof_func.next_to = cur_offset + 267 nlp->ncallers * PROFFUNC_SZ; 268 } 269 270 /* 271 * Dump this callee's raw arc information with all 272 * its callers 273 */ 274 caller_id = 1; 275 for (arcp = nlp->parents; arcp; 276 arcp = arcp->arc_parentlist) { 
277 /* 278 * If no more callers for this callee, set 279 * next_from to 0 280 */ 281 if (caller_id == nlp->ncallers) 282 prof_func.next_from = 0; 283 else { 284 prof_func.next_from = cur_offset + 285 PROFFUNC_SZ; 286 } 287 288 prof_func.frompc = 289 arcp->arc_parentp->module->load_base + 290 (arcp->arc_parentp->value - 291 arcp->arc_parentp->module->txt_origin); 292 prof_func.topc = 293 mi->load_base + 294 (nlp->value - mi->txt_origin); 295 prof_func.count = arcp->arc_count; 296 297 298 if (fwrite(&prof_func, sizeof (ProfFunction), 299 1, fp) != 1) { 300 perror(filename); 301 exit(EX_IOERR); 302 } 303 /* CONSTCOND */ 304 if (FUNC_FILLER) 305 (void) fseek(fp, FUNC_FILLER, SEEK_CUR); 306 307 cur_offset += PROFFUNC_SZ; 308 caller_id++; 309 } 310 } /* for nlp... */ 311 } /* for mi... */ 312 } 313 314 /* 315 * To save all pc-hits in all the gmon.out's is infeasible, as this 316 * may become quite huge even with a small number of files to sum. 317 * Instead, we'll dump *fictitious hits* to correct functions 318 * by scanning module namelists. Again, since this is summing 319 * pc-hits, we may have to dump the pcsamples out in chunks if the 320 * number of pc-hits is high. 
321 */ 322 static void 323 dump_hits(FILE *fp, char *filename, nltype *nlp) 324 { 325 Address *p, hitpc; 326 size_t i, nelem, ntowrite; 327 328 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE) 329 nelem = PROF_BUFFER_SIZE; 330 331 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) { 332 (void) fprintf(stderr, "%s: no room for %d pcsamples\n", 333 whoami, nelem); 334 exit(EX_OSERR); 335 } 336 337 /* 338 * Set up *fictitious* hits (to function entry) buffer 339 */ 340 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin); 341 for (i = 0; i < nelem; i++) 342 p[i] = hitpc; 343 344 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) { 345 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) { 346 perror(filename); 347 exit(EX_IOERR); 348 } 349 } 350 351 if (ntowrite) { 352 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) { 353 perror(filename); 354 exit(EX_IOERR); 355 } 356 } 357 358 free(p); 359 } 360 361 static void 362 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs, 363 unsigned long *ncallees) 364 { 365 ProfBuffer prof_buffer; 366 arctype *arcp; 367 mod_info_t *mi; 368 nltype *nlp; 369 370 prof_buffer.type = PROF_BUFFER_T; 371 prof_buffer.version = PROF_BUFFER_VER; 372 prof_buffer.buffer = PROFBUF_SZ; 373 prof_buffer.bufsize = n_pcsamples; 374 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address); 375 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) { 376 perror(filename); 377 exit(EX_IOERR); 378 } 379 /* CONSTCOND */ 380 if (BUF_FILLER) 381 (void) fseek(fp, BUF_FILLER, SEEK_CUR); 382 383 *tarcs = 0; 384 *ncallees = 0; 385 for (mi = &modules; mi; mi = mi->next) { 386 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 387 if (nlp->nticks) 388 dump_hits(fp, filename, nlp); 389 390 nlp->ncallers = 0; 391 for (arcp = nlp->parents; arcp; 392 arcp = arcp->arc_parentlist) { 393 (nlp->ncallers)++; 394 } 395 396 if (nlp->ncallers) { 397 (*tarcs) += nlp->ncallers; 398 (*ncallees)++; 399 } 400 } 401 } 
402 } 403 404 static void 405 dump_modules(FILE *fp, char *filename, size_t pbuf_sz) 406 { 407 char *pbuf, *p; 408 size_t namelen; 409 Index off_nxt, off_path; 410 mod_info_t *mi; 411 412 ProfModuleList prof_modlist; 413 ProfModule prof_mod; 414 415 /* Allocate for path strings buffer */ 416 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN); 417 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) { 418 (void) fprintf(stderr, "%s: no room for %d bytes\n", 419 whoami, pbuf_sz * sizeof (char)); 420 exit(EX_OSERR); 421 } 422 423 /* Dump out PROF_MODULE_T info for all non-aout modules */ 424 prof_modlist.type = PROF_MODULES_T; 425 prof_modlist.version = PROF_MODULES_VER; 426 prof_modlist.modules = PROFMODLIST_SZ; 427 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ + 428 pbuf_sz; 429 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) { 430 perror(filename); 431 exit(EX_IOERR); 432 } 433 /* CONSTCOND */ 434 if (MODLIST_FILLER) 435 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR); 436 437 /* 438 * Initialize offsets for ProfModule elements. 
439 */ 440 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ; 441 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ; 442 443 for (mi = modules.next; mi; mi = mi->next) { 444 if (mi->next) 445 prof_mod.next = off_nxt; 446 else 447 prof_mod.next = 0; 448 prof_mod.path = off_path; 449 prof_mod.startaddr = mi->load_base; 450 prof_mod.endaddr = mi->load_end; 451 452 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) { 453 perror(filename); 454 exit(EX_IOERR); 455 } 456 457 /* CONSTCOND */ 458 if (MOD_FILLER) 459 (void) fseek(fp, MOD_FILLER, SEEK_CUR); 460 461 (void) strcpy(p, mi->name); 462 namelen = strlen(mi->name); 463 p += namelen + 1; 464 465 /* Note that offset to every path str need not be aligned */ 466 off_nxt += PROFMOD_SZ; 467 off_path += namelen + 1; 468 } 469 470 /* Write out the module path strings */ 471 if (pbuf_sz) { 472 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) { 473 perror(filename); 474 exit(EX_IOERR); 475 } 476 477 free(pbuf); 478 } 479 } 480 481 /* 482 * If we have inactive modules, their current load addresses may overlap with 483 * active ones, and so we've to assign fictitious, non-overlapping addresses 484 * to all modules before we dump them. 485 */ 486 static void 487 fixup_maps(size_t *pathsz) 488 { 489 unsigned int n_inactive = 0; 490 Address lbase = 0, lend; 491 mod_info_t *mi; 492 493 /* Pick the lowest load address among modules */ 494 *pathsz = 0; 495 for (mi = &modules; mi; mi = mi->next) { 496 497 if (mi->active == FALSE) 498 n_inactive++; 499 500 if (mi == &modules || mi->load_base < lbase) 501 lbase = mi->load_base; 502 503 /* 504 * Return total path size of non-aout modules only 505 */ 506 if (mi != &modules) 507 *pathsz = (*pathsz) + strlen(mi->name) + 1; 508 } 509 510 /* 511 * All module info is in fine shape already if there are no 512 * inactive modules 513 */ 514 if (n_inactive == 0) 515 return; 516 517 /* 518 * Assign fictitious load addresses to all (non-aout) modules so 519 * that sum info can be dumped out. 
520 */ 521 for (mi = modules.next; mi; mi = mi->next) { 522 lend = lbase + (mi->data_end - mi->txt_origin); 523 if ((lbase < modules.load_base && lend < modules.load_base) || 524 (lbase > modules.load_end && lend > modules.load_end)) { 525 526 mi->load_base = lbase; 527 mi->load_end = lend; 528 529 /* just to give an appearance of reality */ 530 lbase = CEIL(lend + PGSZ, PGSZ); 531 } else { 532 /* 533 * can't use this lbase & lend pair, as it 534 * overlaps with aout's addresses 535 */ 536 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ); 537 mi->load_end = mi->load_base + (lend - lbase); 538 539 lbase = CEIL(mi->load_end + PGSZ, PGSZ); 540 } 541 } 542 } 543 544 static void 545 dump_gprofhdr(FILE *fp, char *filename) 546 { 547 ProfHeader prof_hdr; 548 549 prof_hdr.h_magic = PROF_MAGIC; 550 prof_hdr.h_major_ver = PROF_MAJOR_VERSION; 551 prof_hdr.h_minor_ver = PROF_MINOR_VERSION; 552 prof_hdr.size = PROFHDR_SZ; 553 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) { 554 perror(filename); 555 exit(EX_IOERR); 556 } 557 558 /* CONSTCOND */ 559 if (HDR_FILLER) 560 (void) fseek(fp, HDR_FILLER, SEEK_CUR); 561 } 562 563 static void 564 dumpsum_ostyle(char *sumfile) 565 { 566 nltype *nlp; 567 arctype *arcp; 568 struct rawarc arc; 569 struct rawarc32 arc32; 570 FILE *sfile; 571 572 if ((sfile = fopen(sumfile, "w")) == NULL) { 573 perror(sumfile); 574 exit(EX_IOERR); 575 } 576 /* 577 * dump the header; use the last header read in 578 */ 579 if (Bflag) { 580 if (fwrite(&h, sizeof (h), 1, sfile) != 1) { 581 perror(sumfile); 582 exit(EX_IOERR); 583 } 584 } else { 585 struct hdr32 hdr; 586 hdr.lowpc = (pctype32)h.lowpc; 587 hdr.highpc = (pctype32)h.highpc; 588 hdr.ncnt = (pctype32)h.ncnt; 589 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) { 590 perror(sumfile); 591 exit(EX_IOERR); 592 } 593 } 594 /* 595 * dump the samples 596 */ 597 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) != 598 nsamples) { 599 perror(sumfile); 600 exit(EX_IOERR); 601 } 602 /* 603 * 
dump the normalized raw arc information. For old-style dumping, 604 * the only namelist is in modules.nl 605 */ 606 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 607 for (arcp = nlp->children; arcp; 608 arcp = arcp->arc_childlist) { 609 if (Bflag) { 610 arc.raw_frompc = arcp->arc_parentp->value; 611 arc.raw_selfpc = arcp->arc_childp->value; 612 arc.raw_count = arcp->arc_count; 613 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) { 614 perror(sumfile); 615 exit(EX_IOERR); 616 } 617 } else { 618 arc32.raw_frompc = 619 (pctype32)arcp->arc_parentp->value; 620 arc32.raw_selfpc = 621 (pctype32)arcp->arc_childp->value; 622 arc32.raw_count = (actype32)arcp->arc_count; 623 if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 1) { 624 perror(sumfile); 625 exit(EX_IOERR); 626 } 627 } 628 #ifdef DEBUG 629 if (debug & SAMPLEDEBUG) { 630 (void) printf( 631 "[dumpsum_ostyle] frompc 0x%llx selfpc " 632 "0x%llx count %lld\n", arc.raw_frompc, 633 arc.raw_selfpc, arc.raw_count); 634 } 635 #endif /* DEBUG */ 636 } 637 } 638 (void) fclose(sfile); 639 } 640 641 /* 642 * dump out the gmon.sum file 643 */ 644 static void 645 dumpsum(char *sumfile) 646 { 647 FILE *sfile; 648 size_t pathbuf_sz; 649 unsigned long total_arcs; /* total number of arcs in all */ 650 unsigned long ncallees; /* no. of callees with parents */ 651 652 if (old_style) { 653 dumpsum_ostyle(sumfile); 654 return; 655 } 656 657 if ((sfile = fopen(sumfile, "w")) == NULL) { 658 perror(sumfile); 659 exit(EX_IOERR); 660 } 661 662 /* 663 * Dump the new-style gprof header. Even if one of the original 664 * profiled-files was of a older version, the summed file is of 665 * current version only. 666 */ 667 dump_gprofhdr(sfile, sumfile); 668 669 /* 670 * Fix up load-maps and dump out modules info 671 * 672 * Fix up module load maps so inactive modules get *some* address 673 * (and btw, could you get the total size of non-aout module path 674 * strings please ?) 
675 */ 676 fixup_maps(&pathbuf_sz); 677 dump_modules(sfile, sumfile, pathbuf_sz); 678 679 680 /* 681 * Dump out the summ'd pcsamples 682 * 683 * For dumping call graph information later, we need certain 684 * statistics (like total arcs, number of callers for each node); 685 * collect these also while we are at it. 686 */ 687 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees); 688 689 /* 690 * Dump out the summ'd call graph information 691 */ 692 dump_callgraph(sfile, sumfile, total_arcs, ncallees); 693 694 695 (void) fclose(sfile); 696 } 697 698 static void 699 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp) 700 { 701 nltype *parentp; 702 nltype *childp; 703 704 /* 705 * if count == 0 this is a null arc and 706 * we don't need to tally it. 707 */ 708 if (rawp->raw_count == 0) 709 return; 710 711 /* 712 * Lookup the caller and callee pcs in namelists of 713 * appropriate modules 714 */ 715 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL); 716 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL); 717 if (childp && parentp) { 718 if (!Dflag) 719 childp->ncall += rawp->raw_count; 720 else { 721 if (first_file) 722 childp->ncall += rawp->raw_count; 723 else { 724 childp->ncall -= rawp->raw_count; 725 if (childp->ncall < 0) 726 childp->ncall = 0; 727 } 728 } 729 730 #ifdef DEBUG 731 if (debug & TALLYDEBUG) { 732 (void) printf("[tally] arc from %s to %s traversed " 733 "%lld times\n", parentp->name, 734 childp->name, rawp->raw_count); 735 } 736 #endif /* DEBUG */ 737 addarc(parentp, childp, rawp->raw_count); 738 } 739 } 740 741 /* 742 * Look up a module's base address in a sorted list of pc-hits. Unlike 743 * nllookup(), this deals with misses by mapping them to the next *higher* 744 * pc-hit. This is so that we get into the module's first pc-hit rightaway, 745 * even if the module's entry-point (load_base) itself is not a hit. 
746 */ 747 static Address * 748 locate(Address *pclist, size_t nelem, Address keypc) 749 { 750 size_t low = 0, middle, high = nelem - 1; 751 752 if (keypc <= pclist[low]) 753 return (pclist); 754 755 if (keypc > pclist[high]) 756 return (NULL); 757 758 while (low != high) { 759 middle = (high + low) >> 1; 760 761 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc)) 762 return (&pclist[middle + 1]); 763 764 if (pclist[middle] >= keypc) 765 high = middle; 766 else 767 low = middle + 1; 768 } 769 770 /* must never reach here! */ 771 return (NULL); 772 } 773 774 static void 775 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples) 776 { 777 Address *pcptr, *pcse = pcsmpl + n_samples; 778 pctype nxt_func; 779 nltype *fnl; 780 size_t func_nticks; 781 #ifdef DEBUG 782 size_t n_hits_in_module = 0; 783 #endif /* DEBUG */ 784 785 /* Locate the first pc-hit for this module */ 786 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) { 787 #ifdef DEBUG 788 if (debug & PCSMPLDEBUG) { 789 (void) printf("[assign_pcsamples] no pc-hits in\n"); 790 (void) printf( 791 " `%s'\n", module->name); 792 } 793 #endif /* DEBUG */ 794 return; /* no pc-hits in this module */ 795 } 796 797 /* Assign all pc-hits in this module to appropriate functions */ 798 while ((pcptr < pcse) && (*pcptr < module->load_end)) { 799 800 /* Update the corresponding function's time */ 801 if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) { 802 /* 803 * Collect all pc-hits in this function. Each 804 * pc-hit counts as 1 tick. 
805 */ 806 func_nticks = 0; 807 while ((pcptr < pcse) && (*pcptr < nxt_func)) { 808 func_nticks++; 809 pcptr++; 810 } 811 812 if (func_nticks == 0) 813 pcptr++; 814 else { 815 fnl->nticks += func_nticks; 816 fnl->time += func_nticks; 817 totime += func_nticks; 818 } 819 820 #ifdef DEBUG 821 n_hits_in_module += func_nticks; 822 #endif /* DEBUG */ 823 } else { 824 /* 825 * pc sample could not be assigned to function; 826 * probably in a PLT 827 */ 828 pcptr++; 829 } 830 } 831 832 #ifdef DEBUG 833 if (debug & PCSMPLDEBUG) { 834 (void) printf( 835 "[assign_pcsamples] %ld hits in\n", n_hits_in_module); 836 (void) printf(" `%s'\n", module->name); 837 } 838 #endif /* DEBUG */ 839 } 840 841 int 842 pc_cmp(const void *arg1, const void *arg2) 843 { 844 Address *pc1 = (Address *)arg1; 845 Address *pc2 = (Address *)arg2; 846 847 if (*pc1 > *pc2) 848 return (1); 849 850 if (*pc1 < *pc2) 851 return (-1); 852 853 return (0); 854 } 855 856 static void 857 process_pcsamples(ProfBuffer *bufp) 858 { 859 Address *pc_samples; 860 mod_info_t *mi; 861 caddr_t p; 862 size_t chunk_size, nelem_read, nelem_to_read; 863 864 #ifdef DEBUG 865 if (debug & PCSMPLDEBUG) { 866 (void) printf( 867 "[process_pcsamples] number of pcsamples = %lld\n", 868 bufp->bufsize); 869 } 870 #endif /* DEBUG */ 871 872 /* buffer with no pc samples ? */ 873 if (bufp->bufsize == 0) 874 return; 875 876 /* 877 * If we're processing pcsamples of a profile sum, we could have 878 * more than PROF_BUFFER_SIZE number of samples. In such a case, 879 * we must read the pcsamples in chunks. 
880 */ 881 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE) 882 chunk_size = PROF_BUFFER_SIZE; 883 884 /* Allocate for the pcsample chunk */ 885 pc_samples = (Address *) calloc(chunk_size, sizeof (Address)); 886 if (pc_samples == NULL) { 887 (void) fprintf(stderr, "%s: no room for %d sample pc's\n", 888 whoami, chunk_size); 889 exit(EX_OSERR); 890 } 891 892 /* Copy the current set of pcsamples */ 893 nelem_read = 0; 894 nelem_to_read = bufp->bufsize; 895 p = (char *)bufp + bufp->buffer; 896 897 while (nelem_read < nelem_to_read) { 898 (void) memcpy((void *) pc_samples, p, 899 chunk_size * sizeof (Address)); 900 901 /* Sort the pc samples */ 902 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp); 903 904 /* 905 * Assign pcsamples to functions in the currently active 906 * module list 907 */ 908 for (mi = &modules; mi; mi = mi->next) { 909 if (mi->active == FALSE) 910 continue; 911 assign_pcsamples(mi, pc_samples, chunk_size); 912 } 913 914 p += (chunk_size * sizeof (Address)); 915 nelem_read += chunk_size; 916 917 if ((nelem_to_read - nelem_read) < chunk_size) 918 chunk_size = nelem_to_read - nelem_read; 919 } 920 921 free(pc_samples); 922 923 /* Update total number of pcsamples read so far */ 924 n_pcsamples += bufp->bufsize; 925 } 926 927 static mod_info_t * 928 find_module(Address addr) 929 { 930 mod_info_t *mi; 931 932 for (mi = &modules; mi; mi = mi->next) { 933 if (mi->active == FALSE) 934 continue; 935 936 if (addr >= mi->load_base && addr < mi->load_end) 937 return (mi); 938 } 939 940 return (NULL); 941 } 942 943 static void 944 process_cgraph(ProfCallGraph *cgp) 945 { 946 struct rawarc arc; 947 mod_info_t *callee_mi, *caller_mi; 948 ProfFunction *calleep, *callerp; 949 Index caller_off, callee_off; 950 951 /* 952 * Note that *callee_off* increment in the for loop below 953 * uses *calleep* and *calleep* doesn't get set until the for loop 954 * is entered. 
We don't expect the increment to be executed before 955 * the loop body is executed atleast once, so this should be ok. 956 */ 957 for (callee_off = cgp->functions; callee_off; 958 callee_off = calleep->next_to) { 959 960 /* LINTED: pointer cast */ 961 calleep = (ProfFunction *)((char *)cgp + callee_off); 962 963 /* 964 * We could choose either to sort the {caller, callee} 965 * list twice and assign callee/caller to modules or inspect 966 * each callee/caller in the active modules list. Since 967 * the modules list is usually very small, we'l choose the 968 * latter. 969 */ 970 971 /* 972 * If we cannot identify a callee with a module, there's 973 * no use worrying about who called it. 974 */ 975 if ((callee_mi = find_module(calleep->topc)) == NULL) { 976 #ifdef DEBUG 977 if (debug & CGRAPHDEBUG) { 978 (void) printf( 979 "[process_cgraph] callee %#llx missed\n", 980 calleep->topc); 981 } 982 #endif /* DEBUG */ 983 continue; 984 } else 985 arc.raw_selfpc = calleep->topc; 986 987 for (caller_off = callee_off; caller_off; 988 caller_off = callerp->next_from) { 989 990 /* LINTED: pointer cast */ 991 callerp = (ProfFunction *)((char *)cgp + caller_off); 992 if ((caller_mi = find_module(callerp->frompc)) == 993 NULL) { 994 #ifdef DEBUG 995 if (debug & CGRAPHDEBUG) { 996 (void) printf( 997 "[process_cgraph] caller %#llx " 998 "missed\n", callerp->frompc); 999 } 1000 #endif /* DEBUG */ 1001 continue; 1002 } 1003 1004 arc.raw_frompc = callerp->frompc; 1005 arc.raw_count = callerp->count; 1006 1007 #ifdef DEBUG 1008 if (debug & CGRAPHDEBUG) { 1009 (void) printf( 1010 "[process_cgraph] arc <%#llx, %#llx, " 1011 "%lld>\n", arc.raw_frompc, arc.raw_selfpc, 1012 arc.raw_count); 1013 } 1014 #endif /* DEBUG */ 1015 tally(caller_mi, callee_mi, &arc); 1016 } 1017 } 1018 1019 #ifdef DEBUG 1020 puts("\n"); 1021 #endif /* DEBUG */ 1022 } 1023 1024 /* 1025 * Two modules overlap each other if they don't lie completely *outside* 1026 * each other. 
1027 */ 1028 static bool 1029 does_overlap(ProfModule *new, mod_info_t *old) 1030 { 1031 /* case 1: new module lies completely *before* the old one */ 1032 if (new->startaddr < old->load_base && new->endaddr <= old->load_base) 1033 return (FALSE); 1034 1035 /* case 2: new module lies completely *after* the old one */ 1036 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end) 1037 return (FALSE); 1038 1039 /* probably a dlopen: the modules overlap each other */ 1040 return (TRUE); 1041 } 1042 1043 static bool 1044 is_same_as_aout(char *modpath, struct stat *buf) 1045 { 1046 if (stat(modpath, buf) == -1) { 1047 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1048 whoami, modpath); 1049 exit(EX_NOINPUT); 1050 } 1051 1052 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino)) 1053 return (TRUE); 1054 else 1055 return (FALSE); 1056 } 1057 1058 static void 1059 process_modules(ProfModuleList *modlp) 1060 { 1061 ProfModule *newmodp; 1062 mod_info_t *mi, *last, *new_module; 1063 char *so_path; 1064 bool more_modules = TRUE; 1065 struct stat so_statbuf; 1066 1067 #ifdef DEBUG 1068 if (debug & MODULEDEBUG) { 1069 (void) printf("[process_modules] module obj version %u\n", 1070 modlp->version); 1071 } 1072 #endif /* DEBUG */ 1073 1074 /* Check version of module type object */ 1075 if (modlp->version > PROF_MODULES_VER) { 1076 (void) fprintf(stderr, "%s: version %d for module type objects" 1077 "is not supported\n", whoami, modlp->version); 1078 exit(EX_SOFTWARE); 1079 } 1080 1081 1082 /* 1083 * Scan the PROF_MODULES_T list and add modules to current list 1084 * of modules, if they're not present already 1085 */ 1086 /* LINTED: pointer cast */ 1087 newmodp = (ProfModule *)((char *)modlp + modlp->modules); 1088 do { 1089 /* 1090 * Since the prog could've been renamed after its run, we 1091 * should see if this overlaps a.out. If it does, it is 1092 * probably the renamed aout. 
We should also skip any other 1093 * non-sharedobj's that we see (or should we report an error ?) 1094 */ 1095 so_path = (caddr_t)modlp + newmodp->path; 1096 if (does_overlap(newmodp, &modules) || 1097 is_same_as_aout(so_path, &so_statbuf) || 1098 (!is_shared_obj(so_path))) { 1099 1100 if (!newmodp->next) 1101 more_modules = FALSE; 1102 1103 /* LINTED: pointer cast */ 1104 newmodp = (ProfModule *) 1105 ((caddr_t)modlp + newmodp->next); 1106 #ifdef DEBUG 1107 if (debug & MODULEDEBUG) { 1108 (void) printf( 1109 "[process_modules] `%s'\n", so_path); 1110 (void) printf(" skipped\n"); 1111 } 1112 #endif /* DEBUG */ 1113 continue; 1114 } 1115 #ifdef DEBUG 1116 if (debug & MODULEDEBUG) 1117 (void) printf("[process_modules] `%s'...\n", so_path); 1118 #endif /* DEBUG */ 1119 1120 /* 1121 * Check all modules (leave the first one, 'cos that 1122 * is the program executable info). If this module is already 1123 * there in the list, update the load addresses and proceed. 1124 */ 1125 last = &modules; 1126 while ((mi = last->next) != NULL) { 1127 /* 1128 * We expect the full pathname for all shared objects 1129 * needed by the program executable. In this case, we 1130 * simply need to compare the paths to see if they are 1131 * the same file. 1132 */ 1133 if (strcmp(mi->name, so_path) == 0) 1134 break; 1135 1136 /* 1137 * Check if this new shared object will overlap 1138 * any existing module. 
If yes, remove the old one 1139 * from the linked list (but don't free it, 'cos 1140 * there may be symbols referring to this module 1141 * still) 1142 */ 1143 if (does_overlap(newmodp, mi)) { 1144 #ifdef DEBUG 1145 if (debug & MODULEDEBUG) { 1146 (void) printf( 1147 "[process_modules] `%s'\n", 1148 so_path); 1149 (void) printf( 1150 " overlaps\n"); 1151 (void) printf( 1152 " `%s'\n", 1153 mi->name); 1154 } 1155 #endif /* DEBUG */ 1156 mi->active = FALSE; 1157 } 1158 1159 last = mi; 1160 } 1161 1162 /* Module already there, skip it */ 1163 if (mi != NULL) { 1164 mi->load_base = newmodp->startaddr; 1165 mi->load_end = newmodp->endaddr; 1166 mi->active = TRUE; 1167 if (!newmodp->next) 1168 more_modules = FALSE; 1169 1170 /* LINTED: pointer cast */ 1171 newmodp = (ProfModule *) 1172 ((caddr_t)modlp + newmodp->next); 1173 1174 #ifdef DEBUG 1175 if (debug & MODULEDEBUG) { 1176 (void) printf("[process_modules] base=%#llx, " 1177 "end=%#llx\n", mi->load_base, 1178 mi->load_end); 1179 } 1180 #endif /* DEBUG */ 1181 continue; 1182 } 1183 1184 /* 1185 * Check if gmon.out is outdated with respect to the new 1186 * module we want to add 1187 */ 1188 if (gmonout_info.mtime < so_statbuf.st_mtime) { 1189 (void) fprintf(stderr, 1190 "%s: shared obj outdates prof info\n", whoami); 1191 (void) fprintf(stderr, "\t(newer %s)\n", so_path); 1192 exit(EX_NOINPUT); 1193 } 1194 1195 /* Create a new module element */ 1196 new_module = malloc(sizeof (mod_info_t)); 1197 if (new_module == NULL) { 1198 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1199 whoami, sizeof (mod_info_t)); 1200 exit(EX_OSERR); 1201 } 1202 1203 /* and fill in info... 
*/ 1204 new_module->id = n_modules + 1; 1205 new_module->load_base = newmodp->startaddr; 1206 new_module->load_end = newmodp->endaddr; 1207 new_module->name = malloc(strlen(so_path) + 1); 1208 if (new_module->name == NULL) { 1209 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1210 whoami, strlen(so_path) + 1); 1211 exit(EX_OSERR); 1212 } 1213 (void) strcpy(new_module->name, so_path); 1214 #ifdef DEBUG 1215 if (debug & MODULEDEBUG) { 1216 (void) printf( 1217 "[process_modules] base=%#llx, end=%#llx\n", 1218 new_module->load_base, new_module->load_end); 1219 } 1220 #endif /* DEBUG */ 1221 1222 /* Create this module's nameslist */ 1223 process_namelist(new_module); 1224 1225 /* Add it to the tail of active module list */ 1226 last->next = new_module; 1227 n_modules++; 1228 1229 #ifdef DEBUG 1230 if (debug & MODULEDEBUG) { 1231 (void) printf( 1232 "[process_modules] total shared objects = %ld\n", 1233 n_modules - 1); 1234 } 1235 #endif /* DEBUG */ 1236 /* 1237 * Move to the next module in the PROF_MODULES_T list 1238 * (if present) 1239 */ 1240 if (!newmodp->next) 1241 more_modules = FALSE; 1242 1243 /* LINTED: pointer cast */ 1244 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next); 1245 1246 } while (more_modules); 1247 } 1248 1249 static void 1250 reset_active_modules(void) 1251 { 1252 mod_info_t *mi; 1253 1254 /* Except the executable, no other module should remain active */ 1255 for (mi = modules.next; mi; mi = mi->next) 1256 mi->active = FALSE; 1257 } 1258 1259 static void 1260 getpfiledata(caddr_t memp, size_t fsz) 1261 { 1262 ProfObject *objp; 1263 caddr_t file_end; 1264 bool found_pcsamples = FALSE, found_cgraph = FALSE; 1265 1266 /* 1267 * Before processing a new gmon.out, all modules except the 1268 * program executable must be made inactive, so that symbols 1269 * are searched only in the program executable, if we don't 1270 * find a MODULES_T object. 
Don't do it *after* we read a gmon.out, 1271 * because we need the active module data after we're done with 1272 * the last gmon.out, if we're doing summing. 1273 */ 1274 reset_active_modules(); 1275 1276 file_end = memp + fsz; 1277 /* LINTED: pointer cast */ 1278 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size); 1279 while ((caddr_t)objp < file_end) { 1280 #ifdef DEBUG 1281 { 1282 unsigned int type = 0; 1283 1284 if (debug & MONOUTDEBUG) { 1285 if (objp->type <= MAX_OBJTYPES) 1286 type = objp->type; 1287 1288 (void) printf( 1289 "\n[getpfiledata] object %s [%#lx]\n", 1290 objname[type], objp->type); 1291 } 1292 } 1293 #endif /* DEBUG */ 1294 switch (objp->type) { 1295 case PROF_MODULES_T : 1296 process_modules((ProfModuleList *) objp); 1297 break; 1298 1299 case PROF_CALLGRAPH_T : 1300 process_cgraph((ProfCallGraph *) objp); 1301 found_cgraph = TRUE; 1302 break; 1303 1304 case PROF_BUFFER_T : 1305 process_pcsamples((ProfBuffer *) objp); 1306 found_pcsamples = TRUE; 1307 break; 1308 1309 default : 1310 (void) fprintf(stderr, 1311 "%s: unknown prof object type=%d\n", 1312 whoami, objp->type); 1313 exit(EX_SOFTWARE); 1314 } 1315 /* LINTED: pointer cast */ 1316 objp = (ProfObject *)((caddr_t)objp + objp->size); 1317 } 1318 1319 if (!found_cgraph || !found_pcsamples) { 1320 (void) fprintf(stderr, 1321 "%s: missing callgraph/pcsamples object\n", whoami); 1322 exit(EX_SOFTWARE); 1323 } 1324 1325 if ((caddr_t)objp > file_end) { 1326 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami); 1327 exit(EX_SOFTWARE); 1328 } 1329 1330 if (first_file) 1331 first_file = FALSE; 1332 } 1333 1334 static void 1335 readarcs(FILE *pfile) 1336 { 1337 /* 1338 * the rest of the file consists of 1339 * a bunch of <from,self,count> tuples. 1340 */ 1341 /* CONSTCOND */ 1342 while (1) { 1343 struct rawarc arc; 1344 1345 if (rflag) { 1346 if (Bflag) { 1347 L_cgarc64 rtld_arc64; 1348 1349 /* 1350 * If rflag is set then this is an profiled 1351 * image generated by rtld. 
It needs to be 1352 * 'converted' to the standard data format. 1353 */ 1354 if (fread(&rtld_arc64, 1355 sizeof (L_cgarc64), 1, pfile) != 1) 1356 break; 1357 1358 if (rtld_arc64.cg_from == PRF_OUTADDR64) 1359 arc.raw_frompc = s_highpc + 0x10; 1360 else 1361 arc.raw_frompc = 1362 (pctype)rtld_arc64.cg_from; 1363 arc.raw_selfpc = (pctype)rtld_arc64.cg_to; 1364 arc.raw_count = (actype)rtld_arc64.cg_count; 1365 } else { 1366 L_cgarc rtld_arc; 1367 1368 /* 1369 * If rflag is set then this is an profiled 1370 * image generated by rtld. It needs to be 1371 * 'converted' to the standard data format. 1372 */ 1373 if (fread(&rtld_arc, 1374 sizeof (L_cgarc), 1, pfile) != 1) 1375 break; 1376 1377 if (rtld_arc.cg_from == PRF_OUTADDR) 1378 arc.raw_frompc = s_highpc + 0x10; 1379 else 1380 arc.raw_frompc = (pctype) 1381 (uintptr_t)rtld_arc.cg_from; 1382 arc.raw_selfpc = (pctype) 1383 (uintptr_t)rtld_arc.cg_to; 1384 arc.raw_count = (actype)rtld_arc.cg_count; 1385 } 1386 } else { 1387 if (Bflag) { 1388 if (fread(&arc, sizeof (struct rawarc), 1, 1389 pfile) != 1) { 1390 break; 1391 } 1392 } else { 1393 /* 1394 * If these aren't big %pc's, we need to read 1395 * into the 32-bit raw arc structure, and 1396 * assign the members into the actual arc. 
1397 */ 1398 struct rawarc32 arc32; 1399 if (fread(&arc32, sizeof (struct rawarc32), 1400 1, pfile) != 1) 1401 break; 1402 arc.raw_frompc = (pctype)arc32.raw_frompc; 1403 arc.raw_selfpc = (pctype)arc32.raw_selfpc; 1404 arc.raw_count = (actype)arc32.raw_count; 1405 } 1406 } 1407 1408 #ifdef DEBUG 1409 if (debug & SAMPLEDEBUG) { 1410 (void) printf("[getpfile] frompc 0x%llx selfpc " 1411 "0x%llx count %lld\n", arc.raw_frompc, 1412 arc.raw_selfpc, arc.raw_count); 1413 } 1414 #endif /* DEBUG */ 1415 /* 1416 * add this arc 1417 */ 1418 tally(&modules, &modules, &arc); 1419 } 1420 if (first_file) 1421 first_file = FALSE; 1422 } 1423 1424 static void 1425 readsamples(FILE *pfile) 1426 { 1427 sztype i; 1428 unsigned_UNIT sample; 1429 1430 if (samples == 0) { 1431 samples = (unsigned_UNIT *) calloc(nsamples, 1432 sizeof (unsigned_UNIT)); 1433 if (samples == 0) { 1434 (void) fprintf(stderr, 1435 "%s: No room for %d sample pc's\n", 1436 whoami, sampbytes / sizeof (unsigned_UNIT)); 1437 exit(EX_OSERR); 1438 } 1439 } 1440 1441 for (i = 0; i < nsamples; i++) { 1442 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile); 1443 if (feof(pfile)) 1444 break; 1445 samples[i] += sample; 1446 } 1447 if (i != nsamples) { 1448 (void) fprintf(stderr, 1449 "%s: unexpected EOF after reading %d/%d samples\n", 1450 whoami, --i, nsamples); 1451 exit(EX_IOERR); 1452 } 1453 } 1454 1455 static void * 1456 handle_versioned(FILE *pfile, char *filename, size_t *fsz) 1457 { 1458 int fd; 1459 bool invalid_version; 1460 caddr_t fmem; 1461 struct stat buf; 1462 ProfHeader prof_hdr; 1463 off_t lret; 1464 1465 /* 1466 * Check versioning info. For now, let's say we provide 1467 * backward compatibility, so we accept all older versions. 
1468 */ 1469 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) { 1470 perror("fread()"); 1471 exit(EX_IOERR); 1472 } 1473 1474 invalid_version = FALSE; 1475 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION) 1476 invalid_version = TRUE; 1477 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) { 1478 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION) 1479 invalid_version = FALSE; 1480 } 1481 1482 if (invalid_version) { 1483 (void) fprintf(stderr, "%s: version %d.%d not supported\n", 1484 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver); 1485 exit(EX_SOFTWARE); 1486 } 1487 1488 /* 1489 * Map gmon.out onto memory. 1490 */ 1491 (void) fclose(pfile); 1492 if ((fd = open(filename, O_RDONLY)) == -1) { 1493 perror(filename); 1494 exit(EX_IOERR); 1495 } 1496 1497 if ((lret = lseek(fd, 0, SEEK_END)) == -1) { 1498 perror(filename); 1499 exit(EX_IOERR); 1500 } 1501 *fsz = lret; 1502 1503 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0); 1504 if (fmem == MAP_FAILED) { 1505 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename); 1506 exit(EX_IOERR); 1507 } 1508 1509 /* 1510 * Before we close this fd, save this gmon.out's info to later verify 1511 * if the shared objects it references have changed since the time 1512 * they were used to generate this gmon.out 1513 */ 1514 if (fstat(fd, &buf) == -1) { 1515 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1516 whoami, filename); 1517 exit(EX_NOINPUT); 1518 } 1519 gmonout_info.dev = buf.st_dev; 1520 gmonout_info.ino = buf.st_ino; 1521 gmonout_info.mtime = buf.st_mtime; 1522 gmonout_info.size = buf.st_size; 1523 1524 (void) close(fd); 1525 1526 return ((void *) fmem); 1527 } 1528 1529 static void * 1530 openpfile(char *filename, size_t *fsz) 1531 { 1532 struct hdr tmp; 1533 FILE *pfile; 1534 unsigned long magic_num; 1535 size_t hdrsize; 1536 static bool first_time = TRUE; 1537 extern bool old_style; 1538 1539 if ((pfile = fopen(filename, "r")) == NULL) { 1540 perror(filename); 1541 exit(EX_IOERR); 1542 } 1543 
1544 /* 1545 * Read in the magic. Note that we changed the cast "unsigned long" 1546 * to "unsigned int" because that's how h_magic is defined in the 1547 * new format ProfHeader. 1548 */ 1549 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) { 1550 perror("fread()"); 1551 exit(EX_IOERR); 1552 } 1553 1554 rewind(pfile); 1555 1556 /* 1557 * First check if this is versioned or *old-style* gmon.out 1558 */ 1559 if (magic_num == (unsigned int)PROF_MAGIC) { 1560 if ((!first_time) && (old_style == TRUE)) { 1561 (void) fprintf(stderr, "%s: can't mix old & new format " 1562 "profiled files\n", whoami); 1563 exit(EX_SOFTWARE); 1564 } 1565 first_time = FALSE; 1566 old_style = FALSE; 1567 return (handle_versioned(pfile, filename, fsz)); 1568 } 1569 1570 if ((!first_time) && (old_style == FALSE)) { 1571 (void) fprintf(stderr, "%s: can't mix old & new format " 1572 "profiled files\n", whoami); 1573 exit(EX_SOFTWARE); 1574 } 1575 1576 first_time = FALSE; 1577 old_style = TRUE; 1578 fsz = 0; 1579 1580 /* 1581 * Now, we need to determine if this is a run-time linker 1582 * profiled file or if it is a standard gmon.out. 1583 * 1584 * We do this by checking if magic matches PRF_MAGIC. If it 1585 * does, then this is a run-time linker profiled file, if it 1586 * doesn't, it must be a gmon.out file. 1587 */ 1588 if (magic_num == (unsigned long)PRF_MAGIC) 1589 rflag = TRUE; 1590 else 1591 rflag = FALSE; 1592 1593 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32); 1594 1595 if (rflag) { 1596 if (Bflag) { 1597 L_hdr64 l_hdr64; 1598 1599 /* 1600 * If the rflag is set then the input file is 1601 * rtld profiled data, we'll read it in and convert 1602 * it to the standard format (ie: make it look like 1603 * a gmon.out file). 
1604 */ 1605 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) { 1606 perror("fread()"); 1607 exit(EX_IOERR); 1608 } 1609 if (l_hdr64.hd_version != PRF_VERSION_64) { 1610 (void) fprintf(stderr, 1611 "%s: expected version %d, " 1612 "got version %d when processing 64-bit " 1613 "run-time linker profiled file.\n", 1614 whoami, PRF_VERSION_64, l_hdr64.hd_version); 1615 exit(EX_SOFTWARE); 1616 } 1617 tmp.lowpc = 0; 1618 tmp.highpc = (pctype)l_hdr64.hd_hpc; 1619 tmp.ncnt = hdrsize + l_hdr64.hd_psize; 1620 } else { 1621 L_hdr l_hdr; 1622 1623 /* 1624 * If the rflag is set then the input file is 1625 * rtld profiled data, we'll read it in and convert 1626 * it to the standard format (ie: make it look like 1627 * a gmon.out file). 1628 */ 1629 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) { 1630 perror("fread()"); 1631 exit(EX_IOERR); 1632 } 1633 if (l_hdr.hd_version != PRF_VERSION) { 1634 (void) fprintf(stderr, 1635 "%s: expected version %d, " 1636 "got version %d when processing " 1637 "run-time linker profiled file.\n", 1638 whoami, PRF_VERSION, l_hdr.hd_version); 1639 exit(EX_SOFTWARE); 1640 } 1641 tmp.lowpc = 0; 1642 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc; 1643 tmp.ncnt = hdrsize + l_hdr.hd_psize; 1644 } 1645 } else { 1646 if (Bflag) { 1647 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) { 1648 perror("fread()"); 1649 exit(EX_IOERR); 1650 } 1651 } else { 1652 /* 1653 * If we're not reading big %pc's, we need to read 1654 * the 32-bit header, and assign the members to 1655 * the actual header. 1656 */ 1657 struct hdr32 hdr32; 1658 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) { 1659 perror("fread()"); 1660 exit(EX_IOERR); 1661 } 1662 tmp.lowpc = hdr32.lowpc; 1663 tmp.highpc = hdr32.highpc; 1664 tmp.ncnt = hdr32.ncnt; 1665 } 1666 } 1667 1668 /* 1669 * perform sanity check on profiled file we've opened. 
1670 */ 1671 if (tmp.lowpc >= tmp.highpc) { 1672 if (rflag) 1673 (void) fprintf(stderr, 1674 "%s: badly formed profiled data.\n", 1675 filename); 1676 else 1677 (void) fprintf(stderr, 1678 "%s: badly formed gmon.out file.\n", 1679 filename); 1680 exit(EX_SOFTWARE); 1681 } 1682 1683 if (s_highpc != 0 && (tmp.lowpc != h.lowpc || 1684 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) { 1685 (void) fprintf(stderr, 1686 "%s: incompatible with first gmon file\n", 1687 filename); 1688 exit(EX_IOERR); 1689 } 1690 h = tmp; 1691 s_lowpc = h.lowpc; 1692 s_highpc = h.highpc; 1693 lowpc = h.lowpc / sizeof (UNIT); 1694 highpc = h.highpc / sizeof (UNIT); 1695 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0; 1696 nsamples = sampbytes / sizeof (unsigned_UNIT); 1697 1698 #ifdef DEBUG 1699 if (debug & SAMPLEDEBUG) { 1700 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc " 1701 "0x%llx hdr.ncnt %lld\n", 1702 h.lowpc, h.highpc, h.ncnt); 1703 (void) printf( 1704 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n", 1705 s_lowpc, s_highpc); 1706 (void) printf( 1707 "[openpfile] lowpc 0x%llx highpc 0x%llx\n", 1708 lowpc, highpc); 1709 (void) printf("[openpfile] sampbytes %d nsamples %d\n", 1710 sampbytes, nsamples); 1711 } 1712 #endif /* DEBUG */ 1713 1714 return ((void *) pfile); 1715 } 1716 1717 /* 1718 * Information from a gmon.out file depends on whether it's versioned 1719 * or non-versioned, *old style* gmon.out. If old-style, it is in two 1720 * parts : an array of sampling hits within pc ranges, and the arcs. If 1721 * versioned, it contains a header, followed by any number of 1722 * modules/callgraph/pcsample_buffer objects. 
1723 */ 1724 static void 1725 getpfile(char *filename) 1726 { 1727 void *handle; 1728 size_t fsz; 1729 1730 handle = openpfile(filename, &fsz); 1731 1732 if (old_style) { 1733 readsamples((FILE *)handle); 1734 readarcs((FILE *)handle); 1735 (void) fclose((FILE *)handle); 1736 return; 1737 } 1738 1739 getpfiledata((caddr_t)handle, fsz); 1740 (void) munmap(handle, fsz); 1741 } 1742 1743 int 1744 main(int argc, char **argv) 1745 { 1746 char **sp; 1747 nltype **timesortnlp; 1748 int c; 1749 int errflg; 1750 1751 prog_name = *argv; /* preserve program name */ 1752 debug = 0; 1753 nflag = FALSE; 1754 bflag = TRUE; 1755 lflag = FALSE; 1756 Cflag = FALSE; 1757 first_file = TRUE; 1758 rflag = FALSE; 1759 Bflag = FALSE; 1760 errflg = FALSE; 1761 1762 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF) 1763 switch (c) { 1764 case 'a': 1765 aflag = TRUE; 1766 break; 1767 case 'b': 1768 bflag = FALSE; 1769 break; 1770 case 'c': 1771 cflag = TRUE; 1772 break; 1773 case 'C': 1774 Cflag = TRUE; 1775 break; 1776 case 'd': 1777 dflag = TRUE; 1778 debug |= atoi(optarg); 1779 (void) printf("[main] debug = 0x%x\n", debug); 1780 break; 1781 case 'D': 1782 Dflag = TRUE; 1783 break; 1784 case 'E': 1785 addlist(Elist, optarg); 1786 Eflag = TRUE; 1787 addlist(elist, optarg); 1788 eflag = TRUE; 1789 break; 1790 case 'e': 1791 addlist(elist, optarg); 1792 eflag = TRUE; 1793 break; 1794 case 'F': 1795 addlist(Flist, optarg); 1796 Fflag = TRUE; 1797 addlist(flist, optarg); 1798 fflag = TRUE; 1799 break; 1800 case 'f': 1801 addlist(flist, optarg); 1802 fflag = TRUE; 1803 break; 1804 case 'l': 1805 lflag = TRUE; 1806 break; 1807 case 'n': 1808 nflag = TRUE; 1809 number_funcs_toprint = atoi(optarg); 1810 break; 1811 case 's': 1812 sflag = TRUE; 1813 break; 1814 case 'z': 1815 zflag = TRUE; 1816 break; 1817 case '?': 1818 errflg++; 1819 1820 } 1821 1822 if (errflg) { 1823 (void) fprintf(stderr, 1824 "usage: gprof [ -abcCDlsz ] [ -e function-name ] " 1825 "[ -E function-name ]\n\t[ -f 
function-name ] " 1826 "[ -F function-name ]\n\t[ image-file " 1827 "[ profile-file ... ] ]\n"); 1828 exit(EX_USAGE); 1829 } 1830 1831 if (optind < argc) { 1832 a_outname = argv[optind++]; 1833 } else { 1834 a_outname = A_OUTNAME; 1835 } 1836 if (optind < argc) { 1837 gmonname = argv[optind++]; 1838 } else { 1839 gmonname = GMONNAME; 1840 } 1841 /* 1842 * turn off default functions 1843 */ 1844 for (sp = &defaultEs[0]; *sp; sp++) { 1845 Eflag = TRUE; 1846 addlist(Elist, *sp); 1847 eflag = TRUE; 1848 addlist(elist, *sp); 1849 } 1850 /* 1851 * how many ticks per second? 1852 * if we can't tell, report time in ticks. 1853 */ 1854 hz = sysconf(_SC_CLK_TCK); 1855 if (hz == -1) { 1856 hz = 1; 1857 (void) fprintf(stderr, "time is in ticks, not seconds\n"); 1858 } 1859 1860 getnfile(a_outname); 1861 1862 /* 1863 * get information about mon.out file(s). 1864 */ 1865 do { 1866 getpfile(gmonname); 1867 if (optind < argc) 1868 gmonname = argv[optind++]; 1869 else 1870 optind++; 1871 } while (optind <= argc); 1872 /* 1873 * dump out a gmon.sum file if requested 1874 */ 1875 if (sflag || Dflag) 1876 dumpsum(GMONSUM); 1877 1878 if (old_style) { 1879 /* 1880 * assign samples to procedures 1881 */ 1882 asgnsamples(); 1883 } 1884 1885 /* 1886 * assemble the dynamic profile 1887 */ 1888 timesortnlp = doarcs(); 1889 1890 /* 1891 * print the dynamic profile 1892 */ 1893 #ifdef DEBUG 1894 if (debug & ANYDEBUG) { 1895 /* raw output of all symbols in all their glory */ 1896 int i; 1897 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, " 1898 "#calls, selfcalls, index \n"); 1899 for (i = 0; i < modules.nname; i++) { /* Print each symbol */ 1900 if (timesortnlp[i]->name) 1901 (void) printf(" %s ", timesortnlp[i]->name); 1902 else 1903 (void) printf(" <cycle> "); 1904 (void) printf(" %lld ", timesortnlp[i]->value); 1905 (void) printf(" %lld ", timesortnlp[i]->svalue); 1906 (void) printf(" %f ", timesortnlp[i]->time); 1907 (void) printf(" %lld ", timesortnlp[i]->ncall); 1908 (void) 
printf(" %lld ", timesortnlp[i]->selfcalls); 1909 (void) printf(" %d ", timesortnlp[i]->index); 1910 (void) printf(" \n"); 1911 } 1912 } 1913 #endif /* DEBUG */ 1914 1915 printgprof(timesortnlp); 1916 /* 1917 * print the flat profile 1918 */ 1919 printprof(); 1920 /* 1921 * print the index 1922 */ 1923 printindex(); 1924 1925 /* 1926 * print the modules 1927 */ 1928 printmodules(); 1929 1930 done(); 1931 /* NOTREACHED */ 1932 return (0); 1933 } 1934