1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sysexits.h> 29 #include <stdlib.h> 30 #include <stdio.h> 31 #include <unistd.h> 32 #include "gprof.h" 33 #include "profile.h" 34 35 char *whoami = "gprof"; 36 static pctype lowpc, highpc; /* range profiled, in UNIT's */ 37 38 /* 39 * things which get -E excluded by default. 40 */ 41 static char *defaultEs[] = { 42 "mcount", 43 "__mcleanup", 44 NULL 45 }; 46 47 #ifdef DEBUG 48 49 static char *objname[] = { 50 "<invalid object>", 51 "PROF_BUFFER_T", 52 "PROF_CALLGRAPH_T", 53 "PROF_MODULES_T", 54 NULL 55 }; 56 #define MAX_OBJTYPES 3 57 58 #endif /* DEBUG */ 59 60 void 61 done(void) 62 { 63 64 exit(EX_OK); 65 } 66 67 static pctype 68 max(pctype a, pctype b) 69 { 70 if (a > b) 71 return (a); 72 return (b); 73 } 74 75 static pctype 76 min(pctype a, pctype b) 77 { 78 if (a < b) 79 return (a); 80 return (b); 81 } 82 83 /* 84 * calculate scaled entry point addresses (to save time in asgnsamples), 85 * and possibly push the scaled entry points over the entry mask, 86 * if it turns out that the entry point is in one bucket and the code 87 * for a routine is in the next bucket. 88 * 89 */ 90 static void 91 alignentries(void) 92 { 93 struct nl *nlp; 94 #ifdef DEBUG 95 pctype bucket_of_entry; 96 pctype bucket_of_code; 97 #endif /* DEBUG */ 98 99 /* for old-style gmon.out, nameslist is only in modules.nl */ 100 101 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 102 nlp->svalue = nlp->value / sizeof (UNIT); 103 #ifdef DEBUG 104 bucket_of_entry = (nlp->svalue - lowpc) / scale; 105 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 106 if (bucket_of_entry < bucket_of_code) { 107 if (debug & SAMPLEDEBUG) { 108 (void) printf( 109 "[alignentries] pushing svalue 0x%llx " 110 "to 0x%llx\n", nlp->svalue, 111 nlp->svalue + UNITS_TO_CODE); 112 } 113 } 114 #endif /* DEBUG */ 115 } 116 } 117 118 /* 119 * old-style gmon.out 120 * ------------------ 121 * 122 * Assign samples to the procedures to which they belong. 123 * 124 * There are three cases as to where pcl and pch can be 125 * with respect to the routine entry addresses svalue0 and svalue1 126 * as shown in the following diagram. overlap computes the 127 * distance between the arrows, the fraction of the sample 128 * that is to be credited to the routine which starts at svalue0. 129 * 130 * svalue0 svalue1 131 * | | 132 * v v 133 * 134 * +-----------------------------------------------+ 135 * | | 136 * | ->| |<- ->| |<- ->| |<- | 137 * | | | | | | 138 * +---------+ +---------+ +---------+ 139 * 140 * ^ ^ ^ ^ ^ ^ 141 * | | | | | | 142 * pcl pch pcl pch pcl pch 143 * 144 * For the vax we assert that samples will never fall in the first 145 * two bytes of any routine, since that is the entry mask, 146 * thus we give call alignentries() to adjust the entry points if 147 * the entry mask falls in one bucket but the code for the routine 148 * doesn't start until the next bucket. In conjunction with the 149 * alignment of routine addresses, this should allow us to have 150 * only one sample for every four bytes of text space and never 151 * have any overlap (the two end cases, above). 152 */ 153 static void 154 asgnsamples(void) 155 { 156 sztype i, j; 157 unsigned_UNIT ccnt; 158 double time; 159 pctype pcl, pch; 160 pctype overlap; 161 pctype svalue0, svalue1; 162 163 extern mod_info_t modules; 164 nltype *nl = modules.nl; 165 sztype nname = modules.nname; 166 167 /* read samples and assign to namelist symbols */ 168 scale = highpc - lowpc; 169 scale /= nsamples; 170 alignentries(); 171 for (i = 0, j = 1; i < nsamples; i++) { 172 ccnt = samples[i]; 173 if (ccnt == 0) 174 continue; 175 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/ 176 pcl = lowpc + scale * i; 177 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/ 178 pch = lowpc + scale * (i + 1); 179 time = ccnt; 180 #ifdef DEBUG 181 if (debug & SAMPLEDEBUG) { 182 (void) printf( 183 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n", 184 pcl, pch, ccnt); 185 } 186 #endif /* DEBUG */ 187 totime += time; 188 for (j = (j ? j - 1 : 0); j < nname; j++) { 189 svalue0 = nl[j].svalue; 190 svalue1 = nl[j+1].svalue; 191 /* 192 * if high end of tick is below entry address, 193 * go for next tick. 194 */ 195 if (pch < svalue0) 196 break; 197 /* 198 * if low end of tick into next routine, 199 * go for next routine. 200 */ 201 if (pcl >= svalue1) 202 continue; 203 overlap = min(pch, svalue1) - max(pcl, svalue0); 204 if (overlap != 0) { 205 #ifdef DEBUG 206 if (debug & SAMPLEDEBUG) { 207 (void) printf("[asgnsamples] " 208 "(0x%llx->0x%llx-0x%llx) %s gets " 209 "%f ticks %lld overlap\n", 210 nl[j].value/sizeof (UNIT), svalue0, 211 svalue1, nl[j].name, 212 overlap * time / scale, overlap); 213 } 214 #endif /* DEBUG */ 215 nl[j].time += overlap * time / scale; 216 } 217 } 218 } 219 #ifdef DEBUG 220 if (debug & SAMPLEDEBUG) { 221 (void) printf("[asgnsamples] totime %f\n", totime); 222 } 223 #endif /* DEBUG */ 224 } 225 226 227 static void 228 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs, 229 unsigned long ncallees) 230 { 231 ProfCallGraph prof_cgraph; 232 ProfFunction prof_func; 233 arctype *arcp; 234 mod_info_t *mi; 235 nltype *nlp; 236 size_t cur_offset; 237 unsigned long caller_id = 0, callee_id = 0; 238 239 /* 240 * Write the callgraph header 241 */ 242 prof_cgraph.type = PROF_CALLGRAPH_T; 243 prof_cgraph.version = PROF_CALLGRAPH_VER; 244 prof_cgraph.functions = PROFCGRAPH_SZ; 245 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ; 246 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) { 247 perror(filename); 248 exit(EX_IOERR); 249 } 250 /* CONSTCOND */ 251 if (CGRAPH_FILLER) 252 (void) fseek(fp, CGRAPH_FILLER, SEEK_CUR); 253 254 /* Current offset inside the callgraph object */ 255 cur_offset = prof_cgraph.functions; 256 257 for (mi = &modules; mi; mi = mi->next) { 258 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 259 if (nlp->ncallers == 0) 260 continue; 261 262 /* If this is the last callee, set next_to to 0 */ 263 callee_id++; 264 if (callee_id == ncallees) 265 prof_func.next_to = 0; 266 else { 267 prof_func.next_to = cur_offset + 268 nlp->ncallers * PROFFUNC_SZ; 269 } 270 271 /* 272 * Dump this callee's raw arc information with all 273 * its callers 274 */ 275 caller_id = 1; 276 for (arcp = nlp->parents; arcp; 277 arcp = arcp->arc_parentlist) { 278 /* 279 * If no more callers for this callee, set 280 * next_from to 0 281 */ 282 if (caller_id == nlp->ncallers) 283 prof_func.next_from = 0; 284 else { 285 prof_func.next_from = cur_offset + 286 PROFFUNC_SZ; 287 } 288 289 prof_func.frompc = 290 arcp->arc_parentp->module->load_base + 291 (arcp->arc_parentp->value - 292 arcp->arc_parentp->module->txt_origin); 293 prof_func.topc = mi->load_base + 294 (nlp->value - mi->txt_origin); 295 prof_func.count = arcp->arc_count; 296 297 298 if (fwrite(&prof_func, sizeof (ProfFunction), 299 1, fp) != 1) { 300 perror(filename); 301 exit(EX_IOERR); 302 } 303 /* CONSTCOND */ 304 if (FUNC_FILLER) 305 (void) fseek(fp, FUNC_FILLER, SEEK_CUR); 306 307 cur_offset += PROFFUNC_SZ; 308 caller_id++; 309 } 310 } /* for nlp... */ 311 } /* for mi... */ 312 } 313 314 /* 315 * To save all pc-hits in all the gmon.out's is infeasible, as this 316 * may become quite huge even with a small number of files to sum. 317 * Instead, we'll dump *fictitious hits* to correct functions 318 * by scanning module namelists. Again, since this is summing 319 * pc-hits, we may have to dump the pcsamples out in chunks if the 320 * number of pc-hits is high. 321 */ 322 static void 323 dump_hits(FILE *fp, char *filename, nltype *nlp) 324 { 325 Address *p, hitpc; 326 size_t i, nelem, ntowrite; 327 328 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE) 329 nelem = PROF_BUFFER_SIZE; 330 331 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) { 332 (void) fprintf(stderr, "%s: no room for %d pcsamples\n", 333 whoami, nelem); 334 exit(EX_OSERR); 335 } 336 337 /* 338 * Set up *fictitious* hits (to function entry) buffer 339 */ 340 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin); 341 for (i = 0; i < nelem; i++) 342 p[i] = hitpc; 343 344 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) { 345 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) { 346 perror(filename); 347 exit(EX_IOERR); 348 } 349 } 350 351 if (ntowrite) { 352 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) { 353 perror(filename); 354 exit(EX_IOERR); 355 } 356 } 357 358 free(p); 359 } 360 361 static void 362 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs, 363 unsigned long *ncallees) 364 { 365 ProfBuffer prof_buffer; 366 arctype *arcp; 367 mod_info_t *mi; 368 nltype *nlp; 369 370 prof_buffer.type = PROF_BUFFER_T; 371 prof_buffer.version = PROF_BUFFER_VER; 372 prof_buffer.buffer = PROFBUF_SZ; 373 prof_buffer.bufsize = n_pcsamples; 374 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address); 375 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) { 376 perror(filename); 377 exit(EX_IOERR); 378 } 379 /* CONSTCOND */ 380 if (BUF_FILLER) 381 (void) fseek(fp, BUF_FILLER, SEEK_CUR); 382 383 *tarcs = 0; 384 *ncallees = 0; 385 for (mi = &modules; mi; mi = mi->next) { 386 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 387 if (nlp->nticks) 388 dump_hits(fp, filename, nlp); 389 390 nlp->ncallers = 0; 391 for (arcp = nlp->parents; arcp; 392 arcp = arcp->arc_parentlist) { 393 (nlp->ncallers)++; 394 } 395 396 if (nlp->ncallers) { 397 (*tarcs) += nlp->ncallers; 398 (*ncallees)++; 399 } 400 } 401 } 402 } 403 404 static void 405 dump_modules(FILE *fp, char *filename, size_t pbuf_sz) 406 { 407 char *pbuf, *p; 408 size_t namelen; 409 Index off_nxt, off_path; 410 mod_info_t *mi; 411 412 ProfModuleList prof_modlist; 413 ProfModule prof_mod; 414 415 /* Allocate for path strings buffer */ 416 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN); 417 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) { 418 (void) fprintf(stderr, "%s: no room for %d bytes\n", 419 whoami, pbuf_sz * sizeof (char)); 420 exit(EX_OSERR); 421 } 422 423 /* Dump out PROF_MODULE_T info for all non-aout modules */ 424 prof_modlist.type = PROF_MODULES_T; 425 prof_modlist.version = PROF_MODULES_VER; 426 prof_modlist.modules = PROFMODLIST_SZ; 427 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ + 428 pbuf_sz; 429 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) { 430 perror(filename); 431 exit(EX_IOERR); 432 } 433 /* CONSTCOND */ 434 if (MODLIST_FILLER) 435 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR); 436 437 /* 438 * Initialize offsets for ProfModule elements. 439 */ 440 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ; 441 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ; 442 443 for (mi = modules.next; mi; mi = mi->next) { 444 if (mi->next) 445 prof_mod.next = off_nxt; 446 else 447 prof_mod.next = 0; 448 prof_mod.path = off_path; 449 prof_mod.startaddr = mi->load_base; 450 prof_mod.endaddr = mi->load_end; 451 452 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) { 453 perror(filename); 454 exit(EX_IOERR); 455 } 456 457 /* CONSTCOND */ 458 if (MOD_FILLER) 459 (void) fseek(fp, MOD_FILLER, SEEK_CUR); 460 461 (void) strcpy(p, mi->name); 462 namelen = strlen(mi->name); 463 p += namelen + 1; 464 465 /* Note that offset to every path str need not be aligned */ 466 off_nxt += PROFMOD_SZ; 467 off_path += namelen + 1; 468 } 469 470 /* Write out the module path strings */ 471 if (pbuf_sz) { 472 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) { 473 perror(filename); 474 exit(EX_IOERR); 475 } 476 477 free(pbuf); 478 } 479 } 480 481 /* 482 * If we have inactive modules, their current load addresses may overlap with 483 * active ones, and so we've to assign fictitious, non-overlapping addresses 484 * to all modules before we dump them. 485 */ 486 static void 487 fixup_maps(size_t *pathsz) 488 { 489 unsigned int n_inactive = 0; 490 Address lbase = 0, lend; 491 mod_info_t *mi; 492 493 /* Pick the lowest load address among modules */ 494 *pathsz = 0; 495 for (mi = &modules; mi; mi = mi->next) { 496 497 if (mi->active == FALSE) 498 n_inactive++; 499 500 if (mi == &modules || mi->load_base < lbase) 501 lbase = mi->load_base; 502 503 /* 504 * Return total path size of non-aout modules only 505 */ 506 if (mi != &modules) 507 *pathsz = (*pathsz) + strlen(mi->name) + 1; 508 } 509 510 /* 511 * All module info is in fine shape already if there are no 512 * inactive modules 513 */ 514 if (n_inactive == 0) 515 return; 516 517 /* 518 * Assign fictitious load addresses to all (non-aout) modules so 519 * that sum info can be dumped out. 520 */ 521 for (mi = modules.next; mi; mi = mi->next) { 522 lend = lbase + (mi->data_end - mi->txt_origin); 523 if ((lbase < modules.load_base && lend < modules.load_base) || 524 (lbase > modules.load_end && lend > modules.load_end)) { 525 526 mi->load_base = lbase; 527 mi->load_end = lend; 528 529 /* just to give an appearance of reality */ 530 lbase = CEIL(lend + PGSZ, PGSZ); 531 } else { 532 /* 533 * can't use this lbase & lend pair, as it 534 * overlaps with aout's addresses 535 */ 536 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ); 537 mi->load_end = mi->load_base + (lend - lbase); 538 539 lbase = CEIL(mi->load_end + PGSZ, PGSZ); 540 } 541 } 542 } 543 544 static void 545 dump_gprofhdr(FILE *fp, char *filename) 546 { 547 ProfHeader prof_hdr; 548 549 prof_hdr.h_magic = PROF_MAGIC; 550 prof_hdr.h_major_ver = PROF_MAJOR_VERSION; 551 prof_hdr.h_minor_ver = PROF_MINOR_VERSION; 552 prof_hdr.size = PROFHDR_SZ; 553 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) { 554 perror(filename); 555 exit(EX_IOERR); 556 } 557 558 /* CONSTCOND */ 559 if (HDR_FILLER) 560 (void) fseek(fp, HDR_FILLER, SEEK_CUR); 561 } 562 563 static void 564 dumpsum_ostyle(char *sumfile) 565 { 566 nltype *nlp; 567 arctype *arcp; 568 struct rawarc arc; 569 struct rawarc32 arc32; 570 FILE *sfile; 571 572 if ((sfile = fopen(sumfile, "w")) == NULL) { 573 perror(sumfile); 574 exit(EX_IOERR); 575 } 576 /* 577 * dump the header; use the last header read in 578 */ 579 if (Bflag) { 580 if (fwrite(&h, sizeof (h), 1, sfile) != 1) { 581 perror(sumfile); 582 exit(EX_IOERR); 583 } 584 } else { 585 struct hdr32 hdr; 586 hdr.lowpc = (pctype32)h.lowpc; 587 hdr.highpc = (pctype32)h.highpc; 588 hdr.ncnt = (pctype32)h.ncnt; 589 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) { 590 perror(sumfile); 591 exit(EX_IOERR); 592 } 593 } 594 /* 595 * dump the samples 596 */ 597 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) != 598 nsamples) { 599 perror(sumfile); 600 exit(EX_IOERR); 601 } 602 /* 603 * dump the normalized raw arc information. For old-style dumping, 604 * the only namelist is in modules.nl 605 */ 606 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 607 for (arcp = nlp->children; arcp; 608 arcp = arcp->arc_childlist) { 609 if (Bflag) { 610 arc.raw_frompc = arcp->arc_parentp->value; 611 arc.raw_selfpc = arcp->arc_childp->value; 612 arc.raw_count = arcp->arc_count; 613 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) { 614 perror(sumfile); 615 exit(EX_IOERR); 616 } 617 } else { 618 arc32.raw_frompc = 619 (pctype32)arcp->arc_parentp->value; 620 arc32.raw_selfpc = 621 (pctype32)arcp->arc_childp->value; 622 arc32.raw_count = (actype32)arcp->arc_count; 623 if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 624 1) { 625 perror(sumfile); 626 exit(EX_IOERR); 627 } 628 } 629 #ifdef DEBUG 630 if (debug & SAMPLEDEBUG) { 631 (void) printf( 632 "[dumpsum_ostyle] frompc 0x%llx selfpc " 633 "0x%llx count %lld\n", arc.raw_frompc, 634 arc.raw_selfpc, arc.raw_count); 635 } 636 #endif /* DEBUG */ 637 } 638 } 639 (void) fclose(sfile); 640 } 641 642 /* 643 * dump out the gmon.sum file 644 */ 645 static void 646 dumpsum(char *sumfile) 647 { 648 FILE *sfile; 649 size_t pathbuf_sz; 650 unsigned long total_arcs; /* total number of arcs in all */ 651 unsigned long ncallees; /* no. of callees with parents */ 652 653 if (old_style) { 654 dumpsum_ostyle(sumfile); 655 return; 656 } 657 658 if ((sfile = fopen(sumfile, "w")) == NULL) { 659 perror(sumfile); 660 exit(EX_IOERR); 661 } 662 663 /* 664 * Dump the new-style gprof header. Even if one of the original 665 * profiled-files was of a older version, the summed file is of 666 * current version only. 667 */ 668 dump_gprofhdr(sfile, sumfile); 669 670 /* 671 * Fix up load-maps and dump out modules info 672 * 673 * Fix up module load maps so inactive modules get *some* address 674 * (and btw, could you get the total size of non-aout module path 675 * strings please ?) 676 */ 677 fixup_maps(&pathbuf_sz); 678 dump_modules(sfile, sumfile, pathbuf_sz); 679 680 681 /* 682 * Dump out the summ'd pcsamples 683 * 684 * For dumping call graph information later, we need certain 685 * statistics (like total arcs, number of callers for each node); 686 * collect these also while we are at it. 687 */ 688 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees); 689 690 /* 691 * Dump out the summ'd call graph information 692 */ 693 dump_callgraph(sfile, sumfile, total_arcs, ncallees); 694 695 696 (void) fclose(sfile); 697 } 698 699 static void 700 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp) 701 { 702 nltype *parentp; 703 nltype *childp; 704 705 /* 706 * if count == 0 this is a null arc and 707 * we don't need to tally it. 708 */ 709 if (rawp->raw_count == 0) 710 return; 711 712 /* 713 * Lookup the caller and callee pcs in namelists of 714 * appropriate modules 715 */ 716 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL); 717 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL); 718 if (childp && parentp) { 719 if (!Dflag) 720 childp->ncall += rawp->raw_count; 721 else { 722 if (first_file) 723 childp->ncall += rawp->raw_count; 724 else { 725 childp->ncall -= rawp->raw_count; 726 if (childp->ncall < 0) 727 childp->ncall = 0; 728 } 729 } 730 731 #ifdef DEBUG 732 if (debug & TALLYDEBUG) { 733 (void) printf("[tally] arc from %s to %s traversed " 734 "%lld times\n", parentp->name, 735 childp->name, rawp->raw_count); 736 } 737 #endif /* DEBUG */ 738 addarc(parentp, childp, rawp->raw_count); 739 } 740 } 741 742 /* 743 * Look up a module's base address in a sorted list of pc-hits. Unlike 744 * nllookup(), this deals with misses by mapping them to the next *higher* 745 * pc-hit. This is so that we get into the module's first pc-hit rightaway, 746 * even if the module's entry-point (load_base) itself is not a hit. 747 */ 748 static Address * 749 locate(Address *pclist, size_t nelem, Address keypc) 750 { 751 size_t low = 0, middle, high = nelem - 1; 752 753 if (keypc <= pclist[low]) 754 return (pclist); 755 756 if (keypc > pclist[high]) 757 return (NULL); 758 759 while (low != high) { 760 middle = (high + low) >> 1; 761 762 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc)) 763 return (&pclist[middle + 1]); 764 765 if (pclist[middle] >= keypc) 766 high = middle; 767 else 768 low = middle + 1; 769 } 770 771 /* must never reach here! */ 772 return (NULL); 773 } 774 775 static void 776 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples) 777 { 778 Address *pcptr, *pcse = pcsmpl + n_samples; 779 pctype nxt_func; 780 nltype *fnl; 781 size_t func_nticks; 782 #ifdef DEBUG 783 size_t n_hits_in_module = 0; 784 #endif /* DEBUG */ 785 786 /* Locate the first pc-hit for this module */ 787 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) { 788 #ifdef DEBUG 789 if (debug & PCSMPLDEBUG) { 790 (void) printf("[assign_pcsamples] no pc-hits in\n"); 791 (void) printf( 792 " `%s'\n", module->name); 793 } 794 #endif /* DEBUG */ 795 return; /* no pc-hits in this module */ 796 } 797 798 /* Assign all pc-hits in this module to appropriate functions */ 799 while ((pcptr < pcse) && (*pcptr < module->load_end)) { 800 801 /* Update the corresponding function's time */ 802 if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) { 803 /* 804 * Collect all pc-hits in this function. Each 805 * pc-hit counts as 1 tick. 806 */ 807 func_nticks = 0; 808 while ((pcptr < pcse) && (*pcptr < nxt_func)) { 809 func_nticks++; 810 pcptr++; 811 } 812 813 if (func_nticks == 0) 814 pcptr++; 815 else { 816 fnl->nticks += func_nticks; 817 fnl->time += func_nticks; 818 totime += func_nticks; 819 } 820 821 #ifdef DEBUG 822 n_hits_in_module += func_nticks; 823 #endif /* DEBUG */ 824 } else { 825 /* 826 * pc sample could not be assigned to function; 827 * probably in a PLT 828 */ 829 pcptr++; 830 } 831 } 832 833 #ifdef DEBUG 834 if (debug & PCSMPLDEBUG) { 835 (void) printf( 836 "[assign_pcsamples] %ld hits in\n", n_hits_in_module); 837 (void) printf(" `%s'\n", module->name); 838 } 839 #endif /* DEBUG */ 840 } 841 842 int 843 pc_cmp(const void *arg1, const void *arg2) 844 { 845 Address *pc1 = (Address *)arg1; 846 Address *pc2 = (Address *)arg2; 847 848 if (*pc1 > *pc2) 849 return (1); 850 851 if (*pc1 < *pc2) 852 return (-1); 853 854 return (0); 855 } 856 857 static void 858 process_pcsamples(ProfBuffer *bufp) 859 { 860 Address *pc_samples; 861 mod_info_t *mi; 862 caddr_t p; 863 size_t chunk_size, nelem_read, nelem_to_read; 864 865 #ifdef DEBUG 866 if (debug & PCSMPLDEBUG) { 867 (void) printf( 868 "[process_pcsamples] number of pcsamples = %lld\n", 869 bufp->bufsize); 870 } 871 #endif /* DEBUG */ 872 873 /* buffer with no pc samples ? */ 874 if (bufp->bufsize == 0) 875 return; 876 877 /* 878 * If we're processing pcsamples of a profile sum, we could have 879 * more than PROF_BUFFER_SIZE number of samples. In such a case, 880 * we must read the pcsamples in chunks. 881 */ 882 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE) 883 chunk_size = PROF_BUFFER_SIZE; 884 885 /* Allocate for the pcsample chunk */ 886 pc_samples = (Address *) calloc(chunk_size, sizeof (Address)); 887 if (pc_samples == NULL) { 888 (void) fprintf(stderr, "%s: no room for %d sample pc's\n", 889 whoami, chunk_size); 890 exit(EX_OSERR); 891 } 892 893 /* Copy the current set of pcsamples */ 894 nelem_read = 0; 895 nelem_to_read = bufp->bufsize; 896 p = (char *)bufp + bufp->buffer; 897 898 while (nelem_read < nelem_to_read) { 899 (void) memcpy((void *) pc_samples, p, 900 chunk_size * sizeof (Address)); 901 902 /* Sort the pc samples */ 903 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp); 904 905 /* 906 * Assign pcsamples to functions in the currently active 907 * module list 908 */ 909 for (mi = &modules; mi; mi = mi->next) { 910 if (mi->active == FALSE) 911 continue; 912 assign_pcsamples(mi, pc_samples, chunk_size); 913 } 914 915 p += (chunk_size * sizeof (Address)); 916 nelem_read += chunk_size; 917 918 if ((nelem_to_read - nelem_read) < chunk_size) 919 chunk_size = nelem_to_read - nelem_read; 920 } 921 922 free(pc_samples); 923 924 /* Update total number of pcsamples read so far */ 925 n_pcsamples += bufp->bufsize; 926 } 927 928 static mod_info_t * 929 find_module(Address addr) 930 { 931 mod_info_t *mi; 932 933 for (mi = &modules; mi; mi = mi->next) { 934 if (mi->active == FALSE) 935 continue; 936 937 if (addr >= mi->load_base && addr < mi->load_end) 938 return (mi); 939 } 940 941 return (NULL); 942 } 943 944 static void 945 process_cgraph(ProfCallGraph *cgp) 946 { 947 struct rawarc arc; 948 mod_info_t *callee_mi, *caller_mi; 949 ProfFunction *calleep, *callerp; 950 Index caller_off, callee_off; 951 952 /* 953 * Note that *callee_off* increment in the for loop below 954 * uses *calleep* and *calleep* doesn't get set until the for loop 955 * is entered. We don't expect the increment to be executed before 956 * the loop body is executed atleast once, so this should be ok. 957 */ 958 for (callee_off = cgp->functions; callee_off; 959 callee_off = calleep->next_to) { 960 961 /* LINTED: pointer cast */ 962 calleep = (ProfFunction *)((char *)cgp + callee_off); 963 964 /* 965 * We could choose either to sort the {caller, callee} 966 * list twice and assign callee/caller to modules or inspect 967 * each callee/caller in the active modules list. Since 968 * the modules list is usually very small, we'l choose the 969 * latter. 970 */ 971 972 /* 973 * If we cannot identify a callee with a module, there's 974 * no use worrying about who called it. 975 */ 976 if ((callee_mi = find_module(calleep->topc)) == NULL) { 977 #ifdef DEBUG 978 if (debug & CGRAPHDEBUG) { 979 (void) printf( 980 "[process_cgraph] callee %#llx missed\n", 981 calleep->topc); 982 } 983 #endif /* DEBUG */ 984 continue; 985 } else 986 arc.raw_selfpc = calleep->topc; 987 988 for (caller_off = callee_off; caller_off; 989 caller_off = callerp->next_from) { 990 991 /* LINTED: pointer cast */ 992 callerp = (ProfFunction *)((char *)cgp + caller_off); 993 if ((caller_mi = find_module(callerp->frompc)) == 994 NULL) { 995 #ifdef DEBUG 996 if (debug & CGRAPHDEBUG) { 997 (void) printf( 998 "[process_cgraph] caller %#llx " 999 "missed\n", callerp->frompc); 1000 } 1001 #endif /* DEBUG */ 1002 continue; 1003 } 1004 1005 arc.raw_frompc = callerp->frompc; 1006 arc.raw_count = callerp->count; 1007 1008 #ifdef DEBUG 1009 if (debug & CGRAPHDEBUG) { 1010 (void) printf( 1011 "[process_cgraph] arc <%#llx, %#llx, " 1012 "%lld>\n", arc.raw_frompc, arc.raw_selfpc, 1013 arc.raw_count); 1014 } 1015 #endif /* DEBUG */ 1016 tally(caller_mi, callee_mi, &arc); 1017 } 1018 } 1019 1020 #ifdef DEBUG 1021 puts("\n"); 1022 #endif /* DEBUG */ 1023 } 1024 1025 /* 1026 * Two modules overlap each other if they don't lie completely *outside* 1027 * each other. 1028 */ 1029 static bool 1030 does_overlap(ProfModule *new, mod_info_t *old) 1031 { 1032 /* case 1: new module lies completely *before* the old one */ 1033 if (new->startaddr < old->load_base && new->endaddr <= old->load_base) 1034 return (FALSE); 1035 1036 /* case 2: new module lies completely *after* the old one */ 1037 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end) 1038 return (FALSE); 1039 1040 /* probably a dlopen: the modules overlap each other */ 1041 return (TRUE); 1042 } 1043 1044 static bool 1045 is_same_as_aout(char *modpath, struct stat *buf) 1046 { 1047 if (stat(modpath, buf) == -1) { 1048 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1049 whoami, modpath); 1050 exit(EX_NOINPUT); 1051 } 1052 1053 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino)) 1054 return (TRUE); 1055 else 1056 return (FALSE); 1057 } 1058 1059 static void 1060 process_modules(ProfModuleList *modlp) 1061 { 1062 ProfModule *newmodp; 1063 mod_info_t *mi, *last, *new_module; 1064 char *so_path; 1065 bool more_modules = TRUE; 1066 struct stat so_statbuf; 1067 1068 #ifdef DEBUG 1069 if (debug & MODULEDEBUG) { 1070 (void) printf("[process_modules] module obj version %u\n", 1071 modlp->version); 1072 } 1073 #endif /* DEBUG */ 1074 1075 /* Check version of module type object */ 1076 if (modlp->version > PROF_MODULES_VER) { 1077 (void) fprintf(stderr, "%s: version %d for module type objects" 1078 "is not supported\n", whoami, modlp->version); 1079 exit(EX_SOFTWARE); 1080 } 1081 1082 1083 /* 1084 * Scan the PROF_MODULES_T list and add modules to current list 1085 * of modules, if they're not present already 1086 */ 1087 /* LINTED: pointer cast */ 1088 newmodp = (ProfModule *)((char *)modlp + modlp->modules); 1089 do { 1090 /* 1091 * Since the prog could've been renamed after its run, we 1092 * should see if this overlaps a.out. If it does, it is 1093 * probably the renamed aout. We should also skip any other 1094 * non-sharedobj's that we see (or should we report an error ?) 1095 */ 1096 so_path = (caddr_t)modlp + newmodp->path; 1097 if (does_overlap(newmodp, &modules) || 1098 is_same_as_aout(so_path, &so_statbuf) || 1099 (!is_shared_obj(so_path))) { 1100 1101 if (!newmodp->next) 1102 more_modules = FALSE; 1103 1104 /* LINTED: pointer cast */ 1105 newmodp = (ProfModule *) 1106 ((caddr_t)modlp + newmodp->next); 1107 #ifdef DEBUG 1108 if (debug & MODULEDEBUG) { 1109 (void) printf( 1110 "[process_modules] `%s'\n", so_path); 1111 (void) printf(" skipped\n"); 1112 } 1113 #endif /* DEBUG */ 1114 continue; 1115 } 1116 #ifdef DEBUG 1117 if (debug & MODULEDEBUG) 1118 (void) printf("[process_modules] `%s'...\n", so_path); 1119 #endif /* DEBUG */ 1120 1121 /* 1122 * Check all modules (leave the first one, 'cos that 1123 * is the program executable info). If this module is already 1124 * there in the list, update the load addresses and proceed. 1125 */ 1126 last = &modules; 1127 while ((mi = last->next) != NULL) { 1128 /* 1129 * We expect the full pathname for all shared objects 1130 * needed by the program executable. In this case, we 1131 * simply need to compare the paths to see if they are 1132 * the same file. 1133 */ 1134 if (strcmp(mi->name, so_path) == 0) 1135 break; 1136 1137 /* 1138 * Check if this new shared object will overlap 1139 * any existing module. If yes, remove the old one 1140 * from the linked list (but don't free it, 'cos 1141 * there may be symbols referring to this module 1142 * still) 1143 */ 1144 if (does_overlap(newmodp, mi)) { 1145 #ifdef DEBUG 1146 if (debug & MODULEDEBUG) { 1147 (void) printf( 1148 "[process_modules] `%s'\n", 1149 so_path); 1150 (void) printf( 1151 " overlaps\n"); 1152 (void) printf( 1153 " `%s'\n", 1154 mi->name); 1155 } 1156 #endif /* DEBUG */ 1157 mi->active = FALSE; 1158 } 1159 1160 last = mi; 1161 } 1162 1163 /* Module already there, skip it */ 1164 if (mi != NULL) { 1165 mi->load_base = newmodp->startaddr; 1166 mi->load_end = newmodp->endaddr; 1167 mi->active = TRUE; 1168 if (!newmodp->next) 1169 more_modules = FALSE; 1170 1171 /* LINTED: pointer cast */ 1172 newmodp = (ProfModule *) 1173 ((caddr_t)modlp + newmodp->next); 1174 1175 #ifdef DEBUG 1176 if (debug & MODULEDEBUG) { 1177 (void) printf("[process_modules] base=%#llx, " 1178 "end=%#llx\n", mi->load_base, mi->load_end); 1179 } 1180 #endif /* DEBUG */ 1181 continue; 1182 } 1183 1184 /* 1185 * Check if gmon.out is outdated with respect to the new 1186 * module we want to add 1187 */ 1188 if (gmonout_info.mtime < so_statbuf.st_mtime) { 1189 (void) fprintf(stderr, 1190 "%s: shared obj outdates prof info\n", whoami); 1191 (void) fprintf(stderr, "\t(newer %s)\n", so_path); 1192 exit(EX_NOINPUT); 1193 } 1194 1195 /* Create a new module element */ 1196 new_module = malloc(sizeof (mod_info_t)); 1197 if (new_module == NULL) { 1198 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1199 whoami, sizeof (mod_info_t)); 1200 exit(EX_OSERR); 1201 } 1202 1203 /* and fill in info... */ 1204 new_module->id = n_modules + 1; 1205 new_module->load_base = newmodp->startaddr; 1206 new_module->load_end = newmodp->endaddr; 1207 new_module->name = malloc(strlen(so_path) + 1); 1208 if (new_module->name == NULL) { 1209 (void) fprintf(stderr, "%s: no room for %d bytes\n", 1210 whoami, strlen(so_path) + 1); 1211 exit(EX_OSERR); 1212 } 1213 (void) strcpy(new_module->name, so_path); 1214 #ifdef DEBUG 1215 if (debug & MODULEDEBUG) { 1216 (void) printf( 1217 "[process_modules] base=%#llx, end=%#llx\n", 1218 new_module->load_base, new_module->load_end); 1219 } 1220 #endif /* DEBUG */ 1221 1222 /* Create this module's nameslist */ 1223 process_namelist(new_module); 1224 1225 /* Add it to the tail of active module list */ 1226 last->next = new_module; 1227 n_modules++; 1228 1229 #ifdef DEBUG 1230 if (debug & MODULEDEBUG) { 1231 (void) printf( 1232 "[process_modules] total shared objects = %ld\n", 1233 n_modules - 1); 1234 } 1235 #endif /* DEBUG */ 1236 /* 1237 * Move to the next module in the PROF_MODULES_T list 1238 * (if present) 1239 */ 1240 if (!newmodp->next) 1241 more_modules = FALSE; 1242 1243 /* LINTED: pointer cast */ 1244 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next); 1245 1246 } while (more_modules); 1247 } 1248 1249 static void 1250 reset_active_modules(void) 1251 { 1252 mod_info_t *mi; 1253 1254 /* Except the executable, no other module should remain active */ 1255 for (mi = modules.next; mi; mi = mi->next) 1256 mi->active = FALSE; 1257 } 1258 1259 static void 1260 getpfiledata(caddr_t memp, size_t fsz) 1261 { 1262 ProfObject *objp; 1263 caddr_t file_end; 1264 bool found_pcsamples = FALSE, found_cgraph = FALSE; 1265 1266 /* 1267 * Before processing a new gmon.out, all modules except the 1268 * program executable must be made inactive, so that symbols 1269 * are searched only in the program executable, if we don't 1270 * find a MODULES_T object. Don't do it *after* we read a gmon.out, 1271 * because we need the active module data after we're done with 1272 * the last gmon.out, if we're doing summing. 1273 */ 1274 reset_active_modules(); 1275 1276 file_end = memp + fsz; 1277 /* LINTED: pointer cast */ 1278 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size); 1279 while ((caddr_t)objp < file_end) { 1280 #ifdef DEBUG 1281 { 1282 unsigned int type = 0; 1283 1284 if (debug & MONOUTDEBUG) { 1285 if (objp->type <= MAX_OBJTYPES) 1286 type = objp->type; 1287 1288 (void) printf( 1289 "\n[getpfiledata] object %s [%#lx]\n", 1290 objname[type], objp->type); 1291 } 1292 } 1293 #endif /* DEBUG */ 1294 switch (objp->type) { 1295 case PROF_MODULES_T : 1296 process_modules((ProfModuleList *) objp); 1297 break; 1298 1299 case PROF_CALLGRAPH_T : 1300 process_cgraph((ProfCallGraph *) objp); 1301 found_cgraph = TRUE; 1302 break; 1303 1304 case PROF_BUFFER_T : 1305 process_pcsamples((ProfBuffer *) objp); 1306 found_pcsamples = TRUE; 1307 break; 1308 1309 default : 1310 (void) fprintf(stderr, 1311 "%s: unknown prof object type=%d\n", 1312 whoami, objp->type); 1313 exit(EX_SOFTWARE); 1314 } 1315 /* LINTED: pointer cast */ 1316 objp = (ProfObject *)((caddr_t)objp + objp->size); 1317 } 1318 1319 if (!found_cgraph || !found_pcsamples) { 1320 (void) fprintf(stderr, 1321 "%s: missing callgraph/pcsamples object\n", whoami); 1322 exit(EX_SOFTWARE); 1323 } 1324 1325 if ((caddr_t)objp > file_end) { 1326 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami); 1327 exit(EX_SOFTWARE); 1328 } 1329 1330 if (first_file) 1331 first_file = FALSE; 1332 } 1333 1334 static void 1335 readarcs(FILE *pfile) 1336 { 1337 /* 1338 * the rest of the file consists of 1339 * a bunch of <from,self,count> tuples. 1340 */ 1341 /* CONSTCOND */ 1342 while (1) { 1343 struct rawarc arc; 1344 1345 if (rflag) { 1346 if (Bflag) { 1347 L_cgarc64 rtld_arc64; 1348 1349 /* 1350 * If rflag is set then this is an profiled 1351 * image generated by rtld. It needs to be 1352 * 'converted' to the standard data format. 1353 */ 1354 if (fread(&rtld_arc64, 1355 sizeof (L_cgarc64), 1, pfile) != 1) 1356 break; 1357 1358 if (rtld_arc64.cg_from == PRF_OUTADDR64) 1359 arc.raw_frompc = s_highpc + 0x10; 1360 else 1361 arc.raw_frompc = 1362 (pctype)rtld_arc64.cg_from; 1363 arc.raw_selfpc = (pctype)rtld_arc64.cg_to; 1364 arc.raw_count = (actype)rtld_arc64.cg_count; 1365 } else { 1366 L_cgarc rtld_arc; 1367 1368 /* 1369 * If rflag is set then this is an profiled 1370 * image generated by rtld. It needs to be 1371 * 'converted' to the standard data format. 1372 */ 1373 if (fread(&rtld_arc, 1374 sizeof (L_cgarc), 1, pfile) != 1) 1375 break; 1376 1377 if (rtld_arc.cg_from == PRF_OUTADDR) 1378 arc.raw_frompc = s_highpc + 0x10; 1379 else 1380 arc.raw_frompc = (pctype) 1381 (uintptr_t)rtld_arc.cg_from; 1382 arc.raw_selfpc = (pctype) 1383 (uintptr_t)rtld_arc.cg_to; 1384 arc.raw_count = (actype)rtld_arc.cg_count; 1385 } 1386 } else { 1387 if (Bflag) { 1388 if (fread(&arc, sizeof (struct rawarc), 1, 1389 pfile) != 1) { 1390 break; 1391 } 1392 } else { 1393 /* 1394 * If these aren't big %pc's, we need to read 1395 * into the 32-bit raw arc structure, and 1396 * assign the members into the actual arc. 1397 */ 1398 struct rawarc32 arc32; 1399 if (fread(&arc32, sizeof (struct rawarc32), 1400 1, pfile) != 1) 1401 break; 1402 arc.raw_frompc = (pctype)arc32.raw_frompc; 1403 arc.raw_selfpc = (pctype)arc32.raw_selfpc; 1404 arc.raw_count = (actype)arc32.raw_count; 1405 } 1406 } 1407 1408 #ifdef DEBUG 1409 if (debug & SAMPLEDEBUG) { 1410 (void) printf("[getpfile] frompc 0x%llx selfpc " 1411 "0x%llx count %lld\n", arc.raw_frompc, 1412 arc.raw_selfpc, arc.raw_count); 1413 } 1414 #endif /* DEBUG */ 1415 /* 1416 * add this arc 1417 */ 1418 tally(&modules, &modules, &arc); 1419 } 1420 if (first_file) 1421 first_file = FALSE; 1422 } 1423 1424 static void 1425 readsamples(FILE *pfile) 1426 { 1427 sztype i; 1428 unsigned_UNIT sample; 1429 1430 if (samples == 0) { 1431 samples = (unsigned_UNIT *) calloc(nsamples, 1432 sizeof (unsigned_UNIT)); 1433 if (samples == 0) { 1434 (void) fprintf(stderr, 1435 "%s: No room for %d sample pc's\n", 1436 whoami, sampbytes / sizeof (unsigned_UNIT)); 1437 exit(EX_OSERR); 1438 } 1439 } 1440 1441 for (i = 0; i < nsamples; i++) { 1442 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile); 1443 if (feof(pfile)) 1444 break; 1445 samples[i] += sample; 1446 } 1447 if (i != nsamples) { 1448 (void) fprintf(stderr, 1449 "%s: unexpected EOF after reading %d/%d samples\n", 1450 whoami, --i, nsamples); 1451 exit(EX_IOERR); 1452 } 1453 } 1454 1455 static void * 1456 handle_versioned(FILE *pfile, char *filename, size_t *fsz) 1457 { 1458 int fd; 1459 bool invalid_version; 1460 caddr_t fmem; 1461 struct stat buf; 1462 ProfHeader prof_hdr; 1463 off_t lret; 1464 1465 /* 1466 * Check versioning info. For now, let's say we provide 1467 * backward compatibility, so we accept all older versions. 1468 */ 1469 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) { 1470 perror("fread()"); 1471 exit(EX_IOERR); 1472 } 1473 1474 invalid_version = FALSE; 1475 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION) 1476 invalid_version = TRUE; 1477 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) { 1478 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION) 1479 invalid_version = FALSE; 1480 } 1481 1482 if (invalid_version) { 1483 (void) fprintf(stderr, "%s: version %d.%d not supported\n", 1484 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver); 1485 exit(EX_SOFTWARE); 1486 } 1487 1488 /* 1489 * Map gmon.out onto memory. 1490 */ 1491 (void) fclose(pfile); 1492 if ((fd = open(filename, O_RDONLY)) == -1) { 1493 perror(filename); 1494 exit(EX_IOERR); 1495 } 1496 1497 if ((lret = lseek(fd, 0, SEEK_END)) == -1) { 1498 perror(filename); 1499 exit(EX_IOERR); 1500 } 1501 *fsz = lret; 1502 1503 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0); 1504 if (fmem == MAP_FAILED) { 1505 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename); 1506 exit(EX_IOERR); 1507 } 1508 1509 /* 1510 * Before we close this fd, save this gmon.out's info to later verify 1511 * if the shared objects it references have changed since the time 1512 * they were used to generate this gmon.out 1513 */ 1514 if (fstat(fd, &buf) == -1) { 1515 (void) fprintf(stderr, "%s: can't get info on `%s'\n", 1516 whoami, filename); 1517 exit(EX_NOINPUT); 1518 } 1519 gmonout_info.dev = buf.st_dev; 1520 gmonout_info.ino = buf.st_ino; 1521 gmonout_info.mtime = buf.st_mtime; 1522 gmonout_info.size = buf.st_size; 1523 1524 (void) close(fd); 1525 1526 return ((void *) fmem); 1527 } 1528 1529 static void * 1530 openpfile(char *filename, size_t *fsz) 1531 { 1532 struct hdr tmp; 1533 FILE *pfile; 1534 unsigned long magic_num; 1535 size_t hdrsize; 1536 static bool first_time = TRUE; 1537 extern bool old_style; 1538 1539 if ((pfile = fopen(filename, "r")) == NULL) { 1540 perror(filename); 1541 exit(EX_IOERR); 1542 } 1543 1544 /* 1545 * Read in the magic. Note that we changed the cast "unsigned long" 1546 * to "unsigned int" because that's how h_magic is defined in the 1547 * new format ProfHeader. 1548 */ 1549 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) { 1550 perror("fread()"); 1551 exit(EX_IOERR); 1552 } 1553 1554 rewind(pfile); 1555 1556 /* 1557 * First check if this is versioned or *old-style* gmon.out 1558 */ 1559 if (magic_num == (unsigned int)PROF_MAGIC) { 1560 if ((!first_time) && (old_style == TRUE)) { 1561 (void) fprintf(stderr, "%s: can't mix old & new format " 1562 "profiled files\n", whoami); 1563 exit(EX_SOFTWARE); 1564 } 1565 first_time = FALSE; 1566 old_style = FALSE; 1567 return (handle_versioned(pfile, filename, fsz)); 1568 } 1569 1570 if ((!first_time) && (old_style == FALSE)) { 1571 (void) fprintf(stderr, "%s: can't mix old & new format " 1572 "profiled files\n", whoami); 1573 exit(EX_SOFTWARE); 1574 } 1575 1576 first_time = FALSE; 1577 old_style = TRUE; 1578 fsz = 0; 1579 1580 /* 1581 * Now, we need to determine if this is a run-time linker 1582 * profiled file or if it is a standard gmon.out. 1583 * 1584 * We do this by checking if magic matches PRF_MAGIC. If it 1585 * does, then this is a run-time linker profiled file, if it 1586 * doesn't, it must be a gmon.out file. 1587 */ 1588 if (magic_num == (unsigned long)PRF_MAGIC) 1589 rflag = TRUE; 1590 else 1591 rflag = FALSE; 1592 1593 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32); 1594 1595 if (rflag) { 1596 if (Bflag) { 1597 L_hdr64 l_hdr64; 1598 1599 /* 1600 * If the rflag is set then the input file is 1601 * rtld profiled data, we'll read it in and convert 1602 * it to the standard format (ie: make it look like 1603 * a gmon.out file). 1604 */ 1605 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) { 1606 perror("fread()"); 1607 exit(EX_IOERR); 1608 } 1609 if (l_hdr64.hd_version != PRF_VERSION_64) { 1610 (void) fprintf(stderr, 1611 "%s: expected version %d, " 1612 "got version %d when processing 64-bit " 1613 "run-time linker profiled file.\n", 1614 whoami, PRF_VERSION_64, l_hdr64.hd_version); 1615 exit(EX_SOFTWARE); 1616 } 1617 tmp.lowpc = 0; 1618 tmp.highpc = (pctype)l_hdr64.hd_hpc; 1619 tmp.ncnt = hdrsize + l_hdr64.hd_psize; 1620 } else { 1621 L_hdr l_hdr; 1622 1623 /* 1624 * If the rflag is set then the input file is 1625 * rtld profiled data, we'll read it in and convert 1626 * it to the standard format (ie: make it look like 1627 * a gmon.out file). 1628 */ 1629 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) { 1630 perror("fread()"); 1631 exit(EX_IOERR); 1632 } 1633 if (l_hdr.hd_version != PRF_VERSION) { 1634 (void) fprintf(stderr, 1635 "%s: expected version %d, " 1636 "got version %d when processing " 1637 "run-time linker profiled file.\n", 1638 whoami, PRF_VERSION, l_hdr.hd_version); 1639 exit(EX_SOFTWARE); 1640 } 1641 tmp.lowpc = 0; 1642 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc; 1643 tmp.ncnt = hdrsize + l_hdr.hd_psize; 1644 } 1645 } else { 1646 if (Bflag) { 1647 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) { 1648 perror("fread()"); 1649 exit(EX_IOERR); 1650 } 1651 } else { 1652 /* 1653 * If we're not reading big %pc's, we need to read 1654 * the 32-bit header, and assign the members to 1655 * the actual header. 1656 */ 1657 struct hdr32 hdr32; 1658 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) { 1659 perror("fread()"); 1660 exit(EX_IOERR); 1661 } 1662 tmp.lowpc = hdr32.lowpc; 1663 tmp.highpc = hdr32.highpc; 1664 tmp.ncnt = hdr32.ncnt; 1665 } 1666 } 1667 1668 /* 1669 * perform sanity check on profiled file we've opened. 1670 */ 1671 if (tmp.lowpc >= tmp.highpc) { 1672 if (rflag) 1673 (void) fprintf(stderr, 1674 "%s: badly formed profiled data.\n", 1675 filename); 1676 else 1677 (void) fprintf(stderr, 1678 "%s: badly formed gmon.out file.\n", 1679 filename); 1680 exit(EX_SOFTWARE); 1681 } 1682 1683 if (s_highpc != 0 && (tmp.lowpc != h.lowpc || 1684 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) { 1685 (void) fprintf(stderr, 1686 "%s: incompatible with first gmon file\n", 1687 filename); 1688 exit(EX_IOERR); 1689 } 1690 h = tmp; 1691 s_lowpc = h.lowpc; 1692 s_highpc = h.highpc; 1693 lowpc = h.lowpc / sizeof (UNIT); 1694 highpc = h.highpc / sizeof (UNIT); 1695 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0; 1696 nsamples = sampbytes / sizeof (unsigned_UNIT); 1697 1698 #ifdef DEBUG 1699 if (debug & SAMPLEDEBUG) { 1700 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc " 1701 "0x%llx hdr.ncnt %lld\n", 1702 h.lowpc, h.highpc, h.ncnt); 1703 (void) printf( 1704 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n", 1705 s_lowpc, s_highpc); 1706 (void) printf( 1707 "[openpfile] lowpc 0x%llx highpc 0x%llx\n", 1708 lowpc, highpc); 1709 (void) printf("[openpfile] sampbytes %d nsamples %d\n", 1710 sampbytes, nsamples); 1711 } 1712 #endif /* DEBUG */ 1713 1714 return ((void *) pfile); 1715 } 1716 1717 /* 1718 * Information from a gmon.out file depends on whether it's versioned 1719 * or non-versioned, *old style* gmon.out. If old-style, it is in two 1720 * parts : an array of sampling hits within pc ranges, and the arcs. If 1721 * versioned, it contains a header, followed by any number of 1722 * modules/callgraph/pcsample_buffer objects. 1723 */ 1724 static void 1725 getpfile(char *filename) 1726 { 1727 void *handle; 1728 size_t fsz; 1729 1730 handle = openpfile(filename, &fsz); 1731 1732 if (old_style) { 1733 readsamples((FILE *)handle); 1734 readarcs((FILE *)handle); 1735 (void) fclose((FILE *)handle); 1736 return; 1737 } 1738 1739 getpfiledata((caddr_t)handle, fsz); 1740 (void) munmap(handle, fsz); 1741 } 1742 1743 int 1744 main(int argc, char **argv) 1745 { 1746 char **sp; 1747 nltype **timesortnlp; 1748 int c; 1749 int errflg; 1750 1751 prog_name = *argv; /* preserve program name */ 1752 debug = 0; 1753 nflag = FALSE; 1754 bflag = TRUE; 1755 lflag = FALSE; 1756 Cflag = FALSE; 1757 first_file = TRUE; 1758 rflag = FALSE; 1759 Bflag = FALSE; 1760 errflg = FALSE; 1761 1762 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF) 1763 switch (c) { 1764 case 'a': 1765 aflag = TRUE; 1766 break; 1767 case 'b': 1768 bflag = FALSE; 1769 break; 1770 case 'c': 1771 cflag = TRUE; 1772 break; 1773 case 'C': 1774 Cflag = TRUE; 1775 break; 1776 case 'd': 1777 dflag = TRUE; 1778 debug |= atoi(optarg); 1779 (void) printf("[main] debug = 0x%x\n", debug); 1780 break; 1781 case 'D': 1782 Dflag = TRUE; 1783 break; 1784 case 'E': 1785 addlist(Elist, optarg); 1786 Eflag = TRUE; 1787 addlist(elist, optarg); 1788 eflag = TRUE; 1789 break; 1790 case 'e': 1791 addlist(elist, optarg); 1792 eflag = TRUE; 1793 break; 1794 case 'F': 1795 addlist(Flist, optarg); 1796 Fflag = TRUE; 1797 addlist(flist, optarg); 1798 fflag = TRUE; 1799 break; 1800 case 'f': 1801 addlist(flist, optarg); 1802 fflag = TRUE; 1803 break; 1804 case 'l': 1805 lflag = TRUE; 1806 break; 1807 case 'n': 1808 nflag = TRUE; 1809 number_funcs_toprint = atoi(optarg); 1810 break; 1811 case 's': 1812 sflag = TRUE; 1813 break; 1814 case 'z': 1815 zflag = TRUE; 1816 break; 1817 case '?': 1818 errflg++; 1819 1820 } 1821 1822 if (errflg) { 1823 (void) fprintf(stderr, 1824 "usage: gprof [ -abcCDlsz ] [ -e function-name ] " 1825 "[ -E function-name ]\n\t[ -f function-name ] " 1826 "[ -F function-name ]\n\t[ image-file " 1827 "[ profile-file ... ] ]\n"); 1828 exit(EX_USAGE); 1829 } 1830 1831 if (optind < argc) { 1832 a_outname = argv[optind++]; 1833 } else { 1834 a_outname = A_OUTNAME; 1835 } 1836 if (optind < argc) { 1837 gmonname = argv[optind++]; 1838 } else { 1839 gmonname = GMONNAME; 1840 } 1841 /* 1842 * turn off default functions 1843 */ 1844 for (sp = &defaultEs[0]; *sp; sp++) { 1845 Eflag = TRUE; 1846 addlist(Elist, *sp); 1847 eflag = TRUE; 1848 addlist(elist, *sp); 1849 } 1850 /* 1851 * how many ticks per second? 1852 * if we can't tell, report time in ticks. 1853 */ 1854 hz = sysconf(_SC_CLK_TCK); 1855 if (hz == -1) { 1856 hz = 1; 1857 (void) fprintf(stderr, "time is in ticks, not seconds\n"); 1858 } 1859 1860 getnfile(a_outname); 1861 1862 /* 1863 * get information about mon.out file(s). 1864 */ 1865 do { 1866 getpfile(gmonname); 1867 if (optind < argc) 1868 gmonname = argv[optind++]; 1869 else 1870 optind++; 1871 } while (optind <= argc); 1872 /* 1873 * dump out a gmon.sum file if requested 1874 */ 1875 if (sflag || Dflag) 1876 dumpsum(GMONSUM); 1877 1878 if (old_style) { 1879 /* 1880 * assign samples to procedures 1881 */ 1882 asgnsamples(); 1883 } 1884 1885 /* 1886 * assemble the dynamic profile 1887 */ 1888 timesortnlp = doarcs(); 1889 1890 /* 1891 * print the dynamic profile 1892 */ 1893 #ifdef DEBUG 1894 if (debug & ANYDEBUG) { 1895 /* raw output of all symbols in all their glory */ 1896 int i; 1897 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, " 1898 "#calls, selfcalls, index \n"); 1899 for (i = 0; i < modules.nname; i++) { /* Print each symbol */ 1900 if (timesortnlp[i]->name) 1901 (void) printf(" %s ", timesortnlp[i]->name); 1902 else 1903 (void) printf(" <cycle> "); 1904 (void) printf(" %lld ", timesortnlp[i]->value); 1905 (void) printf(" %lld ", timesortnlp[i]->svalue); 1906 (void) printf(" %f ", timesortnlp[i]->time); 1907 (void) printf(" %lld ", timesortnlp[i]->ncall); 1908 (void) printf(" %lld ", timesortnlp[i]->selfcalls); 1909 (void) printf(" %d ", timesortnlp[i]->index); 1910 (void) printf(" \n"); 1911 } 1912 } 1913 #endif /* DEBUG */ 1914 1915 printgprof(timesortnlp); 1916 /* 1917 * print the flat profile 1918 */ 1919 printprof(); 1920 /* 1921 * print the index 1922 */ 1923 printindex(); 1924 1925 /* 1926 * print the modules 1927 */ 1928 printmodules(); 1929 1930 done(); 1931 /* NOTREACHED */ 1932 return (0); 1933 } 1934