1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sysexits.h> 30 #include <stdlib.h> 31 #include <unistd.h> 32 #include "gprof.h" 33 #include "profile.h" 34 35 char *whoami = "gprof"; 36 static pctype lowpc, highpc; /* range profiled, in UNIT's */ 37 38 /* 39 * things which get -E excluded by default. 40 */ 41 static char *defaultEs[] = { 42 "mcount", 43 "__mcleanup", 44 0 45 }; 46 47 #ifdef DEBUG 48 49 static char *objname[] = { 50 "<invalid object>", 51 "PROF_BUFFER_T", 52 "PROF_CALLGRAPH_T", 53 "PROF_MODULES_T", 54 0 55 }; 56 #define MAX_OBJTYPES 3 57 58 #endif DEBUG 59 60 void 61 done() 62 { 63 64 exit(EX_OK); 65 } 66 67 static pctype 68 max(pctype a, pctype b) 69 { 70 if (a > b) 71 return (a); 72 return (b); 73 } 74 75 static pctype 76 min(pctype a, pctype b) 77 { 78 if (a < b) 79 return (a); 80 return (b); 81 } 82 83 /* 84 * calculate scaled entry point addresses (to save time in asgnsamples), 85 * and possibly push the scaled entry points over the entry mask, 86 * if it turns out that the entry point is in one bucket and the code 87 * for a routine is in the next bucket. 88 * 89 */ 90 static void 91 alignentries() 92 { 93 register struct nl * nlp; 94 #ifdef DEBUG 95 pctype bucket_of_entry; 96 pctype bucket_of_code; 97 #endif DEBUG 98 99 /* for old-style gmon.out, nameslist is only in modules.nl */ 100 101 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 102 nlp->svalue = nlp->value / sizeof (UNIT); 103 #ifdef DEBUG 104 bucket_of_entry = (nlp->svalue - lowpc) / scale; 105 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 106 if (bucket_of_entry < bucket_of_code) { 107 if (debug & SAMPLEDEBUG) { 108 printf("[alignentries] pushing svalue 0x%llx " 109 "to 0x%llx\n", nlp->svalue, 110 nlp->svalue + UNITS_TO_CODE); 111 } 112 } 113 #endif DEBUG 114 } 115 } 116 117 /* 118 * old-style gmon.out 119 * ------------------ 120 * 121 * Assign samples to the procedures to which they belong. 122 * 123 * There are three cases as to where pcl and pch can be 124 * with respect to the routine entry addresses svalue0 and svalue1 125 * as shown in the following diagram. overlap computes the 126 * distance between the arrows, the fraction of the sample 127 * that is to be credited to the routine which starts at svalue0. 128 * 129 * svalue0 svalue1 130 * | | 131 * v v 132 * 133 * +-----------------------------------------------+ 134 * | | 135 * | ->| |<- ->| |<- ->| |<- | 136 * | | | | | | 137 * +---------+ +---------+ +---------+ 138 * 139 * ^ ^ ^ ^ ^ ^ 140 * | | | | | | 141 * pcl pch pcl pch pcl pch 142 * 143 * For the vax we assert that samples will never fall in the first 144 * two bytes of any routine, since that is the entry mask, 145 * thus we give call alignentries() to adjust the entry points if 146 * the entry mask falls in one bucket but the code for the routine 147 * doesn't start until the next bucket. In conjunction with the 148 * alignment of routine addresses, this should allow us to have 149 * only one sample for every four bytes of text space and never 150 * have any overlap (the two end cases, above). 151 */ 152 static void 153 asgnsamples() 154 { 155 sztype i, j; 156 unsigned_UNIT ccnt; 157 double time; 158 pctype pcl, pch; 159 pctype overlap; 160 pctype svalue0, svalue1; 161 162 extern mod_info_t modules; 163 nltype *nl = modules.nl; 164 sztype nname = modules.nname; 165 166 /* read samples and assign to namelist symbols */ 167 scale = highpc - lowpc; 168 scale /= nsamples; 169 alignentries(); 170 for (i = 0, j = 1; i < nsamples; i++) { 171 ccnt = samples[i]; 172 if (ccnt == 0) 173 continue; 174 pcl = lowpc + scale * i; 175 pch = lowpc + scale * (i + 1); 176 time = ccnt; 177 #ifdef DEBUG 178 if (debug & SAMPLEDEBUG) { 179 printf("[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n", 180 pcl, pch, ccnt); 181 } 182 #endif DEBUG 183 totime += time; 184 for (j = (j ? j - 1 : 0); j < nname; j++) { 185 svalue0 = nl[j].svalue; 186 svalue1 = nl[j+1].svalue; 187 /* 188 * if high end of tick is below entry address, 189 * go for next tick. 190 */ 191 if (pch < svalue0) 192 break; 193 /* 194 * if low end of tick into next routine, 195 * go for next routine. 196 */ 197 if (pcl >= svalue1) 198 continue; 199 overlap = min(pch, svalue1) - max(pcl, svalue0); 200 if (overlap != 0) { 201 #ifdef DEBUG 202 if (debug & SAMPLEDEBUG) { 203 printf("[asgnsamples] " 204 "(0x%llx->0x%llx-0x%llx) %s gets " 205 "%f ticks %lld overlap\n", 206 nl[j].value/sizeof (UNIT), svalue0, 207 svalue1, nl[j].name, 208 overlap * time / scale, overlap); 209 } 210 #endif DEBUG 211 nl[j].time += overlap * time / scale; 212 } 213 } 214 } 215 #ifdef DEBUG 216 if (debug & SAMPLEDEBUG) { 217 printf("[asgnsamples] totime %f\n", totime); 218 } 219 #endif DEBUG 220 } 221 222 223 static void 224 dump_callgraph(FILE *fp, char *filename, 225 unsigned long tarcs, unsigned long ncallees) 226 { 227 ProfCallGraph prof_cgraph; 228 ProfFunction prof_func; 229 register arctype *arcp; 230 mod_info_t *mi; 231 nltype *nlp; 232 size_t cur_offset; 233 unsigned long caller_id = 0, callee_id = 0; 234 235 /* 236 * Write the callgraph header 237 */ 238 prof_cgraph.type = PROF_CALLGRAPH_T; 239 prof_cgraph.version = PROF_CALLGRAPH_VER; 240 prof_cgraph.functions = PROFCGRAPH_SZ; 241 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ; 242 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) { 243 perror(filename); 244 exit(EX_IOERR); 245 } 246 if (CGRAPH_FILLER) 247 fseek(fp, CGRAPH_FILLER, SEEK_CUR); 248 249 /* Current offset inside the callgraph object */ 250 cur_offset = prof_cgraph.functions; 251 252 for (mi = &modules; mi; mi = mi->next) { 253 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 254 if (nlp->ncallers == 0) 255 continue; 256 257 /* If this is the last callee, set next_to to 0 */ 258 callee_id++; 259 if (callee_id == ncallees) 260 prof_func.next_to = 0; 261 else { 262 prof_func.next_to = cur_offset + 263 nlp->ncallers * PROFFUNC_SZ; 264 } 265 266 /* 267 * Dump this callee's raw arc information with all 268 * its callers 269 */ 270 caller_id = 1; 271 for (arcp = nlp->parents; arcp; 272 arcp = arcp->arc_parentlist) { 273 /* 274 * If no more callers for this callee, set 275 * next_from to 0 276 */ 277 if (caller_id == nlp->ncallers) 278 prof_func.next_from = 0; 279 else { 280 prof_func.next_from = cur_offset + 281 PROFFUNC_SZ; 282 } 283 284 prof_func.frompc = 285 arcp->arc_parentp->module->load_base + 286 (arcp->arc_parentp->value - 287 arcp->arc_parentp->module->txt_origin); 288 prof_func.topc = 289 mi->load_base + 290 (nlp->value - mi->txt_origin); 291 prof_func.count = arcp->arc_count; 292 293 294 if (fwrite(&prof_func, sizeof (ProfFunction), 295 1, fp) != 1) { 296 perror(filename); 297 exit(EX_IOERR); 298 } 299 if (FUNC_FILLER) 300 fseek(fp, FUNC_FILLER, SEEK_CUR); 301 302 cur_offset += PROFFUNC_SZ; 303 caller_id++; 304 } 305 } /* for nlp... */ 306 } /* for mi... */ 307 } 308 309 /* 310 * To save all pc-hits in all the gmon.out's is infeasible, as this 311 * may become quite huge even with a small number of files to sum. 312 * Instead, we'll dump *fictitious hits* to correct functions 313 * by scanning module namelists. Again, since this is summing 314 * pc-hits, we may have to dump the pcsamples out in chunks if the 315 * number of pc-hits is high. 316 */ 317 static void 318 dump_hits(FILE *fp, char *filename, nltype *nlp) 319 { 320 Address *p, hitpc; 321 size_t i, nelem, ntowrite; 322 323 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE) 324 nelem = PROF_BUFFER_SIZE; 325 326 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) { 327 fprintf(stderr, "%s: no room for %ld pcsamples\n", 328 whoami, nelem); 329 exit(EX_OSERR); 330 } 331 332 /* 333 * Set up *fictitious* hits (to function entry) buffer 334 */ 335 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin); 336 for (i = 0; i < nelem; i++) 337 p[i] = hitpc; 338 339 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) { 340 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) { 341 perror(filename); 342 exit(EX_IOERR); 343 } 344 } 345 346 if (ntowrite) { 347 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) { 348 perror(filename); 349 exit(EX_IOERR); 350 } 351 } 352 353 free(p); 354 } 355 356 static void 357 dump_pcsamples(FILE *fp, char *filename, 358 unsigned long *tarcs, unsigned long *ncallees) 359 { 360 ProfBuffer prof_buffer; 361 register arctype *arcp; 362 mod_info_t *mi; 363 nltype *nlp; 364 365 prof_buffer.type = PROF_BUFFER_T; 366 prof_buffer.version = PROF_BUFFER_VER; 367 prof_buffer.buffer = PROFBUF_SZ; 368 prof_buffer.bufsize = n_pcsamples; 369 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address); 370 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) { 371 perror(filename); 372 exit(EX_IOERR); 373 } 374 if (BUF_FILLER) 375 fseek(fp, BUF_FILLER, SEEK_CUR); 376 377 *tarcs = 0; 378 *ncallees = 0; 379 for (mi = &modules; mi; mi = mi->next) { 380 for (nlp = mi->nl; nlp < mi->npe; nlp++) { 381 if (nlp->nticks) 382 dump_hits(fp, filename, nlp); 383 384 nlp->ncallers = 0; 385 for (arcp = nlp->parents; arcp; 386 arcp = arcp->arc_parentlist) { 387 (nlp->ncallers)++; 388 } 389 390 if (nlp->ncallers) { 391 (*tarcs) += nlp->ncallers; 392 (*ncallees)++; 393 } 394 } 395 } 396 } 397 398 static void 399 dump_modules(FILE *fp, char *filename, size_t pbuf_sz) 400 { 401 char *pbuf, *p; 402 size_t namelen; 403 Index off_nxt, off_path; 404 mod_info_t *mi; 405 406 ProfModuleList prof_modlist; 407 ProfModule prof_mod; 408 409 /* Allocate for path strings buffer */ 410 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN); 411 if ((p = pbuf = (char *) calloc(pbuf_sz, sizeof (char))) == NULL) { 412 fprintf(stderr, "%s: no room for %ld bytes\n", 413 whoami, pbuf_sz * sizeof (char)); 414 exit(EX_OSERR); 415 } 416 417 /* Dump out PROF_MODULE_T info for all non-aout modules */ 418 prof_modlist.type = PROF_MODULES_T; 419 prof_modlist.version = PROF_MODULES_VER; 420 prof_modlist.modules = PROFMODLIST_SZ; 421 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ + 422 pbuf_sz; 423 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) { 424 perror(filename); 425 exit(EX_IOERR); 426 } 427 if (MODLIST_FILLER) 428 fseek(fp, MODLIST_FILLER, SEEK_CUR); 429 430 /* 431 * Initialize offsets for ProfModule elements. 432 */ 433 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ; 434 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ; 435 436 for (mi = modules.next; mi; mi = mi->next) { 437 if (mi->next) 438 prof_mod.next = off_nxt; 439 else 440 prof_mod.next = 0; 441 prof_mod.path = off_path; 442 prof_mod.startaddr = mi->load_base; 443 prof_mod.endaddr = mi->load_end; 444 445 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) { 446 perror(filename); 447 exit(EX_IOERR); 448 } 449 450 if (MOD_FILLER) 451 fseek(fp, MOD_FILLER, SEEK_CUR); 452 453 strcpy(p, mi->name); 454 namelen = strlen(mi->name); 455 p += namelen + 1; 456 457 /* Note that offset to every path str need not be aligned */ 458 off_nxt += PROFMOD_SZ; 459 off_path += namelen + 1; 460 } 461 462 /* Write out the module path strings */ 463 if (pbuf_sz) { 464 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) { 465 perror(filename); 466 exit(EX_IOERR); 467 } 468 469 free(pbuf); 470 } 471 } 472 473 /* 474 * If we have inactive modules, their current load addresses may overlap with 475 * active ones, and so we've to assign fictitious, non-overlapping addresses 476 * to all modules before we dump them. 477 */ 478 static void 479 fixup_maps(size_t *pathsz) 480 { 481 unsigned int n_inactive = 0; 482 Address lbase, lend; 483 mod_info_t *mi; 484 485 /* Pick the lowest load address among modules */ 486 *pathsz = 0; 487 for (mi = &modules; mi; mi = mi->next) { 488 489 if (mi->active == FALSE) 490 n_inactive++; 491 492 if (mi == &modules || mi->load_base < lbase) 493 lbase = mi->load_base; 494 495 /* 496 * Return total path size of non-aout modules only 497 */ 498 if (mi != &modules) 499 *pathsz = (*pathsz) + strlen(mi->name) + 1; 500 } 501 502 /* 503 * All module info is in fine shape already if there are no 504 * inactive modules 505 */ 506 if (n_inactive == 0) 507 return; 508 509 /* 510 * Assign fictitious load addresses to all (non-aout) modules so 511 * that sum info can be dumped out. 512 */ 513 for (mi = modules.next; mi; mi = mi->next) { 514 lend = lbase + (mi->data_end - mi->txt_origin); 515 if ((lbase < modules.load_base && lend < modules.load_base) || 516 (lbase > modules.load_end && lend > modules.load_end)) { 517 518 mi->load_base = lbase; 519 mi->load_end = lend; 520 521 /* just to give an appearance of reality */ 522 lbase = CEIL(lend + PGSZ, PGSZ); 523 } else { 524 /* 525 * can't use this lbase & lend pair, as it 526 * overlaps with aout's addresses 527 */ 528 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ); 529 mi->load_end = mi->load_base + (lend - lbase); 530 531 lbase = CEIL(mi->load_end + PGSZ, PGSZ); 532 } 533 } 534 } 535 536 static void 537 dump_gprofhdr(FILE *fp, char *filename) 538 { 539 ProfHeader prof_hdr; 540 541 prof_hdr.h_magic = PROF_MAGIC; 542 prof_hdr.h_major_ver = PROF_MAJOR_VERSION; 543 prof_hdr.h_minor_ver = PROF_MINOR_VERSION; 544 prof_hdr.size = PROFHDR_SZ; 545 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) { 546 perror(filename); 547 exit(EX_IOERR); 548 } 549 550 if (HDR_FILLER) 551 fseek(fp, HDR_FILLER, SEEK_CUR); 552 } 553 554 static void 555 dumpsum_ostyle(char *sumfile) 556 { 557 register nltype *nlp; 558 register arctype *arcp; 559 struct rawarc arc; 560 struct rawarc32 arc32; 561 FILE *sfile; 562 563 if ((sfile = fopen(sumfile, "w")) == NULL) { 564 perror(sumfile); 565 exit(EX_IOERR); 566 } 567 /* 568 * dump the header; use the last header read in 569 */ 570 if (Bflag) { 571 if (fwrite(&h, sizeof (h), 1, sfile) != 1) { 572 perror(sumfile); 573 exit(EX_IOERR); 574 } 575 } else { 576 struct hdr32 hdr; 577 hdr.lowpc = (pctype32)h.lowpc; 578 hdr.highpc = (pctype32)h.highpc; 579 hdr.ncnt = (pctype32)h.ncnt; 580 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) { 581 perror(sumfile); 582 exit(EX_IOERR); 583 } 584 } 585 /* 586 * dump the samples 587 */ 588 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) != 589 nsamples) { 590 perror(sumfile); 591 exit(EX_IOERR); 592 } 593 /* 594 * dump the normalized raw arc information. For old-style dumping, 595 * the only namelist is in modules.nl 596 */ 597 for (nlp = modules.nl; nlp < modules.npe; nlp++) { 598 for (arcp = nlp->children; arcp; 599 arcp = arcp->arc_childlist) { 600 if (Bflag) { 601 arc.raw_frompc = arcp->arc_parentp->value; 602 arc.raw_selfpc = arcp->arc_childp->value; 603 arc.raw_count = arcp->arc_count; 604 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) { 605 perror(sumfile); 606 exit(EX_IOERR); 607 } 608 } else { 609 arc32.raw_frompc = 610 (pctype32)arcp->arc_parentp->value; 611 arc32.raw_selfpc = 612 (pctype32)arcp->arc_childp->value; 613 arc32.raw_count = (actype32)arcp->arc_count; 614 if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 1) { 615 perror(sumfile); 616 exit(EX_IOERR); 617 } 618 } 619 #ifdef DEBUG 620 if (debug & SAMPLEDEBUG) { 621 printf("[dumpsum_ostyle] frompc 0x%llx selfpc " 622 "0x%llx count %lld\n", arc.raw_frompc, 623 arc.raw_selfpc, arc.raw_count); 624 } 625 #endif DEBUG 626 } 627 } 628 fclose(sfile); 629 } 630 631 /* 632 * dump out the gmon.sum file 633 */ 634 static void 635 dumpsum(char *sumfile) 636 { 637 FILE *sfile; 638 size_t pathbuf_sz; 639 unsigned long total_arcs; /* total number of arcs in all */ 640 unsigned long ncallees; /* no. of callees with parents */ 641 642 if (old_style) { 643 dumpsum_ostyle(sumfile); 644 return; 645 } 646 647 if ((sfile = fopen(sumfile, "w")) == NULL) { 648 perror(sumfile); 649 exit(EX_IOERR); 650 } 651 652 /* 653 * Dump the new-style gprof header. Even if one of the original 654 * profiled-files was of a older version, the summed file is of 655 * current version only. 656 */ 657 dump_gprofhdr(sfile, sumfile); 658 659 /* 660 * Fix up load-maps and dump out modules info 661 * 662 * Fix up module load maps so inactive modules get *some* address 663 * (and btw, could you get the total size of non-aout module path 664 * strings please ?) 665 */ 666 fixup_maps(&pathbuf_sz); 667 dump_modules(sfile, sumfile, pathbuf_sz); 668 669 670 /* 671 * Dump out the summ'd pcsamples 672 * 673 * For dumping call graph information later, we need certain 674 * statistics (like total arcs, number of callers for each node); 675 * collect these also while we are at it. 676 */ 677 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees); 678 679 /* 680 * Dump out the summ'd call graph information 681 */ 682 dump_callgraph(sfile, sumfile, total_arcs, ncallees); 683 684 685 fclose(sfile); 686 } 687 688 static void 689 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp) 690 { 691 nltype *parentp; 692 nltype *childp; 693 694 /* 695 * if count == 0 this is a null arc and 696 * we don't need to tally it. 697 */ 698 if (rawp->raw_count == 0) 699 return; 700 701 /* 702 * Lookup the caller and callee pcs in namelists of 703 * appropriate modules 704 */ 705 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL); 706 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL); 707 if (childp && parentp) { 708 if (!Dflag) 709 childp->ncall += rawp->raw_count; 710 else { 711 if (first_file) 712 childp->ncall += rawp->raw_count; 713 else { 714 childp->ncall -= rawp->raw_count; 715 if (childp->ncall < 0) 716 childp->ncall = 0; 717 } 718 } 719 720 #ifdef DEBUG 721 if (debug & TALLYDEBUG) { 722 printf("[tally] arc from %s to %s traversed " 723 "%lld times\n", parentp->name, 724 childp->name, rawp->raw_count); 725 } 726 #endif DEBUG 727 addarc(parentp, childp, rawp->raw_count); 728 } 729 } 730 731 /* 732 * Look up a module's base address in a sorted list of pc-hits. Unlike 733 * nllookup(), this deals with misses by mapping them to the next *higher* 734 * pc-hit. This is so that we get into the module's first pc-hit rightaway, 735 * even if the module's entry-point (load_base) itself is not a hit. 736 */ 737 static Address * 738 locate(Address *pclist, size_t nelem, Address keypc) 739 { 740 size_t low = 0, middle, high = nelem - 1; 741 742 if (keypc <= pclist[low]) 743 return (pclist); 744 745 if (keypc > pclist[high]) 746 return (NULL); 747 748 while (low != high) { 749 middle = (high + low) >> 1; 750 751 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc)) 752 return (&pclist[middle + 1]); 753 754 if (pclist[middle] >= keypc) 755 high = middle; 756 else 757 low = middle + 1; 758 } 759 760 /* must never reach here! */ 761 return (NULL); 762 } 763 764 static void 765 assign_pcsamples(module, pcsmpl, n_samples) 766 mod_info_t *module; 767 Address *pcsmpl; 768 size_t n_samples; 769 { 770 Address *pcptr, *pcse = pcsmpl + n_samples; 771 pctype nxt_func; 772 nltype *fnl; 773 size_t func_nticks; 774 #ifdef DEBUG 775 size_t n_hits_in_module = 0; 776 #endif DEBUG 777 778 /* Locate the first pc-hit for this module */ 779 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) { 780 #ifdef DEBUG 781 if (debug & PCSMPLDEBUG) { 782 printf("[assign_pcsamples] no pc-hits in\n"); 783 printf(" `%s'\n", module->name); 784 } 785 #endif DEBUG 786 return; /* no pc-hits in this module */ 787 } 788 789 /* Assign all pc-hits in this module to appropriate functions */ 790 while ((pcptr < pcse) && (*pcptr < module->load_end)) { 791 792 /* Update the corresponding function's time */ 793 if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) { 794 /* 795 * Collect all pc-hits in this function. Each 796 * pc-hit counts as 1 tick. 797 */ 798 func_nticks = 0; 799 while ((pcptr < pcse) && (*pcptr < nxt_func)) { 800 func_nticks++; 801 pcptr++; 802 } 803 804 if (func_nticks == 0) 805 pcptr++; 806 else { 807 fnl->nticks += func_nticks; 808 fnl->time += func_nticks; 809 totime += func_nticks; 810 } 811 812 #ifdef DEBUG 813 n_hits_in_module += func_nticks; 814 #endif DEBUG 815 } else { 816 /* 817 * pc sample could not be assigned to function; 818 * probably in a PLT 819 */ 820 pcptr++; 821 } 822 } 823 824 #ifdef DEBUG 825 if (debug & PCSMPLDEBUG) { 826 printf("[assign_pcsamples] %ld hits in\n", n_hits_in_module); 827 printf(" `%s'\n", module->name); 828 } 829 #endif DEBUG 830 } 831 832 int 833 pc_cmp(Address *pc1, Address *pc2) 834 { 835 if (*pc1 > *pc2) 836 return (1); 837 838 if (*pc1 < *pc2) 839 return (-1); 840 841 return (0); 842 } 843 844 static void 845 process_pcsamples(bufp) 846 ProfBuffer *bufp; 847 { 848 Address *pc_samples; 849 mod_info_t *mi; 850 caddr_t p; 851 size_t chunk_size, nelem_read, nelem_to_read; 852 853 #ifdef DEBUG 854 if (debug & PCSMPLDEBUG) { 855 printf("[process_pcsamples] number of pcsamples = %lld\n", 856 bufp->bufsize); 857 } 858 #endif DEBUG 859 860 /* buffer with no pc samples ? */ 861 if (bufp->bufsize == 0) 862 return; 863 864 /* 865 * If we're processing pcsamples of a profile sum, we could have 866 * more than PROF_BUFFER_SIZE number of samples. In such a case, 867 * we must read the pcsamples in chunks. 868 */ 869 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE) 870 chunk_size = PROF_BUFFER_SIZE; 871 872 /* Allocate for the pcsample chunk */ 873 pc_samples = (Address *) calloc(chunk_size, sizeof (Address)); 874 if (pc_samples == NULL) { 875 fprintf(stderr, "%s: no room for %ld sample pc's\n", 876 whoami, chunk_size); 877 exit(EX_OSERR); 878 } 879 880 /* Copy the current set of pcsamples */ 881 nelem_read = 0; 882 nelem_to_read = bufp->bufsize; 883 p = (char *) bufp + bufp->buffer; 884 885 while (nelem_read < nelem_to_read) { 886 memcpy((void *) pc_samples, p, chunk_size * sizeof (Address)); 887 888 /* Sort the pc samples */ 889 qsort(pc_samples, chunk_size, sizeof (Address), 890 (int (*)(const void *, const void *)) pc_cmp); 891 892 /* 893 * Assign pcsamples to functions in the currently active 894 * module list 895 */ 896 for (mi = &modules; mi; mi = mi->next) { 897 if (mi->active == FALSE) 898 continue; 899 assign_pcsamples(mi, pc_samples, chunk_size); 900 } 901 902 p += (chunk_size * sizeof (Address)); 903 nelem_read += chunk_size; 904 905 if ((nelem_to_read - nelem_read) < chunk_size) 906 chunk_size = nelem_to_read - nelem_read; 907 } 908 909 free(pc_samples); 910 911 /* Update total number of pcsamples read so far */ 912 n_pcsamples += bufp->bufsize; 913 } 914 915 static mod_info_t * 916 find_module(Address addr) 917 { 918 mod_info_t *mi; 919 920 for (mi = &modules; mi; mi = mi->next) { 921 if (mi->active == FALSE) 922 continue; 923 924 if (addr >= mi->load_base && addr < mi->load_end) 925 return (mi); 926 } 927 928 return (NULL); 929 } 930 931 static void 932 process_cgraph(cgp) 933 ProfCallGraph *cgp; 934 { 935 struct rawarc arc; 936 mod_info_t *callee_mi, *caller_mi; 937 ProfFunction *calleep, *callerp; 938 Index caller_off, callee_off; 939 940 /* 941 * Note that *callee_off* increment in the for loop below 942 * uses *calleep* and *calleep* doesn't get set until the for loop 943 * is entered. We don't expect the increment to be executed before 944 * the loop body is executed atleast once, so this should be ok. 945 */ 946 for (callee_off = cgp->functions; callee_off; 947 callee_off = calleep->next_to) { 948 949 calleep = (ProfFunction *) ((char *) cgp + callee_off); 950 951 /* 952 * We could choose either to sort the {caller, callee} 953 * list twice and assign callee/caller to modules or inspect 954 * each callee/caller in the active modules list. Since 955 * the modules list is usually very small, we'l choose the 956 * latter. 957 */ 958 959 /* 960 * If we cannot identify a callee with a module, there's 961 * no use worrying about who called it. 962 */ 963 if ((callee_mi = find_module(calleep->topc)) == NULL) { 964 #ifdef DEBUG 965 if (debug & CGRAPHDEBUG) { 966 printf("[process_cgraph] callee %#llx missed\n", 967 calleep->topc); 968 } 969 #endif DEBUG 970 continue; 971 } else 972 arc.raw_selfpc = calleep->topc; 973 974 for (caller_off = callee_off; caller_off; 975 caller_off = callerp->next_from) { 976 977 callerp = (ProfFunction *) ((char *) cgp + caller_off); 978 if ((caller_mi = find_module(callerp->frompc)) == 979 NULL) { 980 #ifdef DEBUG 981 if (debug & CGRAPHDEBUG) { 982 printf("[process_cgraph] caller %#llx " 983 "missed\n", callerp->frompc); 984 } 985 #endif DEBUG 986 continue; 987 } 988 989 arc.raw_frompc = callerp->frompc; 990 arc.raw_count = callerp->count; 991 992 #ifdef DEBUG 993 if (debug & CGRAPHDEBUG) { 994 printf("[process_cgraph] arc <%#llx, %#llx, " 995 "%lld>\n", arc.raw_frompc, 996 arc.raw_selfpc, arc.raw_count); 997 } 998 #endif DEBUG 999 tally(caller_mi, callee_mi, &arc); 1000 } 1001 } 1002 1003 #ifdef DEBUG 1004 puts("\n"); 1005 #endif DEBUG 1006 } 1007 1008 /* 1009 * Two modules overlap each other if they don't lie completely *outside* 1010 * each other. 1011 */ 1012 static bool 1013 does_overlap(ProfModule *new, mod_info_t *old) 1014 { 1015 /* case 1: new module lies completely *before* the old one */ 1016 if (new->startaddr < old->load_base && new->endaddr <= old->load_base) 1017 return (FALSE); 1018 1019 /* case 2: new module lies completely *after* the old one */ 1020 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end) 1021 return (FALSE); 1022 1023 /* probably a dlopen: the modules overlap each other */ 1024 return (TRUE); 1025 } 1026 1027 static bool 1028 is_same_as_aout(char *modpath, struct stat *buf) 1029 { 1030 if (stat(modpath, buf) == -1) { 1031 fprintf(stderr, "%s: can't get info on `%s'\n", 1032 whoami, modpath); 1033 exit(EX_NOINPUT); 1034 } 1035 1036 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino)) 1037 return (TRUE); 1038 else 1039 return (FALSE); 1040 } 1041 1042 static void 1043 process_modules(modlp) 1044 ProfModuleList *modlp; 1045 { 1046 ProfModule *newmodp; 1047 mod_info_t *mi, *last, *new_module; 1048 char *so_path, *name; 1049 bool more_modules = TRUE; 1050 struct stat so_statbuf; 1051 1052 #ifdef DEBUG 1053 if (debug & MODULEDEBUG) { 1054 printf("[process_modules] module obj version %u\n", 1055 modlp->version); 1056 } 1057 #endif DEBUG 1058 1059 /* Check version of module type object */ 1060 if (modlp->version > PROF_MODULES_VER) { 1061 fprintf(stderr, "%s: version %d for module type objects" 1062 "is not supported\n", whoami, modlp->version); 1063 exit(EX_SOFTWARE); 1064 } 1065 1066 1067 /* 1068 * Scan the PROF_MODULES_T list and add modules to current list 1069 * of modules, if they're not present already 1070 */ 1071 newmodp = (ProfModule *) ((char *) modlp + modlp->modules); 1072 do { 1073 /* 1074 * Since the prog could've been renamed after its run, we 1075 * should see if this overlaps a.out. If it does, it is 1076 * probably the renamed aout. We should also skip any other 1077 * non-sharedobj's that we see (or should we report an error ?) 1078 */ 1079 so_path = (caddr_t) modlp + newmodp->path; 1080 if (does_overlap(newmodp, &modules) || 1081 is_same_as_aout(so_path, &so_statbuf) || 1082 (!is_shared_obj(so_path))) { 1083 1084 if (!newmodp->next) 1085 more_modules = FALSE; 1086 1087 newmodp = (ProfModule *) 1088 ((caddr_t) modlp + newmodp->next); 1089 #ifdef DEBUG 1090 if (debug & MODULEDEBUG) { 1091 printf("[process_modules] `%s'\n", so_path); 1092 printf(" skipped\n"); 1093 } 1094 #endif DEBUG 1095 continue; 1096 } 1097 #ifdef DEBUG 1098 if (debug & MODULEDEBUG) 1099 printf("[process_modules] `%s'...\n", so_path); 1100 #endif DEBUG 1101 1102 /* 1103 * Check all modules (leave the first one, 'cos that 1104 * is the program executable info). If this module is already 1105 * there in the list, update the load addresses and proceed. 1106 */ 1107 last = &modules; 1108 while (mi = last->next) { 1109 /* 1110 * We expect the full pathname for all shared objects 1111 * needed by the program executable. In this case, we 1112 * simply need to compare the paths to see if they are 1113 * the same file. 1114 */ 1115 if (strcmp(mi->name, so_path) == 0) 1116 break; 1117 1118 /* 1119 * Check if this new shared object will overlap 1120 * any existing module. If yes, remove the old one 1121 * from the linked list (but don't free it, 'cos 1122 * there may be symbols referring to this module 1123 * still) 1124 */ 1125 if (does_overlap(newmodp, mi)) { 1126 #ifdef DEBUG 1127 if (debug & MODULEDEBUG) { 1128 printf("[process_modules] `%s'\n", 1129 so_path); 1130 printf(" overlaps\n"); 1131 printf(" `%s'\n", 1132 mi->name); 1133 } 1134 #endif DEBUG 1135 mi->active = FALSE; 1136 } 1137 1138 last = mi; 1139 } 1140 1141 /* Module already there, skip it */ 1142 if (mi != NULL) { 1143 mi->load_base = newmodp->startaddr; 1144 mi->load_end = newmodp->endaddr; 1145 mi->active = TRUE; 1146 if (!newmodp->next) 1147 more_modules = FALSE; 1148 1149 newmodp = (ProfModule *) 1150 ((caddr_t) modlp + newmodp->next); 1151 1152 #ifdef DEBUG 1153 if (debug & MODULEDEBUG) { 1154 printf("[process_modules] base=%#llx, " 1155 "end=%#llx\n", mi->load_base, 1156 mi->load_end); 1157 } 1158 #endif DEBUG 1159 continue; 1160 } 1161 1162 /* 1163 * Check if gmon.out is outdated with respect to the new 1164 * module we want to add 1165 */ 1166 if (gmonout_info.mtime < so_statbuf.st_mtime) { 1167 fprintf(stderr, "%s: shared obj outdates prof info\n", 1168 whoami); 1169 fprintf(stderr, "\t(newer %s)\n", so_path); 1170 exit(EX_NOINPUT); 1171 } 1172 1173 /* Create a new module element */ 1174 new_module = (mod_info_t *) malloc(sizeof (mod_info_t)); 1175 if (new_module == NULL) { 1176 fprintf(stderr, "%s: no room for %ld bytes\n", 1177 whoami, sizeof (mod_info_t)); 1178 exit(EX_OSERR); 1179 } 1180 1181 /* and fill in info... */ 1182 new_module->id = n_modules + 1; 1183 new_module->load_base = newmodp->startaddr; 1184 new_module->load_end = newmodp->endaddr; 1185 new_module->name = (char *) malloc(strlen(so_path) + 1); 1186 if (new_module->name == NULL) { 1187 fprintf(stderr, "%s: no room for %ld bytes\n", 1188 whoami, strlen(so_path) + 1); 1189 exit(EX_OSERR); 1190 } 1191 strcpy(new_module->name, so_path); 1192 #ifdef DEBUG 1193 if (debug & MODULEDEBUG) { 1194 printf("[process_modules] base=%#llx, end=%#llx\n", 1195 new_module->load_base, new_module->load_end); 1196 } 1197 #endif DEBUG 1198 1199 /* Create this module's nameslist */ 1200 process_namelist(new_module); 1201 1202 /* Add it to the tail of active module list */ 1203 last->next = new_module; 1204 n_modules++; 1205 1206 #ifdef DEBUG 1207 if (debug & MODULEDEBUG) { 1208 printf("[process_modules] total shared objects = %ld\n", 1209 n_modules - 1); 1210 } 1211 #endif DEBUG 1212 /* 1213 * Move to the next module in the PROF_MODULES_T list 1214 * (if present) 1215 */ 1216 if (!newmodp->next) 1217 more_modules = FALSE; 1218 1219 newmodp = (ProfModule *) ((caddr_t) modlp + newmodp->next); 1220 1221 } while (more_modules); 1222 } 1223 1224 static void 1225 reset_active_modules() 1226 { 1227 mod_info_t *mi; 1228 1229 /* Except the executable, no other module should remain active */ 1230 for (mi = modules.next; mi; mi = mi->next) 1231 mi->active = FALSE; 1232 } 1233 1234 static void 1235 getpfiledata(memp, fsz) 1236 caddr_t memp; 1237 size_t fsz; 1238 { 1239 ProfObject *objp; 1240 caddr_t file_end; 1241 bool found_pcsamples = FALSE, found_cgraph = FALSE; 1242 1243 /* 1244 * Before processing a new gmon.out, all modules except the 1245 * program executable must be made inactive, so that symbols 1246 * are searched only in the program executable, if we don't 1247 * find a MODULES_T object. Don't do it *after* we read a gmon.out, 1248 * because we need the active module data after we're done with 1249 * the last gmon.out, if we're doing summing. 1250 */ 1251 reset_active_modules(); 1252 1253 file_end = memp + fsz; 1254 objp = (ProfObject *) (memp + ((ProfHeader *) memp)->size); 1255 while ((caddr_t) objp < file_end) { 1256 #ifdef DEBUG 1257 { 1258 unsigned int type = 0; 1259 1260 if (debug & MONOUTDEBUG) { 1261 if (objp->type <= MAX_OBJTYPES) 1262 type = objp->type; 1263 1264 printf("\n[getpfiledata] object %s [%#lx]\n", 1265 objname[type], objp->type); 1266 } 1267 } 1268 #endif DEBUG 1269 switch (objp->type) { 1270 case PROF_MODULES_T : 1271 process_modules((ProfModuleList *) objp); 1272 break; 1273 1274 case PROF_CALLGRAPH_T : 1275 process_cgraph((ProfCallGraph *) objp); 1276 found_cgraph = TRUE; 1277 break; 1278 1279 case PROF_BUFFER_T : 1280 process_pcsamples((ProfBuffer *) objp); 1281 found_pcsamples = TRUE; 1282 break; 1283 1284 default : 1285 fprintf(stderr, 1286 "%s: unknown prof object type=%d\n", 1287 whoami, objp->type); 1288 exit(EX_SOFTWARE); 1289 } 1290 objp = (ProfObject *) ((caddr_t) objp + objp->size); 1291 } 1292 1293 if (!found_cgraph || !found_pcsamples) { 1294 fprintf(stderr, 1295 "%s: missing callgraph/pcsamples object\n", whoami); 1296 exit(EX_SOFTWARE); 1297 } 1298 1299 if ((caddr_t) objp > file_end) { 1300 fprintf(stderr, "%s: malformed profile file.\n", whoami); 1301 exit(EX_SOFTWARE); 1302 } 1303 1304 if (first_file) 1305 first_file = FALSE; 1306 } 1307 1308 static void 1309 readarcs(pfile) 1310 FILE *pfile; 1311 { 1312 /* 1313 * the rest of the file consists of 1314 * a bunch of <from,self,count> tuples. 1315 */ 1316 /* CONSTCOND */ 1317 while (1) { 1318 struct rawarc arc; 1319 1320 if (rflag) { 1321 if (Bflag) { 1322 L_cgarc64 rtld_arc64; 1323 1324 /* 1325 * If rflag is set then this is an profiled 1326 * image generated by rtld. It needs to be 1327 * 'converted' to the standard data format. 1328 */ 1329 if (fread(&rtld_arc64, 1330 sizeof (L_cgarc64), 1, pfile) != 1) 1331 break; 1332 1333 if (rtld_arc64.cg_from == PRF_OUTADDR64) 1334 arc.raw_frompc = s_highpc + 0x10; 1335 else 1336 arc.raw_frompc = 1337 (pctype)rtld_arc64.cg_from; 1338 arc.raw_selfpc = (pctype)rtld_arc64.cg_to; 1339 arc.raw_count = (actype)rtld_arc64.cg_count; 1340 } else { 1341 L_cgarc rtld_arc; 1342 1343 /* 1344 * If rflag is set then this is an profiled 1345 * image generated by rtld. It needs to be 1346 * 'converted' to the standard data format. 1347 */ 1348 if (fread(&rtld_arc, 1349 sizeof (L_cgarc), 1, pfile) != 1) 1350 break; 1351 1352 if (rtld_arc.cg_from == PRF_OUTADDR) 1353 arc.raw_frompc = s_highpc + 0x10; 1354 else 1355 arc.raw_frompc = (pctype) 1356 (uintptr_t)rtld_arc.cg_from; 1357 arc.raw_selfpc = (pctype) 1358 (uintptr_t)rtld_arc.cg_to; 1359 arc.raw_count = (actype)rtld_arc.cg_count; 1360 } 1361 } else { 1362 if (Bflag) { 1363 if (fread(&arc, sizeof (struct rawarc), 1, 1364 pfile) != 1) { 1365 break; 1366 } 1367 } else { 1368 /* 1369 * If these aren't big %pc's, we need to read 1370 * into the 32-bit raw arc structure, and 1371 * assign the members into the actual arc. 1372 */ 1373 struct rawarc32 arc32; 1374 if (fread(&arc32, sizeof (struct rawarc32), 1375 1, pfile) != 1) 1376 break; 1377 arc.raw_frompc = (pctype)arc32.raw_frompc; 1378 arc.raw_selfpc = (pctype)arc32.raw_selfpc; 1379 arc.raw_count = (actype)arc32.raw_count; 1380 } 1381 } 1382 1383 #ifdef DEBUG 1384 if (debug & SAMPLEDEBUG) { 1385 printf("[getpfile] frompc 0x%llx selfpc " 1386 "0x%llx count %lld\n", arc.raw_frompc, 1387 arc.raw_selfpc, arc.raw_count); 1388 } 1389 #endif DEBUG 1390 /* 1391 * add this arc 1392 */ 1393 tally(&modules, &modules, &arc); 1394 } 1395 if (first_file) 1396 first_file = FALSE; 1397 } 1398 1399 static void 1400 readsamples(FILE *pfile) 1401 { 1402 sztype i; 1403 unsigned_UNIT sample; 1404 1405 if (samples == 0) { 1406 samples = (unsigned_UNIT *) calloc(nsamples, 1407 sizeof (unsigned_UNIT)); 1408 if (samples == 0) { 1409 fprintf(stderr, "%s: No room for %ld sample pc's\n", 1410 whoami, sampbytes / sizeof (unsigned_UNIT)); 1411 exit(EX_OSERR); 1412 } 1413 } 1414 1415 for (i = 0; i < nsamples; i++) { 1416 fread(&sample, sizeof (unsigned_UNIT), 1, pfile); 1417 if (feof(pfile)) 1418 break; 1419 samples[i] += sample; 1420 } 1421 if (i != nsamples) { 1422 fprintf(stderr, 1423 "%s: unexpected EOF after reading %ld/%ld samples\n", 1424 whoami, --i, nsamples); 1425 exit(EX_IOERR); 1426 } 1427 } 1428 1429 static void * 1430 handle_versioned(FILE *pfile, char *filename, size_t *fsz) 1431 { 1432 int fd; 1433 bool invalid_version; 1434 caddr_t fmem; 1435 struct stat buf; 1436 ProfHeader prof_hdr; 1437 1438 /* 1439 * Check versioning info. For now, let's say we provide 1440 * backward compatibility, so we accept all older versions. 1441 */ 1442 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) { 1443 perror("fread()"); 1444 exit(EX_IOERR); 1445 } 1446 1447 invalid_version = FALSE; 1448 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION) 1449 invalid_version = TRUE; 1450 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) { 1451 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION) 1452 invalid_version = FALSE; 1453 } 1454 1455 if (invalid_version) { 1456 fprintf(stderr, "%s: version %d.%d not supported\n", 1457 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver); 1458 exit(EX_SOFTWARE); 1459 } 1460 1461 /* 1462 * Map gmon.out onto memory. 1463 */ 1464 fclose(pfile); 1465 if ((fd = open(filename, O_RDONLY)) == -1) { 1466 perror(filename); 1467 exit(EX_IOERR); 1468 } 1469 1470 if ((*fsz = lseek(fd, 0, SEEK_END)) == -1) { 1471 perror(filename); 1472 exit(EX_IOERR); 1473 } 1474 1475 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0); 1476 if (fmem == MAP_FAILED) { 1477 fprintf(stderr, "%s: can't map %s\n", whoami, filename); 1478 exit(EX_IOERR); 1479 } 1480 1481 /* 1482 * Before we close this fd, save this gmon.out's info to later verify 1483 * if the shared objects it references have changed since the time 1484 * they were used to generate this gmon.out 1485 */ 1486 if (fstat(fd, &buf) == -1) { 1487 fprintf(stderr, "%s: can't get info on `%s'\n", 1488 whoami, filename); 1489 exit(EX_NOINPUT); 1490 } 1491 gmonout_info.dev = buf.st_dev; 1492 gmonout_info.ino = buf.st_ino; 1493 gmonout_info.mtime = buf.st_mtime; 1494 gmonout_info.size = buf.st_size; 1495 1496 close(fd); 1497 1498 return ((void *) fmem); 1499 } 1500 1501 static void * 1502 openpfile(filename, fsz) 1503 char *filename; 1504 size_t *fsz; 1505 { 1506 struct hdr tmp; 1507 FILE * pfile; 1508 unsigned long magic_num; 1509 size_t hdrsize = sizeof (struct hdr); 1510 static bool first_time = TRUE; 1511 extern bool old_style; 1512 1513 if ((pfile = fopen(filename, "r")) == NULL) { 1514 perror(filename); 1515 exit(EX_IOERR); 1516 } 1517 1518 /* 1519 * Read in the magic. Note that we changed the cast "unsigned long" 1520 * to "unsigned int" because that's how h_magic is defined in the 1521 * new format ProfHeader. 1522 */ 1523 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) { 1524 perror("fread()"); 1525 exit(EX_IOERR); 1526 } 1527 1528 rewind(pfile); 1529 1530 /* 1531 * First check if this is versioned or *old-style* gmon.out 1532 */ 1533 if (magic_num == (unsigned int)PROF_MAGIC) { 1534 if ((!first_time) && (old_style == TRUE)) { 1535 fprintf(stderr, "%s: can't mix old & new format " 1536 "profiled files\n", whoami); 1537 exit(EX_SOFTWARE); 1538 } 1539 first_time = FALSE; 1540 old_style = FALSE; 1541 return (handle_versioned(pfile, filename, fsz)); 1542 } 1543 1544 if ((!first_time) && (old_style == FALSE)) { 1545 fprintf(stderr, "%s: can't mix old & new format " 1546 "profiled files\n", whoami); 1547 exit(EX_SOFTWARE); 1548 } 1549 1550 first_time = FALSE; 1551 old_style = TRUE; 1552 fsz = 0; 1553 1554 /* 1555 * Now, we need to determine if this is a run-time linker 1556 * profiled file or if it is a standard gmon.out. 1557 * 1558 * We do this by checking if magic matches PRF_MAGIC. If it 1559 * does, then this is a run-time linker profiled file, if it 1560 * doesn't, it must be a gmon.out file. 1561 */ 1562 if (magic_num == (unsigned long)PRF_MAGIC) 1563 rflag = TRUE; 1564 else 1565 rflag = FALSE; 1566 1567 if (rflag) { 1568 if (Bflag) { 1569 L_hdr64 l_hdr64; 1570 1571 /* 1572 * If the rflag is set then the input file is 1573 * rtld profiled data, we'll read it in and convert 1574 * it to the standard format (ie: make it look like 1575 * a gmon.out file). 1576 */ 1577 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) { 1578 perror("fread()"); 1579 exit(EX_IOERR); 1580 } 1581 if (l_hdr64.hd_version != PRF_VERSION_64) { 1582 fprintf(stderr, "%s: expected version %d, " 1583 "got version %d when processing 64-bit " 1584 "run-time linker profiled file.\n", 1585 whoami, PRF_VERSION_64, l_hdr64.hd_version); 1586 exit(EX_SOFTWARE); 1587 } 1588 tmp.lowpc = 0; 1589 tmp.highpc = (pctype)l_hdr64.hd_hpc; 1590 tmp.ncnt = sizeof (M_hdr64) + l_hdr64.hd_psize; 1591 } else { 1592 L_hdr l_hdr; 1593 1594 /* 1595 * If the rflag is set then the input file is 1596 * rtld profiled data, we'll read it in and convert 1597 * it to the standard format (ie: make it look like 1598 * a gmon.out file). 1599 */ 1600 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) { 1601 perror("fread()"); 1602 exit(EX_IOERR); 1603 } 1604 if (l_hdr.hd_version != PRF_VERSION) { 1605 fprintf(stderr, "%s: expected version %d, " 1606 "got version %d when processing " 1607 "run-time linker profiled file.\n", 1608 whoami, PRF_VERSION, l_hdr.hd_version); 1609 exit(EX_SOFTWARE); 1610 } 1611 tmp.lowpc = 0; 1612 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc; 1613 tmp.ncnt = sizeof (M_hdr) + l_hdr.hd_psize; 1614 hdrsize = sizeof (M_hdr); 1615 } 1616 } else { 1617 if (Bflag) { 1618 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) { 1619 perror("fread()"); 1620 exit(EX_IOERR); 1621 } 1622 } else { 1623 /* 1624 * If we're not reading big %pc's, we need to read 1625 * the 32-bit header, and assign the members to 1626 * the actual header. 1627 */ 1628 struct hdr32 hdr32; 1629 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) { 1630 perror("fread()"); 1631 exit(EX_IOERR); 1632 } 1633 tmp.lowpc = hdr32.lowpc; 1634 tmp.highpc = hdr32.highpc; 1635 tmp.ncnt = hdr32.ncnt; 1636 hdrsize = sizeof (struct hdr32); 1637 } 1638 } 1639 1640 /* 1641 * perform sanity check on profiled file we've opened. 1642 */ 1643 if (tmp.lowpc >= tmp.highpc) { 1644 if (rflag) 1645 fprintf(stderr, "%s: badly formed profiled data.\n", 1646 filename); 1647 else 1648 fprintf(stderr, "%s: badly formed gmon.out file.\n", 1649 filename); 1650 exit(EX_SOFTWARE); 1651 } 1652 1653 if (s_highpc != 0 && (tmp.lowpc != h.lowpc || 1654 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) { 1655 fprintf(stderr, 1656 "%s: incompatible with first gmon file\n", 1657 filename); 1658 exit(EX_IOERR); 1659 } 1660 h = tmp; 1661 s_lowpc = h.lowpc; 1662 s_highpc = h.highpc; 1663 lowpc = h.lowpc / sizeof (UNIT); 1664 highpc = h.highpc / sizeof (UNIT); 1665 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0; 1666 nsamples = sampbytes / sizeof (unsigned_UNIT); 1667 1668 #ifdef DEBUG 1669 if (debug & SAMPLEDEBUG) { 1670 printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc " 1671 "0x%llx hdr.ncnt %lld\n", 1672 h.lowpc, h.highpc, h.ncnt); 1673 printf("[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n", 1674 s_lowpc, s_highpc); 1675 printf("[openpfile] lowpc 0x%llx highpc 0x%llx\n", 1676 lowpc, highpc); 1677 printf("[openpfile] sampbytes %d nsamples %d\n", 1678 sampbytes, nsamples); 1679 } 1680 #endif DEBUG 1681 1682 return ((void *) pfile); 1683 } 1684 1685 /* 1686 * Information from a gmon.out file depends on whether it's versioned 1687 * or non-versioned, *old style* gmon.out. If old-style, it is in two 1688 * parts : an array of sampling hits within pc ranges, and the arcs. If 1689 * versioned, it contains a header, followed by any number of 1690 * modules/callgraph/pcsample_buffer objects. 1691 */ 1692 static void 1693 getpfile(char *filename) 1694 { 1695 void *handle; 1696 size_t fsz; 1697 1698 handle = openpfile(filename, &fsz); 1699 1700 if (old_style) { 1701 readsamples((FILE *) handle); 1702 readarcs((FILE *) handle); 1703 fclose((FILE *) handle); 1704 return; 1705 } 1706 1707 getpfiledata((caddr_t) handle, fsz); 1708 munmap(handle, fsz); 1709 } 1710 1711 main(int argc, char ** argv) 1712 { 1713 char **sp; 1714 nltype **timesortnlp; 1715 int c; 1716 int errflg; 1717 extern char *optarg; 1718 extern int optind; 1719 1720 prog_name = *argv; /* preserve program name */ 1721 debug = 0; 1722 nflag = FALSE; 1723 bflag = TRUE; 1724 lflag = FALSE; 1725 Cflag = FALSE; 1726 first_file = TRUE; 1727 rflag = FALSE; 1728 Bflag = FALSE; 1729 errflg = FALSE; 1730 1731 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF) 1732 switch (c) { 1733 case 'a': 1734 aflag = TRUE; 1735 break; 1736 case 'b': 1737 bflag = FALSE; 1738 break; 1739 case 'c': 1740 cflag = TRUE; 1741 break; 1742 case 'C': 1743 Cflag = TRUE; 1744 break; 1745 case 'd': 1746 dflag = TRUE; 1747 debug |= atoi(optarg); 1748 printf("[main] debug = 0x%x\n", debug); 1749 break; 1750 case 'D': 1751 Dflag = TRUE; 1752 break; 1753 case 'E': 1754 addlist(Elist, optarg); 1755 Eflag = TRUE; 1756 addlist(elist, optarg); 1757 eflag = TRUE; 1758 break; 1759 case 'e': 1760 addlist(elist, optarg); 1761 eflag = TRUE; 1762 break; 1763 case 'F': 1764 addlist(Flist, optarg); 1765 Fflag = TRUE; 1766 addlist(flist, optarg); 1767 fflag = TRUE; 1768 break; 1769 case 'f': 1770 addlist(flist, optarg); 1771 fflag = TRUE; 1772 break; 1773 case 'l': 1774 lflag = TRUE; 1775 break; 1776 case 'n': 1777 nflag = TRUE; 1778 number_funcs_toprint = atoi(optarg); 1779 break; 1780 case 's': 1781 sflag = TRUE; 1782 break; 1783 case 'z': 1784 zflag = TRUE; 1785 break; 1786 case '?': 1787 errflg++; 1788 1789 } 1790 1791 if (errflg) { 1792 (void) fprintf(stderr, 1793 "usage: gprof [ -abcCDlsz ] [ -e function-name ] " 1794 "[ -E function-name ]\n\t[ -f function-name ] " 1795 "[ -F function-name ]\n\t[ image-file " 1796 "[ profile-file ... ] ]\n"); 1797 exit(EX_USAGE); 1798 } 1799 1800 if (optind < argc) { 1801 a_outname = argv[optind++]; 1802 } else { 1803 a_outname = A_OUTNAME; 1804 } 1805 if (optind < argc) { 1806 gmonname = argv[optind++]; 1807 } else { 1808 gmonname = GMONNAME; 1809 } 1810 /* 1811 * turn off default functions 1812 */ 1813 for (sp = &defaultEs[0]; *sp; sp++) { 1814 Eflag = TRUE; 1815 addlist(Elist, *sp); 1816 eflag = TRUE; 1817 addlist(elist, *sp); 1818 } 1819 /* 1820 * how many ticks per second? 1821 * if we can't tell, report time in ticks. 1822 */ 1823 hz = sysconf(_SC_CLK_TCK); 1824 if (hz == -1) { 1825 hz = 1; 1826 fprintf(stderr, "time is in ticks, not seconds\n"); 1827 } 1828 1829 getnfile(a_outname); 1830 1831 /* 1832 * get information about mon.out file(s). 1833 */ 1834 do { 1835 getpfile(gmonname); 1836 if (optind < argc) 1837 gmonname = argv[optind++]; 1838 else 1839 optind++; 1840 } while (optind <= argc); 1841 /* 1842 * dump out a gmon.sum file if requested 1843 */ 1844 if (sflag || Dflag) 1845 dumpsum(GMONSUM); 1846 1847 if (old_style) { 1848 /* 1849 * assign samples to procedures 1850 */ 1851 asgnsamples(); 1852 } 1853 1854 /* 1855 * assemble the dynamic profile 1856 */ 1857 timesortnlp = doarcs(); 1858 1859 /* 1860 * print the dynamic profile 1861 */ 1862 #ifdef DEBUG 1863 if (debug & ANYDEBUG) { 1864 /* raw output of all symbols in all their glory */ 1865 int i; 1866 printf(" Name, pc_entry_pt, svalue, tix_in_routine, " 1867 "#calls, selfcalls, index \n"); 1868 for (i = 0; i < modules.nname; i++) { /* Print each symbol */ 1869 if (timesortnlp[i]->name) 1870 printf(" %s ", timesortnlp[i]->name); 1871 else 1872 printf(" <cycle> "); 1873 printf(" %lld ", timesortnlp[i]->value); 1874 printf(" %lld ", timesortnlp[i]->svalue); 1875 printf(" %f ", timesortnlp[i]->time); 1876 printf(" %lld ", timesortnlp[i]->ncall); 1877 printf(" %lld ", timesortnlp[i]->selfcalls); 1878 printf(" %d ", timesortnlp[i]->index); 1879 printf(" \n"); 1880 } 1881 } 1882 #endif DEBUG 1883 1884 printgprof(timesortnlp); 1885 /* 1886 * print the flat profile 1887 */ 1888 printprof(); 1889 /* 1890 * print the index 1891 */ 1892 printindex(); 1893 1894 /* 1895 * print the modules 1896 */ 1897 printmodules(); 1898 1899 done(); 1900 /* NOTREACHED */ 1901 return (0); 1902 } 1903