1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1983, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43 #endif 44 static const char rcsid[] = 45 "$FreeBSD$"; 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include <limits.h> 50 #include <stdint.h> 51 #include "gprof.h" 52 53 static int valcmp(const void *, const void *); 54 55 56 static struct gmonhdr gmonhdr; 57 static int lflag; 58 static int Lflag; 59 60 main(argc, argv) 61 int argc; 62 char **argv; 63 { 64 char **sp; 65 nltype **timesortnlp; 66 char **defaultEs; 67 68 --argc; 69 argv++; 70 debug = 0; 71 bflag = TRUE; 72 while ( *argv != 0 && **argv == '-' ) { 73 (*argv)++; 74 switch ( **argv ) { 75 case 'a': 76 aflag = TRUE; 77 break; 78 case 'b': 79 bflag = FALSE; 80 break; 81 case 'C': 82 Cflag = TRUE; 83 cyclethreshold = atoi( *++argv ); 84 break; 85 case 'c': 86 #if defined(vax) || defined(tahoe) 87 cflag = TRUE; 88 #else 89 errx(1, "-c isn't supported on this architecture yet"); 90 #endif 91 break; 92 case 'd': 93 dflag = TRUE; 94 setlinebuf(stdout); 95 debug |= atoi( *++argv ); 96 debug |= ANYDEBUG; 97 # ifdef DEBUG 98 printf("[main] debug = %d\n", debug); 99 # else /* not DEBUG */ 100 printf("gprof: -d ignored\n"); 101 # endif /* DEBUG */ 102 break; 103 case 'E': 104 ++argv; 105 addlist( Elist , *argv ); 106 Eflag = TRUE; 107 addlist( elist , *argv ); 108 eflag = TRUE; 109 break; 110 case 'e': 111 addlist( elist , *++argv ); 112 eflag = TRUE; 113 break; 114 case 'F': 115 ++argv; 116 addlist( Flist , *argv ); 117 Fflag = TRUE; 118 addlist( flist , *argv ); 119 fflag = TRUE; 120 break; 121 case 'f': 122 addlist( flist , *++argv ); 123 fflag = TRUE; 124 break; 125 case 'k': 126 addlist( kfromlist , *++argv ); 127 addlist( ktolist , *++argv ); 128 kflag = TRUE; 129 break; 130 case 'K': 131 Kflag = TRUE; 132 break; 133 case 'l': 134 lflag = 1; 135 Lflag = 0; 136 break; 137 case 'L': 138 Lflag = 1; 139 lflag = 0; 140 break; 141 case 's': 142 sflag = TRUE; 143 break; 144 case 'u': 145 uflag = TRUE; 146 break; 147 case 'z': 148 zflag = TRUE; 149 break; 150 } 151 argv++; 152 } 153 if ( *argv != 0 ) { 154 a_outname = *argv; 155 argv++; 156 } else { 157 a_outname = A_OUTNAME; 158 } 159 if ( *argv != 0 ) { 160 gmonname = *argv; 161 argv++; 162 } else { 163 gmonname = (char *) malloc(strlen(a_outname)+6); 164 strcpy(gmonname, a_outname); 165 strcat(gmonname, ".gmon"); 166 } 167 /* 168 * get information from the executable file. 169 */ 170 if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 171 (elf_getnfile(a_outname, &defaultEs) == -1 && 172 aout_getnfile(a_outname, &defaultEs) == -1)) 173 errx(1, "%s: bad format", a_outname); 174 /* 175 * sort symbol table. 176 */ 177 qsort(nl, nname, sizeof(nltype), valcmp); 178 /* 179 * turn off default functions 180 */ 181 for ( sp = defaultEs ; *sp ; sp++ ) { 182 Eflag = TRUE; 183 addlist( Elist , *sp ); 184 eflag = TRUE; 185 addlist( elist , *sp ); 186 } 187 /* 188 * get information about mon.out file(s). 189 */ 190 do { 191 getpfile( gmonname ); 192 if ( *argv != 0 ) { 193 gmonname = *argv; 194 } 195 } while ( *argv++ != 0 ); 196 /* 197 * how many ticks per second? 198 * if we can't tell, report time in ticks. 199 */ 200 if (hz == 0) { 201 hz = 1; 202 fprintf(stderr, "time is in ticks, not seconds\n"); 203 } 204 /* 205 * dump out a gmon.sum file if requested 206 */ 207 if ( sflag ) { 208 dumpsum( GMONSUM ); 209 } 210 /* 211 * assign samples to procedures 212 */ 213 asgnsamples(); 214 /* 215 * assemble the dynamic profile 216 */ 217 timesortnlp = doarcs(); 218 /* 219 * print the dynamic profile 220 */ 221 if(!lflag) { 222 printgprof( timesortnlp ); 223 } 224 /* 225 * print the flat profile 226 */ 227 if(!Lflag) { 228 printprof(); 229 } 230 /* 231 * print the index 232 */ 233 printindex(); 234 done(); 235 } 236 237 /* 238 * information from a gmon.out file is in two parts: 239 * an array of sampling hits within pc ranges, 240 * and the arcs. 241 */ 242 getpfile(filename) 243 char *filename; 244 { 245 FILE *pfile; 246 FILE *openpfile(); 247 struct rawarc arc; 248 249 pfile = openpfile(filename); 250 readsamples(pfile); 251 /* 252 * the rest of the file consists of 253 * a bunch of <from,self,count> tuples. 254 */ 255 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 256 # ifdef DEBUG 257 if ( debug & SAMPLEDEBUG ) { 258 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 259 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 260 } 261 # endif /* DEBUG */ 262 /* 263 * add this arc 264 */ 265 tally( &arc ); 266 } 267 fclose(pfile); 268 } 269 270 FILE * 271 openpfile(filename) 272 char *filename; 273 { 274 struct gmonhdr tmp; 275 FILE *pfile; 276 int size; 277 int rate; 278 279 if((pfile = fopen(filename, "r")) == NULL) { 280 perror(filename); 281 done(); 282 } 283 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 284 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 285 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 286 warnx("%s: incompatible with first gmon file", filename); 287 done(); 288 } 289 gmonhdr = tmp; 290 if ( gmonhdr.version == GMONVERSION ) { 291 rate = gmonhdr.profrate; 292 size = sizeof(struct gmonhdr); 293 } else { 294 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 295 size = sizeof(struct ophdr); 296 gmonhdr.profrate = rate = hertz(); 297 gmonhdr.version = GMONVERSION; 298 } 299 if (hz == 0) { 300 hz = rate; 301 } else if (hz != rate) { 302 fprintf(stderr, 303 "%s: profile clock rate (%d) %s (%ld) in first gmon file\n", 304 filename, rate, "incompatible with clock rate", hz); 305 done(); 306 } 307 if ( gmonhdr.histcounter_type == 0 ) { 308 /* Historical case. The type was u_short (2 bytes in practice). */ 309 histcounter_type = 16; 310 histcounter_size = 2; 311 } else { 312 histcounter_type = gmonhdr.histcounter_type; 313 histcounter_size = abs(histcounter_type) / CHAR_BIT; 314 } 315 s_lowpc = (unsigned long) gmonhdr.lpc; 316 s_highpc = (unsigned long) gmonhdr.hpc; 317 lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2; 318 highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2; 319 sampbytes = gmonhdr.ncnt - size; 320 nsamples = sampbytes / histcounter_size; 321 # ifdef DEBUG 322 if ( debug & SAMPLEDEBUG ) { 323 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 324 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 325 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 326 s_lowpc , s_highpc ); 327 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 328 lowpc , highpc ); 329 printf( "[openpfile] sampbytes %d nsamples %d\n" , 330 sampbytes , nsamples ); 331 printf( "[openpfile] sample rate %ld\n" , hz ); 332 } 333 # endif /* DEBUG */ 334 return(pfile); 335 } 336 337 tally( rawp ) 338 struct rawarc *rawp; 339 { 340 nltype *parentp; 341 nltype *childp; 342 343 parentp = nllookup( rawp -> raw_frompc ); 344 childp = nllookup( rawp -> raw_selfpc ); 345 if ( parentp == 0 || childp == 0 ) 346 return; 347 if ( kflag 348 && onlist( kfromlist , parentp -> name ) 349 && onlist( ktolist , childp -> name ) ) { 350 return; 351 } 352 childp -> ncall += rawp -> raw_count; 353 # ifdef DEBUG 354 if ( debug & TALLYDEBUG ) { 355 printf( "[tally] arc from %s to %s traversed %ld times\n" , 356 parentp -> name , childp -> name , rawp -> raw_count ); 357 } 358 # endif /* DEBUG */ 359 addarc( parentp , childp , rawp -> raw_count ); 360 } 361 362 /* 363 * dump out the gmon.sum file 364 */ 365 dumpsum( sumfile ) 366 char *sumfile; 367 { 368 register nltype *nlp; 369 register arctype *arcp; 370 struct rawarc arc; 371 FILE *sfile; 372 373 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 374 perror( sumfile ); 375 done(); 376 } 377 /* 378 * dump the header; use the last header read in 379 */ 380 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 381 perror( sumfile ); 382 done(); 383 } 384 /* 385 * dump the samples 386 */ 387 if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) { 388 perror( sumfile ); 389 done(); 390 } 391 /* 392 * dump the normalized raw arc information 393 */ 394 for ( nlp = nl ; nlp < npe ; nlp++ ) { 395 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 396 arc.raw_frompc = arcp -> arc_parentp -> value; 397 arc.raw_selfpc = arcp -> arc_childp -> value; 398 arc.raw_count = arcp -> arc_count; 399 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 400 perror( sumfile ); 401 done(); 402 } 403 # ifdef DEBUG 404 if ( debug & SAMPLEDEBUG ) { 405 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 406 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 407 } 408 # endif /* DEBUG */ 409 } 410 } 411 fclose( sfile ); 412 } 413 414 static int 415 valcmp(v1, v2) 416 const void *v1; 417 const void *v2; 418 { 419 const nltype *p1 = (const nltype *)v1; 420 const nltype *p2 = (const nltype *)v2; 421 422 if ( p1 -> value < p2 -> value ) { 423 return LESSTHAN; 424 } 425 if ( p1 -> value > p2 -> value ) { 426 return GREATERTHAN; 427 } 428 return EQUALTO; 429 } 430 431 readsamples(pfile) 432 FILE *pfile; 433 { 434 register i; 435 intmax_t sample; 436 437 if (samples == 0) { 438 samples = (double *) calloc(nsamples, sizeof(double)); 439 if (samples == 0) { 440 warnx("no room for %d sample pc's", nsamples); 441 done(); 442 } 443 } 444 for (i = 0; i < nsamples; i++) { 445 fread(&sample, histcounter_size, 1, pfile); 446 if (feof(pfile)) 447 break; 448 switch ( histcounter_type ) { 449 case -8: 450 samples[i] += *(int8_t *)&sample; 451 break; 452 case 8: 453 samples[i] += *(u_int8_t *)&sample; 454 break; 455 case -16: 456 samples[i] += *(int16_t *)&sample; 457 break; 458 case 16: 459 samples[i] += *(u_int16_t *)&sample; 460 break; 461 case -32: 462 samples[i] += *(int32_t *)&sample; 463 break; 464 case 32: 465 samples[i] += *(u_int32_t *)&sample; 466 break; 467 case -64: 468 samples[i] += *(int64_t *)&sample; 469 break; 470 case 64: 471 samples[i] += *(u_int64_t *)&sample; 472 break; 473 default: 474 err(1, "unsupported histogram counter type %d", histcounter_type); 475 } 476 } 477 if (i != nsamples) { 478 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 479 done(); 480 } 481 } 482 483 /* 484 * Assign samples to the procedures to which they belong. 485 * 486 * There are three cases as to where pcl and pch can be 487 * with respect to the routine entry addresses svalue0 and svalue1 488 * as shown in the following diagram. overlap computes the 489 * distance between the arrows, the fraction of the sample 490 * that is to be credited to the routine which starts at svalue0. 491 * 492 * svalue0 svalue1 493 * | | 494 * v v 495 * 496 * +-----------------------------------------------+ 497 * | | 498 * | ->| |<- ->| |<- ->| |<- | 499 * | | | | | | 500 * +---------+ +---------+ +---------+ 501 * 502 * ^ ^ ^ ^ ^ ^ 503 * | | | | | | 504 * pcl pch pcl pch pcl pch 505 * 506 * For the vax we assert that samples will never fall in the first 507 * two bytes of any routine, since that is the entry mask, 508 * thus we give call alignentries() to adjust the entry points if 509 * the entry mask falls in one bucket but the code for the routine 510 * doesn't start until the next bucket. In conjunction with the 511 * alignment of routine addresses, this should allow us to have 512 * only one sample for every four bytes of text space and never 513 * have any overlap (the two end cases, above). 514 */ 515 asgnsamples() 516 { 517 register int j; 518 double ccnt; 519 double time; 520 unsigned long pcl, pch; 521 register int i; 522 unsigned long overlap; 523 unsigned long svalue0, svalue1; 524 525 /* read samples and assign to namelist symbols */ 526 scale = highpc - lowpc; 527 scale /= nsamples; 528 alignentries(); 529 for (i = 0, j = 1; i < nsamples; i++) { 530 ccnt = samples[i]; 531 if (ccnt == 0) 532 continue; 533 pcl = lowpc + (unsigned long)(scale * i); 534 pch = lowpc + (unsigned long)(scale * (i + 1)); 535 time = ccnt; 536 # ifdef DEBUG 537 if ( debug & SAMPLEDEBUG ) { 538 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" , 539 pcl , pch , ccnt ); 540 } 541 # endif /* DEBUG */ 542 totime += time; 543 for (j = j - 1; j < nname; j++) { 544 svalue0 = nl[j].svalue; 545 svalue1 = nl[j+1].svalue; 546 /* 547 * if high end of tick is below entry address, 548 * go for next tick. 549 */ 550 if (pch < svalue0) 551 break; 552 /* 553 * if low end of tick into next routine, 554 * go for next routine. 555 */ 556 if (pcl >= svalue1) 557 continue; 558 overlap = min(pch, svalue1) - max(pcl, svalue0); 559 if (overlap > 0) { 560 # ifdef DEBUG 561 if (debug & SAMPLEDEBUG) { 562 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 563 nl[j].value / HISTORICAL_SCALE_2, 564 svalue0, svalue1, nl[j].name, 565 overlap * time / scale, overlap); 566 } 567 # endif /* DEBUG */ 568 nl[j].time += overlap * time / scale; 569 } 570 } 571 } 572 # ifdef DEBUG 573 if (debug & SAMPLEDEBUG) { 574 printf("[asgnsamples] totime %f\n", totime); 575 } 576 # endif /* DEBUG */ 577 } 578 579 580 unsigned long 581 min(a, b) 582 unsigned long a,b; 583 { 584 if (a<b) 585 return(a); 586 return(b); 587 } 588 589 unsigned long 590 max(a, b) 591 unsigned long a,b; 592 { 593 if (a>b) 594 return(a); 595 return(b); 596 } 597 598 /* 599 * calculate scaled entry point addresses (to save time in asgnsamples), 600 * and possibly push the scaled entry points over the entry mask, 601 * if it turns out that the entry point is in one bucket and the code 602 * for a routine is in the next bucket. 603 */ 604 alignentries() 605 { 606 register struct nl *nlp; 607 unsigned long bucket_of_entry; 608 unsigned long bucket_of_code; 609 610 for (nlp = nl; nlp < npe; nlp++) { 611 nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2; 612 bucket_of_entry = (nlp->svalue - lowpc) / scale; 613 bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 - 614 lowpc) / scale; 615 if (bucket_of_entry < bucket_of_code) { 616 # ifdef DEBUG 617 if (debug & SAMPLEDEBUG) { 618 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 619 nlp->svalue, 620 nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2); 621 } 622 # endif /* DEBUG */ 623 nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2; 624 } 625 } 626 } 627 628 done() 629 { 630 631 exit(0); 632 } 633