1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1983, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43 #endif 44 static const char rcsid[] = 45 "$FreeBSD$"; 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include <limits.h> 50 #include "gprof.h" 51 52 static int valcmp(const void *, const void *); 53 54 55 static struct gmonhdr gmonhdr; 56 static int lflag; 57 static int Lflag; 58 59 main(argc, argv) 60 int argc; 61 char **argv; 62 { 63 char **sp; 64 nltype **timesortnlp; 65 char **defaultEs; 66 67 --argc; 68 argv++; 69 debug = 0; 70 bflag = TRUE; 71 while ( *argv != 0 && **argv == '-' ) { 72 (*argv)++; 73 switch ( **argv ) { 74 case 'a': 75 aflag = TRUE; 76 break; 77 case 'b': 78 bflag = FALSE; 79 break; 80 case 'C': 81 Cflag = TRUE; 82 cyclethreshold = atoi( *++argv ); 83 break; 84 case 'c': 85 #if defined(vax) || defined(tahoe) 86 cflag = TRUE; 87 #else 88 errx(1, "-c isn't supported on this architecture yet"); 89 #endif 90 break; 91 case 'd': 92 dflag = TRUE; 93 setlinebuf(stdout); 94 debug |= atoi( *++argv ); 95 debug |= ANYDEBUG; 96 # ifdef DEBUG 97 printf("[main] debug = %d\n", debug); 98 # else not DEBUG 99 printf("gprof: -d ignored\n"); 100 # endif DEBUG 101 break; 102 case 'E': 103 ++argv; 104 addlist( Elist , *argv ); 105 Eflag = TRUE; 106 addlist( elist , *argv ); 107 eflag = TRUE; 108 break; 109 case 'e': 110 addlist( elist , *++argv ); 111 eflag = TRUE; 112 break; 113 case 'F': 114 ++argv; 115 addlist( Flist , *argv ); 116 Fflag = TRUE; 117 addlist( flist , *argv ); 118 fflag = TRUE; 119 break; 120 case 'f': 121 addlist( flist , *++argv ); 122 fflag = TRUE; 123 break; 124 case 'k': 125 addlist( kfromlist , *++argv ); 126 addlist( ktolist , *++argv ); 127 kflag = TRUE; 128 break; 129 case 'K': 130 Kflag = TRUE; 131 break; 132 case 'l': 133 lflag = 1; 134 Lflag = 0; 135 break; 136 case 'L': 137 Lflag = 1; 138 lflag = 0; 139 break; 140 case 's': 141 sflag = TRUE; 142 break; 143 case 'u': 144 uflag = TRUE; 145 break; 146 case 'z': 147 zflag = TRUE; 148 break; 149 } 150 argv++; 151 } 152 if ( *argv != 0 ) { 153 a_outname = *argv; 154 argv++; 155 } else { 156 a_outname = A_OUTNAME; 157 } 158 if ( *argv != 0 ) { 159 gmonname = *argv; 160 argv++; 161 } else { 162 gmonname = (char *) malloc(strlen(a_outname)+6); 163 strcpy(gmonname, a_outname); 164 strcat(gmonname, ".gmon"); 165 } 166 /* 167 * get information from the executable file. 168 */ 169 if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 170 (elf_getnfile(a_outname, &defaultEs) == -1 && 171 aout_getnfile(a_outname, &defaultEs) == -1)) 172 errx(1, "%s: bad format", a_outname); 173 /* 174 * sort symbol table. 175 */ 176 qsort(nl, nname, sizeof(nltype), valcmp); 177 /* 178 * turn off default functions 179 */ 180 for ( sp = defaultEs ; *sp ; sp++ ) { 181 Eflag = TRUE; 182 addlist( Elist , *sp ); 183 eflag = TRUE; 184 addlist( elist , *sp ); 185 } 186 /* 187 * get information about mon.out file(s). 188 */ 189 do { 190 getpfile( gmonname ); 191 if ( *argv != 0 ) { 192 gmonname = *argv; 193 } 194 } while ( *argv++ != 0 ); 195 /* 196 * how many ticks per second? 197 * if we can't tell, report time in ticks. 198 */ 199 if (hz == 0) { 200 hz = 1; 201 fprintf(stderr, "time is in ticks, not seconds\n"); 202 } 203 /* 204 * dump out a gmon.sum file if requested 205 */ 206 if ( sflag ) { 207 dumpsum( GMONSUM ); 208 } 209 /* 210 * assign samples to procedures 211 */ 212 asgnsamples(); 213 /* 214 * assemble the dynamic profile 215 */ 216 timesortnlp = doarcs(); 217 /* 218 * print the dynamic profile 219 */ 220 if(!lflag) { 221 printgprof( timesortnlp ); 222 } 223 /* 224 * print the flat profile 225 */ 226 if(!Lflag) { 227 printprof(); 228 } 229 /* 230 * print the index 231 */ 232 printindex(); 233 done(); 234 } 235 236 /* 237 * information from a gmon.out file is in two parts: 238 * an array of sampling hits within pc ranges, 239 * and the arcs. 240 */ 241 getpfile(filename) 242 char *filename; 243 { 244 FILE *pfile; 245 FILE *openpfile(); 246 struct rawarc arc; 247 248 pfile = openpfile(filename); 249 readsamples(pfile); 250 /* 251 * the rest of the file consists of 252 * a bunch of <from,self,count> tuples. 253 */ 254 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 255 # ifdef DEBUG 256 if ( debug & SAMPLEDEBUG ) { 257 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 258 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 259 } 260 # endif DEBUG 261 /* 262 * add this arc 263 */ 264 tally( &arc ); 265 } 266 fclose(pfile); 267 } 268 269 FILE * 270 openpfile(filename) 271 char *filename; 272 { 273 struct gmonhdr tmp; 274 FILE *pfile; 275 int size; 276 int rate; 277 278 if((pfile = fopen(filename, "r")) == NULL) { 279 perror(filename); 280 done(); 281 } 282 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 283 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 284 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 285 warnx("%s: incompatible with first gmon file", filename); 286 done(); 287 } 288 gmonhdr = tmp; 289 if ( gmonhdr.version == GMONVERSION ) { 290 rate = gmonhdr.profrate; 291 size = sizeof(struct gmonhdr); 292 } else { 293 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 294 size = sizeof(struct ophdr); 295 gmonhdr.profrate = rate = hertz(); 296 gmonhdr.version = GMONVERSION; 297 } 298 if (hz == 0) { 299 hz = rate; 300 } else if (hz != rate) { 301 fprintf(stderr, 302 "%s: profile clock rate (%d) %s (%ld) in first gmon file\n", 303 filename, rate, "incompatible with clock rate", hz); 304 done(); 305 } 306 if ( gmonhdr.histcounter_type == 0 ) { 307 /* Historical case. The type was u_short (2 bytes in practice). */ 308 histcounter_type = 16; 309 histcounter_size = 2; 310 } else { 311 histcounter_type = gmonhdr.histcounter_type; 312 histcounter_size = abs(histcounter_type) / CHAR_BIT; 313 } 314 s_lowpc = (unsigned long) gmonhdr.lpc; 315 s_highpc = (unsigned long) gmonhdr.hpc; 316 lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2; 317 highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2; 318 sampbytes = gmonhdr.ncnt - size; 319 nsamples = sampbytes / histcounter_size; 320 # ifdef DEBUG 321 if ( debug & SAMPLEDEBUG ) { 322 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 323 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 324 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 325 s_lowpc , s_highpc ); 326 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 327 lowpc , highpc ); 328 printf( "[openpfile] sampbytes %d nsamples %d\n" , 329 sampbytes , nsamples ); 330 printf( "[openpfile] sample rate %ld\n" , hz ); 331 } 332 # endif DEBUG 333 return(pfile); 334 } 335 336 tally( rawp ) 337 struct rawarc *rawp; 338 { 339 nltype *parentp; 340 nltype *childp; 341 342 parentp = nllookup( rawp -> raw_frompc ); 343 childp = nllookup( rawp -> raw_selfpc ); 344 if ( parentp == 0 || childp == 0 ) 345 return; 346 if ( kflag 347 && onlist( kfromlist , parentp -> name ) 348 && onlist( ktolist , childp -> name ) ) { 349 return; 350 } 351 childp -> ncall += rawp -> raw_count; 352 # ifdef DEBUG 353 if ( debug & TALLYDEBUG ) { 354 printf( "[tally] arc from %s to %s traversed %ld times\n" , 355 parentp -> name , childp -> name , rawp -> raw_count ); 356 } 357 # endif DEBUG 358 addarc( parentp , childp , rawp -> raw_count ); 359 } 360 361 /* 362 * dump out the gmon.sum file 363 */ 364 dumpsum( sumfile ) 365 char *sumfile; 366 { 367 register nltype *nlp; 368 register arctype *arcp; 369 struct rawarc arc; 370 FILE *sfile; 371 372 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 373 perror( sumfile ); 374 done(); 375 } 376 /* 377 * dump the header; use the last header read in 378 */ 379 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 380 perror( sumfile ); 381 done(); 382 } 383 /* 384 * dump the samples 385 */ 386 if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) { 387 perror( sumfile ); 388 done(); 389 } 390 /* 391 * dump the normalized raw arc information 392 */ 393 for ( nlp = nl ; nlp < npe ; nlp++ ) { 394 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 395 arc.raw_frompc = arcp -> arc_parentp -> value; 396 arc.raw_selfpc = arcp -> arc_childp -> value; 397 arc.raw_count = arcp -> arc_count; 398 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 399 perror( sumfile ); 400 done(); 401 } 402 # ifdef DEBUG 403 if ( debug & SAMPLEDEBUG ) { 404 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 405 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 406 } 407 # endif DEBUG 408 } 409 } 410 fclose( sfile ); 411 } 412 413 static int 414 valcmp(v1, v2) 415 const void *v1; 416 const void *v2; 417 { 418 const nltype *p1 = (const nltype *)v1; 419 const nltype *p2 = (const nltype *)v2; 420 421 if ( p1 -> value < p2 -> value ) { 422 return LESSTHAN; 423 } 424 if ( p1 -> value > p2 -> value ) { 425 return GREATERTHAN; 426 } 427 return EQUALTO; 428 } 429 430 readsamples(pfile) 431 FILE *pfile; 432 { 433 register i; 434 intmax_t sample; 435 436 if (samples == 0) { 437 samples = (double *) calloc(nsamples, sizeof(double)); 438 if (samples == 0) { 439 warnx("no room for %d sample pc's", nsamples); 440 done(); 441 } 442 } 443 for (i = 0; i < nsamples; i++) { 444 fread(&sample, histcounter_size, 1, pfile); 445 if (feof(pfile)) 446 break; 447 switch ( histcounter_type ) { 448 case -8: 449 samples[i] += *(int8_t *)&sample; 450 break; 451 case 8: 452 samples[i] += *(u_int8_t *)&sample; 453 break; 454 case -16: 455 samples[i] += *(int16_t *)&sample; 456 break; 457 case 16: 458 samples[i] += *(u_int16_t *)&sample; 459 break; 460 case -32: 461 samples[i] += *(int32_t *)&sample; 462 break; 463 case 32: 464 samples[i] += *(u_int32_t *)&sample; 465 break; 466 case -64: 467 samples[i] += *(int64_t *)&sample; 468 break; 469 case 64: 470 samples[i] += *(u_int64_t *)&sample; 471 break; 472 default: 473 err(1, "unsupported histogram counter type %d", histcounter_type); 474 } 475 } 476 if (i != nsamples) { 477 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 478 done(); 479 } 480 } 481 482 /* 483 * Assign samples to the procedures to which they belong. 484 * 485 * There are three cases as to where pcl and pch can be 486 * with respect to the routine entry addresses svalue0 and svalue1 487 * as shown in the following diagram. overlap computes the 488 * distance between the arrows, the fraction of the sample 489 * that is to be credited to the routine which starts at svalue0. 490 * 491 * svalue0 svalue1 492 * | | 493 * v v 494 * 495 * +-----------------------------------------------+ 496 * | | 497 * | ->| |<- ->| |<- ->| |<- | 498 * | | | | | | 499 * +---------+ +---------+ +---------+ 500 * 501 * ^ ^ ^ ^ ^ ^ 502 * | | | | | | 503 * pcl pch pcl pch pcl pch 504 * 505 * For the vax we assert that samples will never fall in the first 506 * two bytes of any routine, since that is the entry mask, 507 * thus we give call alignentries() to adjust the entry points if 508 * the entry mask falls in one bucket but the code for the routine 509 * doesn't start until the next bucket. In conjunction with the 510 * alignment of routine addresses, this should allow us to have 511 * only one sample for every four bytes of text space and never 512 * have any overlap (the two end cases, above). 513 */ 514 asgnsamples() 515 { 516 register int j; 517 double ccnt; 518 double time; 519 unsigned long pcl, pch; 520 register int i; 521 unsigned long overlap; 522 unsigned long svalue0, svalue1; 523 524 /* read samples and assign to namelist symbols */ 525 scale = highpc - lowpc; 526 scale /= nsamples; 527 alignentries(); 528 for (i = 0, j = 1; i < nsamples; i++) { 529 ccnt = samples[i]; 530 if (ccnt == 0) 531 continue; 532 pcl = lowpc + (unsigned long)(scale * i); 533 pch = lowpc + (unsigned long)(scale * (i + 1)); 534 time = ccnt; 535 # ifdef DEBUG 536 if ( debug & SAMPLEDEBUG ) { 537 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" , 538 pcl , pch , ccnt ); 539 } 540 # endif DEBUG 541 totime += time; 542 for (j = j - 1; j < nname; j++) { 543 svalue0 = nl[j].svalue; 544 svalue1 = nl[j+1].svalue; 545 /* 546 * if high end of tick is below entry address, 547 * go for next tick. 548 */ 549 if (pch < svalue0) 550 break; 551 /* 552 * if low end of tick into next routine, 553 * go for next routine. 554 */ 555 if (pcl >= svalue1) 556 continue; 557 overlap = min(pch, svalue1) - max(pcl, svalue0); 558 if (overlap > 0) { 559 # ifdef DEBUG 560 if (debug & SAMPLEDEBUG) { 561 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 562 nl[j].value / HISTORICAL_SCALE_2, 563 svalue0, svalue1, nl[j].name, 564 overlap * time / scale, overlap); 565 } 566 # endif DEBUG 567 nl[j].time += overlap * time / scale; 568 } 569 } 570 } 571 # ifdef DEBUG 572 if (debug & SAMPLEDEBUG) { 573 printf("[asgnsamples] totime %f\n", totime); 574 } 575 # endif DEBUG 576 } 577 578 579 unsigned long 580 min(a, b) 581 unsigned long a,b; 582 { 583 if (a<b) 584 return(a); 585 return(b); 586 } 587 588 unsigned long 589 max(a, b) 590 unsigned long a,b; 591 { 592 if (a>b) 593 return(a); 594 return(b); 595 } 596 597 /* 598 * calculate scaled entry point addresses (to save time in asgnsamples), 599 * and possibly push the scaled entry points over the entry mask, 600 * if it turns out that the entry point is in one bucket and the code 601 * for a routine is in the next bucket. 602 */ 603 alignentries() 604 { 605 register struct nl *nlp; 606 unsigned long bucket_of_entry; 607 unsigned long bucket_of_code; 608 609 for (nlp = nl; nlp < npe; nlp++) { 610 nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2; 611 bucket_of_entry = (nlp->svalue - lowpc) / scale; 612 bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 - 613 lowpc) / scale; 614 if (bucket_of_entry < bucket_of_code) { 615 # ifdef DEBUG 616 if (debug & SAMPLEDEBUG) { 617 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 618 nlp->svalue, 619 nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2); 620 } 621 # endif DEBUG 622 nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2; 623 } 624 } 625 } 626 627 done() 628 { 629 630 exit(0); 631 } 632