1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1983, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43 #endif 44 static const char rcsid[] = 45 "$Id: gprof.c,v 1.7 1998/08/08 17:48:26 jdp Exp $"; 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include "gprof.h" 50 51 static int valcmp(const void *, const void *); 52 53 54 static struct gmonhdr gmonhdr; 55 static int lflag; 56 static int Lflag; 57 58 main(argc, argv) 59 int argc; 60 char **argv; 61 { 62 char **sp; 63 nltype **timesortnlp; 64 char **defaultEs; 65 66 --argc; 67 argv++; 68 debug = 0; 69 bflag = TRUE; 70 while ( *argv != 0 && **argv == '-' ) { 71 (*argv)++; 72 switch ( **argv ) { 73 case 'a': 74 aflag = TRUE; 75 break; 76 case 'b': 77 bflag = FALSE; 78 break; 79 case 'C': 80 Cflag = TRUE; 81 cyclethreshold = atoi( *++argv ); 82 break; 83 case 'c': 84 #if defined(vax) || defined(tahoe) 85 cflag = TRUE; 86 #else 87 errx(1, "-c isn't supported on this architecture yet"); 88 #endif 89 break; 90 case 'd': 91 dflag = TRUE; 92 setlinebuf(stdout); 93 debug |= atoi( *++argv ); 94 debug |= ANYDEBUG; 95 # ifdef DEBUG 96 printf("[main] debug = %d\n", debug); 97 # else not DEBUG 98 printf("gprof: -d ignored\n"); 99 # endif DEBUG 100 break; 101 case 'E': 102 ++argv; 103 addlist( Elist , *argv ); 104 Eflag = TRUE; 105 addlist( elist , *argv ); 106 eflag = TRUE; 107 break; 108 case 'e': 109 addlist( elist , *++argv ); 110 eflag = TRUE; 111 break; 112 case 'F': 113 ++argv; 114 addlist( Flist , *argv ); 115 Fflag = TRUE; 116 addlist( flist , *argv ); 117 fflag = TRUE; 118 break; 119 case 'f': 120 addlist( flist , *++argv ); 121 fflag = TRUE; 122 break; 123 case 'k': 124 addlist( kfromlist , *++argv ); 125 addlist( ktolist , *++argv ); 126 kflag = TRUE; 127 break; 128 case 'l': 129 lflag = 1; 130 Lflag = 0; 131 break; 132 case 'L': 133 Lflag = 1; 134 lflag = 0; 135 break; 136 case 's': 137 sflag = TRUE; 138 break; 139 case 'u': 140 uflag = TRUE; 141 break; 142 case 'z': 143 zflag = TRUE; 144 break; 145 } 146 argv++; 147 } 148 if ( *argv != 0 ) { 149 a_outname = *argv; 150 argv++; 151 } else { 152 a_outname = A_OUTNAME; 153 } 154 if ( *argv != 0 ) { 155 gmonname = *argv; 156 argv++; 157 } else { 158 gmonname = GMONNAME; 159 } 160 /* 161 * get information from the executable file. 162 */ 163 if (elf_getnfile(a_outname, &defaultEs) == -1 && 164 aout_getnfile(a_outname, &defaultEs) == -1) 165 errx(1, "%s: bad format", a_outname); 166 /* 167 * sort symbol table. 168 */ 169 qsort(nl, nname, sizeof(nltype), valcmp); 170 /* 171 * turn off default functions 172 */ 173 for ( sp = defaultEs ; *sp ; sp++ ) { 174 Eflag = TRUE; 175 addlist( Elist , *sp ); 176 eflag = TRUE; 177 addlist( elist , *sp ); 178 } 179 /* 180 * get information about mon.out file(s). 181 */ 182 do { 183 getpfile( gmonname ); 184 if ( *argv != 0 ) { 185 gmonname = *argv; 186 } 187 } while ( *argv++ != 0 ); 188 /* 189 * how many ticks per second? 190 * if we can't tell, report time in ticks. 191 */ 192 if (hz == 0) { 193 hz = 1; 194 fprintf(stderr, "time is in ticks, not seconds\n"); 195 } 196 /* 197 * dump out a gmon.sum file if requested 198 */ 199 if ( sflag ) { 200 dumpsum( GMONSUM ); 201 } 202 /* 203 * assign samples to procedures 204 */ 205 asgnsamples(); 206 /* 207 * assemble the dynamic profile 208 */ 209 timesortnlp = doarcs(); 210 /* 211 * print the dynamic profile 212 */ 213 if(!lflag) { 214 printgprof( timesortnlp ); 215 } 216 /* 217 * print the flat profile 218 */ 219 if(!Lflag) { 220 printprof(); 221 } 222 /* 223 * print the index 224 */ 225 printindex(); 226 done(); 227 } 228 229 /* 230 * information from a gmon.out file is in two parts: 231 * an array of sampling hits within pc ranges, 232 * and the arcs. 233 */ 234 getpfile(filename) 235 char *filename; 236 { 237 FILE *pfile; 238 FILE *openpfile(); 239 struct rawarc arc; 240 241 pfile = openpfile(filename); 242 readsamples(pfile); 243 /* 244 * the rest of the file consists of 245 * a bunch of <from,self,count> tuples. 246 */ 247 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 248 # ifdef DEBUG 249 if ( debug & SAMPLEDEBUG ) { 250 printf( "[getpfile] frompc 0x%x selfpc 0x%x count %d\n" , 251 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 252 } 253 # endif DEBUG 254 /* 255 * add this arc 256 */ 257 tally( &arc ); 258 } 259 fclose(pfile); 260 } 261 262 FILE * 263 openpfile(filename) 264 char *filename; 265 { 266 struct gmonhdr tmp; 267 FILE *pfile; 268 int size; 269 int rate; 270 271 if((pfile = fopen(filename, "r")) == NULL) { 272 perror(filename); 273 done(); 274 } 275 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 276 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 277 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 278 warnx("%s: incompatible with first gmon file", filename); 279 done(); 280 } 281 gmonhdr = tmp; 282 if ( gmonhdr.version == GMONVERSION ) { 283 rate = gmonhdr.profrate; 284 size = sizeof(struct gmonhdr); 285 } else { 286 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 287 size = sizeof(struct ophdr); 288 gmonhdr.profrate = rate = hertz(); 289 gmonhdr.version = GMONVERSION; 290 } 291 if (hz == 0) { 292 hz = rate; 293 } else if (hz != rate) { 294 fprintf(stderr, 295 "%s: profile clock rate (%d) %s (%d) in first gmon file\n", 296 filename, rate, "incompatible with clock rate", hz); 297 done(); 298 } 299 s_lowpc = (unsigned long) gmonhdr.lpc; 300 s_highpc = (unsigned long) gmonhdr.hpc; 301 lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT); 302 highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT); 303 sampbytes = gmonhdr.ncnt - size; 304 nsamples = sampbytes / sizeof (UNIT); 305 # ifdef DEBUG 306 if ( debug & SAMPLEDEBUG ) { 307 printf( "[openpfile] hdr.lpc 0x%x hdr.hpc 0x%x hdr.ncnt %d\n", 308 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 309 printf( "[openpfile] s_lowpc 0x%x s_highpc 0x%x\n" , 310 s_lowpc , s_highpc ); 311 printf( "[openpfile] lowpc 0x%x highpc 0x%x\n" , 312 lowpc , highpc ); 313 printf( "[openpfile] sampbytes %d nsamples %d\n" , 314 sampbytes , nsamples ); 315 printf( "[openpfile] sample rate %d\n" , hz ); 316 } 317 # endif DEBUG 318 return(pfile); 319 } 320 321 tally( rawp ) 322 struct rawarc *rawp; 323 { 324 nltype *parentp; 325 nltype *childp; 326 327 parentp = nllookup( rawp -> raw_frompc ); 328 childp = nllookup( rawp -> raw_selfpc ); 329 if ( parentp == 0 || childp == 0 ) 330 return; 331 if ( kflag 332 && onlist( kfromlist , parentp -> name ) 333 && onlist( ktolist , childp -> name ) ) { 334 return; 335 } 336 childp -> ncall += rawp -> raw_count; 337 # ifdef DEBUG 338 if ( debug & TALLYDEBUG ) { 339 printf( "[tally] arc from %s to %s traversed %d times\n" , 340 parentp -> name , childp -> name , rawp -> raw_count ); 341 } 342 # endif DEBUG 343 addarc( parentp , childp , rawp -> raw_count ); 344 } 345 346 /* 347 * dump out the gmon.sum file 348 */ 349 dumpsum( sumfile ) 350 char *sumfile; 351 { 352 register nltype *nlp; 353 register arctype *arcp; 354 struct rawarc arc; 355 FILE *sfile; 356 357 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 358 perror( sumfile ); 359 done(); 360 } 361 /* 362 * dump the header; use the last header read in 363 */ 364 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 365 perror( sumfile ); 366 done(); 367 } 368 /* 369 * dump the samples 370 */ 371 if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) { 372 perror( sumfile ); 373 done(); 374 } 375 /* 376 * dump the normalized raw arc information 377 */ 378 for ( nlp = nl ; nlp < npe ; nlp++ ) { 379 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 380 arc.raw_frompc = arcp -> arc_parentp -> value; 381 arc.raw_selfpc = arcp -> arc_childp -> value; 382 arc.raw_count = arcp -> arc_count; 383 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 384 perror( sumfile ); 385 done(); 386 } 387 # ifdef DEBUG 388 if ( debug & SAMPLEDEBUG ) { 389 printf( "[dumpsum] frompc 0x%x selfpc 0x%x count %d\n" , 390 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 391 } 392 # endif DEBUG 393 } 394 } 395 fclose( sfile ); 396 } 397 398 static int 399 valcmp(v1, v2) 400 const void *v1; 401 const void *v2; 402 { 403 const nltype *p1 = (const nltype *)v1; 404 const nltype *p2 = (const nltype *)v2; 405 406 if ( p1 -> value < p2 -> value ) { 407 return LESSTHAN; 408 } 409 if ( p1 -> value > p2 -> value ) { 410 return GREATERTHAN; 411 } 412 return EQUALTO; 413 } 414 415 readsamples(pfile) 416 FILE *pfile; 417 { 418 register i; 419 UNIT sample; 420 421 if (samples == 0) { 422 samples = (UNIT *) calloc(sampbytes, sizeof (UNIT)); 423 if (samples == 0) { 424 warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT)); 425 done(); 426 } 427 } 428 for (i = 0; i < nsamples; i++) { 429 fread(&sample, sizeof (UNIT), 1, pfile); 430 if (feof(pfile)) 431 break; 432 samples[i] += sample; 433 } 434 if (i != nsamples) { 435 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 436 done(); 437 } 438 } 439 440 /* 441 * Assign samples to the procedures to which they belong. 442 * 443 * There are three cases as to where pcl and pch can be 444 * with respect to the routine entry addresses svalue0 and svalue1 445 * as shown in the following diagram. overlap computes the 446 * distance between the arrows, the fraction of the sample 447 * that is to be credited to the routine which starts at svalue0. 448 * 449 * svalue0 svalue1 450 * | | 451 * v v 452 * 453 * +-----------------------------------------------+ 454 * | | 455 * | ->| |<- ->| |<- ->| |<- | 456 * | | | | | | 457 * +---------+ +---------+ +---------+ 458 * 459 * ^ ^ ^ ^ ^ ^ 460 * | | | | | | 461 * pcl pch pcl pch pcl pch 462 * 463 * For the vax we assert that samples will never fall in the first 464 * two bytes of any routine, since that is the entry mask, 465 * thus we give call alignentries() to adjust the entry points if 466 * the entry mask falls in one bucket but the code for the routine 467 * doesn't start until the next bucket. In conjunction with the 468 * alignment of routine addresses, this should allow us to have 469 * only one sample for every four bytes of text space and never 470 * have any overlap (the two end cases, above). 471 */ 472 asgnsamples() 473 { 474 register int j; 475 UNIT ccnt; 476 double time; 477 unsigned long pcl, pch; 478 register int i; 479 unsigned long overlap; 480 unsigned long svalue0, svalue1; 481 482 /* read samples and assign to namelist symbols */ 483 scale = highpc - lowpc; 484 scale /= nsamples; 485 alignentries(); 486 for (i = 0, j = 1; i < nsamples; i++) { 487 ccnt = samples[i]; 488 if (ccnt == 0) 489 continue; 490 pcl = lowpc + scale * i; 491 pch = lowpc + scale * (i + 1); 492 time = ccnt; 493 # ifdef DEBUG 494 if ( debug & SAMPLEDEBUG ) { 495 printf( "[asgnsamples] pcl 0x%x pch 0x%x ccnt %d\n" , 496 pcl , pch , ccnt ); 497 } 498 # endif DEBUG 499 totime += time; 500 for (j = j - 1; j < nname; j++) { 501 svalue0 = nl[j].svalue; 502 svalue1 = nl[j+1].svalue; 503 /* 504 * if high end of tick is below entry address, 505 * go for next tick. 506 */ 507 if (pch < svalue0) 508 break; 509 /* 510 * if low end of tick into next routine, 511 * go for next routine. 512 */ 513 if (pcl >= svalue1) 514 continue; 515 overlap = min(pch, svalue1) - max(pcl, svalue0); 516 if (overlap > 0) { 517 # ifdef DEBUG 518 if (debug & SAMPLEDEBUG) { 519 printf("[asgnsamples] (0x%x->0x%x-0x%x) %s gets %f ticks %d overlap\n", 520 nl[j].value/sizeof(UNIT), svalue0, svalue1, 521 nl[j].name, 522 overlap * time / scale, overlap); 523 } 524 # endif DEBUG 525 nl[j].time += overlap * time / scale; 526 } 527 } 528 } 529 # ifdef DEBUG 530 if (debug & SAMPLEDEBUG) { 531 printf("[asgnsamples] totime %f\n", totime); 532 } 533 # endif DEBUG 534 } 535 536 537 unsigned long 538 min(a, b) 539 unsigned long a,b; 540 { 541 if (a<b) 542 return(a); 543 return(b); 544 } 545 546 unsigned long 547 max(a, b) 548 unsigned long a,b; 549 { 550 if (a>b) 551 return(a); 552 return(b); 553 } 554 555 /* 556 * calculate scaled entry point addresses (to save time in asgnsamples), 557 * and possibly push the scaled entry points over the entry mask, 558 * if it turns out that the entry point is in one bucket and the code 559 * for a routine is in the next bucket. 560 */ 561 alignentries() 562 { 563 register struct nl *nlp; 564 unsigned long bucket_of_entry; 565 unsigned long bucket_of_code; 566 567 for (nlp = nl; nlp < npe; nlp++) { 568 nlp -> svalue = nlp -> value / sizeof(UNIT); 569 bucket_of_entry = (nlp->svalue - lowpc) / scale; 570 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 571 if (bucket_of_entry < bucket_of_code) { 572 # ifdef DEBUG 573 if (debug & SAMPLEDEBUG) { 574 printf("[alignentries] pushing svalue 0x%x to 0x%x\n", 575 nlp->svalue, nlp->svalue + UNITS_TO_CODE); 576 } 577 # endif DEBUG 578 nlp->svalue += UNITS_TO_CODE; 579 } 580 } 581 } 582 583 done() 584 { 585 586 exit(0); 587 } 588