1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1983, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #ifndef lint 41 #if 0 42 static char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43 #endif 44 static const char rcsid[] = 45 "$FreeBSD$"; 46 #endif /* not lint */ 47 48 #include <err.h> 49 #include "gprof.h" 50 51 static int valcmp(const void *, const void *); 52 53 54 static struct gmonhdr gmonhdr; 55 static int lflag; 56 static int Lflag; 57 58 main(argc, argv) 59 int argc; 60 char **argv; 61 { 62 char **sp; 63 nltype **timesortnlp; 64 char **defaultEs; 65 66 --argc; 67 argv++; 68 debug = 0; 69 bflag = TRUE; 70 while ( *argv != 0 && **argv == '-' ) { 71 (*argv)++; 72 switch ( **argv ) { 73 case 'a': 74 aflag = TRUE; 75 break; 76 case 'b': 77 bflag = FALSE; 78 break; 79 case 'C': 80 Cflag = TRUE; 81 cyclethreshold = atoi( *++argv ); 82 break; 83 case 'c': 84 #if defined(vax) || defined(tahoe) 85 cflag = TRUE; 86 #else 87 errx(1, "-c isn't supported on this architecture yet"); 88 #endif 89 break; 90 case 'd': 91 dflag = TRUE; 92 setlinebuf(stdout); 93 debug |= atoi( *++argv ); 94 debug |= ANYDEBUG; 95 # ifdef DEBUG 96 printf("[main] debug = %d\n", debug); 97 # else not DEBUG 98 printf("gprof: -d ignored\n"); 99 # endif DEBUG 100 break; 101 case 'E': 102 ++argv; 103 addlist( Elist , *argv ); 104 Eflag = TRUE; 105 addlist( elist , *argv ); 106 eflag = TRUE; 107 break; 108 case 'e': 109 addlist( elist , *++argv ); 110 eflag = TRUE; 111 break; 112 case 'F': 113 ++argv; 114 addlist( Flist , *argv ); 115 Fflag = TRUE; 116 addlist( flist , *argv ); 117 fflag = TRUE; 118 break; 119 case 'f': 120 addlist( flist , *++argv ); 121 fflag = TRUE; 122 break; 123 case 'k': 124 addlist( kfromlist , *++argv ); 125 addlist( ktolist , *++argv ); 126 kflag = TRUE; 127 break; 128 case 'K': 129 Kflag = TRUE; 130 break; 131 case 'l': 132 lflag = 1; 133 Lflag = 0; 134 break; 135 case 'L': 136 Lflag = 1; 137 lflag = 0; 138 break; 139 case 's': 140 sflag = TRUE; 141 break; 142 case 'u': 143 uflag = TRUE; 144 break; 145 case 'z': 146 zflag = TRUE; 147 break; 148 } 149 argv++; 150 } 151 if ( *argv != 0 ) { 152 a_outname = *argv; 153 argv++; 154 } else { 155 a_outname = A_OUTNAME; 156 } 157 if ( *argv != 0 ) { 158 gmonname = *argv; 159 argv++; 160 } else { 161 gmonname = (char *) malloc(strlen(a_outname)+6); 162 strcpy(gmonname, a_outname); 163 strcat(gmonname, ".gmon"); 164 } 165 /* 166 * get information from the executable file. 167 */ 168 if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 169 (elf_getnfile(a_outname, &defaultEs) == -1 && 170 aout_getnfile(a_outname, &defaultEs) == -1)) 171 errx(1, "%s: bad format", a_outname); 172 /* 173 * sort symbol table. 174 */ 175 qsort(nl, nname, sizeof(nltype), valcmp); 176 /* 177 * turn off default functions 178 */ 179 for ( sp = defaultEs ; *sp ; sp++ ) { 180 Eflag = TRUE; 181 addlist( Elist , *sp ); 182 eflag = TRUE; 183 addlist( elist , *sp ); 184 } 185 /* 186 * get information about mon.out file(s). 187 */ 188 do { 189 getpfile( gmonname ); 190 if ( *argv != 0 ) { 191 gmonname = *argv; 192 } 193 } while ( *argv++ != 0 ); 194 /* 195 * how many ticks per second? 196 * if we can't tell, report time in ticks. 197 */ 198 if (hz == 0) { 199 hz = 1; 200 fprintf(stderr, "time is in ticks, not seconds\n"); 201 } 202 /* 203 * dump out a gmon.sum file if requested 204 */ 205 if ( sflag ) { 206 dumpsum( GMONSUM ); 207 } 208 /* 209 * assign samples to procedures 210 */ 211 asgnsamples(); 212 /* 213 * assemble the dynamic profile 214 */ 215 timesortnlp = doarcs(); 216 /* 217 * print the dynamic profile 218 */ 219 if(!lflag) { 220 printgprof( timesortnlp ); 221 } 222 /* 223 * print the flat profile 224 */ 225 if(!Lflag) { 226 printprof(); 227 } 228 /* 229 * print the index 230 */ 231 printindex(); 232 done(); 233 } 234 235 /* 236 * information from a gmon.out file is in two parts: 237 * an array of sampling hits within pc ranges, 238 * and the arcs. 239 */ 240 getpfile(filename) 241 char *filename; 242 { 243 FILE *pfile; 244 FILE *openpfile(); 245 struct rawarc arc; 246 247 pfile = openpfile(filename); 248 readsamples(pfile); 249 /* 250 * the rest of the file consists of 251 * a bunch of <from,self,count> tuples. 252 */ 253 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 254 # ifdef DEBUG 255 if ( debug & SAMPLEDEBUG ) { 256 printf( "[getpfile] frompc 0x%x selfpc 0x%x count %d\n" , 257 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 258 } 259 # endif DEBUG 260 /* 261 * add this arc 262 */ 263 tally( &arc ); 264 } 265 fclose(pfile); 266 } 267 268 FILE * 269 openpfile(filename) 270 char *filename; 271 { 272 struct gmonhdr tmp; 273 FILE *pfile; 274 int size; 275 int rate; 276 277 if((pfile = fopen(filename, "r")) == NULL) { 278 perror(filename); 279 done(); 280 } 281 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 282 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 283 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) { 284 warnx("%s: incompatible with first gmon file", filename); 285 done(); 286 } 287 gmonhdr = tmp; 288 if ( gmonhdr.version == GMONVERSION ) { 289 rate = gmonhdr.profrate; 290 size = sizeof(struct gmonhdr); 291 } else { 292 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 293 size = sizeof(struct ophdr); 294 gmonhdr.profrate = rate = hertz(); 295 gmonhdr.version = GMONVERSION; 296 } 297 if (hz == 0) { 298 hz = rate; 299 } else if (hz != rate) { 300 fprintf(stderr, 301 "%s: profile clock rate (%d) %s (%d) in first gmon file\n", 302 filename, rate, "incompatible with clock rate", hz); 303 done(); 304 } 305 s_lowpc = (unsigned long) gmonhdr.lpc; 306 s_highpc = (unsigned long) gmonhdr.hpc; 307 lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT); 308 highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT); 309 sampbytes = gmonhdr.ncnt - size; 310 nsamples = sampbytes / sizeof (UNIT); 311 # ifdef DEBUG 312 if ( debug & SAMPLEDEBUG ) { 313 printf( "[openpfile] hdr.lpc 0x%x hdr.hpc 0x%x hdr.ncnt %d\n", 314 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 315 printf( "[openpfile] s_lowpc 0x%x s_highpc 0x%x\n" , 316 s_lowpc , s_highpc ); 317 printf( "[openpfile] lowpc 0x%x highpc 0x%x\n" , 318 lowpc , highpc ); 319 printf( "[openpfile] sampbytes %d nsamples %d\n" , 320 sampbytes , nsamples ); 321 printf( "[openpfile] sample rate %d\n" , hz ); 322 } 323 # endif DEBUG 324 return(pfile); 325 } 326 327 tally( rawp ) 328 struct rawarc *rawp; 329 { 330 nltype *parentp; 331 nltype *childp; 332 333 parentp = nllookup( rawp -> raw_frompc ); 334 childp = nllookup( rawp -> raw_selfpc ); 335 if ( parentp == 0 || childp == 0 ) 336 return; 337 if ( kflag 338 && onlist( kfromlist , parentp -> name ) 339 && onlist( ktolist , childp -> name ) ) { 340 return; 341 } 342 childp -> ncall += rawp -> raw_count; 343 # ifdef DEBUG 344 if ( debug & TALLYDEBUG ) { 345 printf( "[tally] arc from %s to %s traversed %d times\n" , 346 parentp -> name , childp -> name , rawp -> raw_count ); 347 } 348 # endif DEBUG 349 addarc( parentp , childp , rawp -> raw_count ); 350 } 351 352 /* 353 * dump out the gmon.sum file 354 */ 355 dumpsum( sumfile ) 356 char *sumfile; 357 { 358 register nltype *nlp; 359 register arctype *arcp; 360 struct rawarc arc; 361 FILE *sfile; 362 363 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) { 364 perror( sumfile ); 365 done(); 366 } 367 /* 368 * dump the header; use the last header read in 369 */ 370 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) { 371 perror( sumfile ); 372 done(); 373 } 374 /* 375 * dump the samples 376 */ 377 if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) { 378 perror( sumfile ); 379 done(); 380 } 381 /* 382 * dump the normalized raw arc information 383 */ 384 for ( nlp = nl ; nlp < npe ; nlp++ ) { 385 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 386 arc.raw_frompc = arcp -> arc_parentp -> value; 387 arc.raw_selfpc = arcp -> arc_childp -> value; 388 arc.raw_count = arcp -> arc_count; 389 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) { 390 perror( sumfile ); 391 done(); 392 } 393 # ifdef DEBUG 394 if ( debug & SAMPLEDEBUG ) { 395 printf( "[dumpsum] frompc 0x%x selfpc 0x%x count %d\n" , 396 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 397 } 398 # endif DEBUG 399 } 400 } 401 fclose( sfile ); 402 } 403 404 static int 405 valcmp(v1, v2) 406 const void *v1; 407 const void *v2; 408 { 409 const nltype *p1 = (const nltype *)v1; 410 const nltype *p2 = (const nltype *)v2; 411 412 if ( p1 -> value < p2 -> value ) { 413 return LESSTHAN; 414 } 415 if ( p1 -> value > p2 -> value ) { 416 return GREATERTHAN; 417 } 418 return EQUALTO; 419 } 420 421 readsamples(pfile) 422 FILE *pfile; 423 { 424 register i; 425 UNIT sample; 426 427 if (samples == 0) { 428 samples = (UNIT *) calloc(sampbytes, sizeof (UNIT)); 429 if (samples == 0) { 430 warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT)); 431 done(); 432 } 433 } 434 for (i = 0; i < nsamples; i++) { 435 fread(&sample, sizeof (UNIT), 1, pfile); 436 if (feof(pfile)) 437 break; 438 samples[i] += sample; 439 } 440 if (i != nsamples) { 441 warnx("unexpected EOF after reading %d/%d samples", --i , nsamples ); 442 done(); 443 } 444 } 445 446 /* 447 * Assign samples to the procedures to which they belong. 448 * 449 * There are three cases as to where pcl and pch can be 450 * with respect to the routine entry addresses svalue0 and svalue1 451 * as shown in the following diagram. overlap computes the 452 * distance between the arrows, the fraction of the sample 453 * that is to be credited to the routine which starts at svalue0. 454 * 455 * svalue0 svalue1 456 * | | 457 * v v 458 * 459 * +-----------------------------------------------+ 460 * | | 461 * | ->| |<- ->| |<- ->| |<- | 462 * | | | | | | 463 * +---------+ +---------+ +---------+ 464 * 465 * ^ ^ ^ ^ ^ ^ 466 * | | | | | | 467 * pcl pch pcl pch pcl pch 468 * 469 * For the vax we assert that samples will never fall in the first 470 * two bytes of any routine, since that is the entry mask, 471 * thus we give call alignentries() to adjust the entry points if 472 * the entry mask falls in one bucket but the code for the routine 473 * doesn't start until the next bucket. In conjunction with the 474 * alignment of routine addresses, this should allow us to have 475 * only one sample for every four bytes of text space and never 476 * have any overlap (the two end cases, above). 477 */ 478 asgnsamples() 479 { 480 register int j; 481 UNIT ccnt; 482 double time; 483 unsigned long pcl, pch; 484 register int i; 485 unsigned long overlap; 486 unsigned long svalue0, svalue1; 487 488 /* read samples and assign to namelist symbols */ 489 scale = highpc - lowpc; 490 scale /= nsamples; 491 alignentries(); 492 for (i = 0, j = 1; i < nsamples; i++) { 493 ccnt = samples[i]; 494 if (ccnt == 0) 495 continue; 496 pcl = lowpc + (unsigned long)(scale * i); 497 pch = lowpc + (unsigned long)(scale * (i + 1)); 498 time = ccnt; 499 # ifdef DEBUG 500 if ( debug & SAMPLEDEBUG ) { 501 printf( "[asgnsamples] pcl 0x%x pch 0x%x ccnt %d\n" , 502 pcl , pch , ccnt ); 503 } 504 # endif DEBUG 505 totime += time; 506 for (j = j - 1; j < nname; j++) { 507 svalue0 = nl[j].svalue; 508 svalue1 = nl[j+1].svalue; 509 /* 510 * if high end of tick is below entry address, 511 * go for next tick. 512 */ 513 if (pch < svalue0) 514 break; 515 /* 516 * if low end of tick into next routine, 517 * go for next routine. 518 */ 519 if (pcl >= svalue1) 520 continue; 521 overlap = min(pch, svalue1) - max(pcl, svalue0); 522 if (overlap > 0) { 523 # ifdef DEBUG 524 if (debug & SAMPLEDEBUG) { 525 printf("[asgnsamples] (0x%x->0x%x-0x%x) %s gets %f ticks %d overlap\n", 526 nl[j].value/sizeof(UNIT), svalue0, svalue1, 527 nl[j].name, 528 overlap * time / scale, overlap); 529 } 530 # endif DEBUG 531 nl[j].time += overlap * time / scale; 532 } 533 } 534 } 535 # ifdef DEBUG 536 if (debug & SAMPLEDEBUG) { 537 printf("[asgnsamples] totime %f\n", totime); 538 } 539 # endif DEBUG 540 } 541 542 543 unsigned long 544 min(a, b) 545 unsigned long a,b; 546 { 547 if (a<b) 548 return(a); 549 return(b); 550 } 551 552 unsigned long 553 max(a, b) 554 unsigned long a,b; 555 { 556 if (a>b) 557 return(a); 558 return(b); 559 } 560 561 /* 562 * calculate scaled entry point addresses (to save time in asgnsamples), 563 * and possibly push the scaled entry points over the entry mask, 564 * if it turns out that the entry point is in one bucket and the code 565 * for a routine is in the next bucket. 566 */ 567 alignentries() 568 { 569 register struct nl *nlp; 570 unsigned long bucket_of_entry; 571 unsigned long bucket_of_code; 572 573 for (nlp = nl; nlp < npe; nlp++) { 574 nlp -> svalue = nlp -> value / sizeof(UNIT); 575 bucket_of_entry = (nlp->svalue - lowpc) / scale; 576 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 577 if (bucket_of_entry < bucket_of_code) { 578 # ifdef DEBUG 579 if (debug & SAMPLEDEBUG) { 580 printf("[alignentries] pushing svalue 0x%x to 0x%x\n", 581 nlp->svalue, nlp->svalue + UNITS_TO_CODE); 582 } 583 # endif DEBUG 584 nlp->svalue += UNITS_TO_CODE; 585 } 586 } 587 } 588 589 done() 590 { 591 592 exit(0); 593 } 594