1 /* 2 * Copyright (c) 1983, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1983, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif /* not lint */ 39 40 #if 0 41 #ifndef lint 42 static char sccsid[] = "@(#)gprof.c 8.1 (Berkeley) 6/6/93"; 43 #endif /* not lint */ 44 #endif 45 46 #include <sys/cdefs.h> 47 __FBSDID("$FreeBSD$"); 48 49 #include <err.h> 50 #include <limits.h> 51 #include <stdint.h> 52 #include "gprof.h" 53 54 static int valcmp(const void *, const void *); 55 56 57 static struct gmonhdr gmonhdr; 58 static int lflag; 59 static int Lflag; 60 61 int 62 main(argc, argv) 63 int argc; 64 char **argv; 65 { 66 char **sp; 67 nltype **timesortnlp; 68 char **defaultEs; 69 70 --argc; 71 argv++; 72 debug = 0; 73 bflag = TRUE; 74 while ( *argv != 0 && **argv == '-' ) { 75 (*argv)++; 76 switch ( **argv ) { 77 case 'a': 78 aflag = TRUE; 79 break; 80 case 'b': 81 bflag = FALSE; 82 break; 83 case 'C': 84 Cflag = TRUE; 85 cyclethreshold = atoi( *++argv ); 86 break; 87 case 'c': 88 #if defined(vax) || defined(tahoe) 89 cflag = TRUE; 90 #else 91 errx(1, "-c isn't supported on this architecture yet"); 92 #endif 93 break; 94 case 'd': 95 dflag = TRUE; 96 setlinebuf(stdout); 97 debug |= atoi( *++argv ); 98 debug |= ANYDEBUG; 99 # ifdef DEBUG 100 printf("[main] debug = %d\n", debug); 101 # else /* not DEBUG */ 102 printf("gprof: -d ignored\n"); 103 # endif /* DEBUG */ 104 break; 105 case 'E': 106 ++argv; 107 addlist( Elist , *argv ); 108 Eflag = TRUE; 109 addlist( elist , *argv ); 110 eflag = TRUE; 111 break; 112 case 'e': 113 addlist( elist , *++argv ); 114 eflag = TRUE; 115 break; 116 case 'F': 117 ++argv; 118 addlist( Flist , *argv ); 119 Fflag = TRUE; 120 addlist( flist , *argv ); 121 fflag = TRUE; 122 break; 123 case 'f': 124 addlist( flist , *++argv ); 125 fflag = TRUE; 126 break; 127 case 'k': 128 addlist( kfromlist , *++argv ); 129 addlist( ktolist , *++argv ); 130 kflag = TRUE; 131 break; 132 case 'K': 133 Kflag = TRUE; 134 break; 135 case 'l': 136 lflag = 1; 137 Lflag = 0; 138 break; 139 case 'L': 140 Lflag = 1; 141 lflag = 0; 142 break; 143 case 's': 144 sflag = TRUE; 145 break; 146 case 'u': 147 uflag = TRUE; 148 break; 149 case 'z': 150 zflag = TRUE; 151 break; 152 } 153 argv++; 154 } 155 if ( *argv != 0 ) { 156 a_outname = *argv; 157 argv++; 158 } else { 159 a_outname = A_OUTNAME; 160 } 161 if ( *argv != 0 ) { 162 gmonname = *argv; 163 argv++; 164 } else { 165 gmonname = (char *) malloc(strlen(a_outname)+6); 166 strcpy(gmonname, a_outname); 167 strcat(gmonname, ".gmon"); 168 } 169 /* 170 * get information from the executable file. 171 */ 172 if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) || 173 (elf_getnfile(a_outname, &defaultEs) == -1 && 174 aout_getnfile(a_outname, &defaultEs) == -1)) 175 errx(1, "%s: bad format", a_outname); 176 /* 177 * sort symbol table. 178 */ 179 qsort(nl, nname, sizeof(nltype), valcmp); 180 /* 181 * turn off default functions 182 */ 183 for ( sp = defaultEs ; *sp ; sp++ ) { 184 Eflag = TRUE; 185 addlist( Elist , *sp ); 186 eflag = TRUE; 187 addlist( elist , *sp ); 188 } 189 /* 190 * get information about mon.out file(s). 191 */ 192 do { 193 getpfile( gmonname ); 194 if ( *argv != 0 ) { 195 gmonname = *argv; 196 } 197 } while ( *argv++ != 0 ); 198 /* 199 * how many ticks per second? 200 * if we can't tell, report time in ticks. 201 */ 202 if (hz == 0) { 203 hz = 1; 204 fprintf(stderr, "time is in ticks, not seconds\n"); 205 } 206 /* 207 * dump out a gmon.sum file if requested 208 */ 209 if ( sflag ) { 210 dumpsum( GMONSUM ); 211 } 212 /* 213 * assign samples to procedures 214 */ 215 asgnsamples(); 216 /* 217 * assemble the dynamic profile 218 */ 219 timesortnlp = doarcs(); 220 /* 221 * print the dynamic profile 222 */ 223 if(!lflag) { 224 printgprof( timesortnlp ); 225 } 226 /* 227 * print the flat profile 228 */ 229 if(!Lflag) { 230 printprof(); 231 } 232 /* 233 * print the index 234 */ 235 printindex(); 236 exit(0); 237 } 238 239 /* 240 * information from a gmon.out file is in two parts: 241 * an array of sampling hits within pc ranges, 242 * and the arcs. 243 */ 244 void 245 getpfile(filename) 246 char *filename; 247 { 248 FILE *pfile; 249 FILE *openpfile(); 250 struct rawarc arc; 251 252 pfile = openpfile(filename); 253 readsamples(pfile); 254 /* 255 * the rest of the file consists of 256 * a bunch of <from,self,count> tuples. 257 */ 258 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 259 # ifdef DEBUG 260 if ( debug & SAMPLEDEBUG ) { 261 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 262 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 263 } 264 # endif /* DEBUG */ 265 /* 266 * add this arc 267 */ 268 tally( &arc ); 269 } 270 fclose(pfile); 271 } 272 273 FILE * 274 openpfile(filename) 275 char *filename; 276 { 277 struct gmonhdr tmp; 278 FILE *pfile; 279 int size; 280 int rate; 281 282 if((pfile = fopen(filename, "r")) == NULL) 283 err(1, "%s", filename); 284 fread(&tmp, sizeof(struct gmonhdr), 1, pfile); 285 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 286 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) 287 errx(1, "%s: incompatible with first gmon file", filename); 288 gmonhdr = tmp; 289 if ( gmonhdr.version == GMONVERSION ) { 290 rate = gmonhdr.profrate; 291 size = sizeof(struct gmonhdr); 292 } else { 293 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 294 size = sizeof(struct ophdr); 295 gmonhdr.profrate = rate = hertz(); 296 gmonhdr.version = GMONVERSION; 297 } 298 if (hz == 0) { 299 hz = rate; 300 } else if (hz != rate) 301 errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file", 302 filename, rate, "incompatible with clock rate", hz); 303 if ( gmonhdr.histcounter_type == 0 ) { 304 /* Historical case. The type was u_short (2 bytes in practice). */ 305 histcounter_type = 16; 306 histcounter_size = 2; 307 } else { 308 histcounter_type = gmonhdr.histcounter_type; 309 histcounter_size = abs(histcounter_type) / CHAR_BIT; 310 } 311 s_lowpc = (unsigned long) gmonhdr.lpc; 312 s_highpc = (unsigned long) gmonhdr.hpc; 313 lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2; 314 highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2; 315 sampbytes = gmonhdr.ncnt - size; 316 nsamples = sampbytes / histcounter_size; 317 # ifdef DEBUG 318 if ( debug & SAMPLEDEBUG ) { 319 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 320 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 321 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 322 s_lowpc , s_highpc ); 323 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 324 lowpc , highpc ); 325 printf( "[openpfile] sampbytes %d nsamples %d\n" , 326 sampbytes , nsamples ); 327 printf( "[openpfile] sample rate %ld\n" , hz ); 328 } 329 # endif /* DEBUG */ 330 return(pfile); 331 } 332 333 void 334 tally( rawp ) 335 struct rawarc *rawp; 336 { 337 nltype *parentp; 338 nltype *childp; 339 340 parentp = nllookup( rawp -> raw_frompc ); 341 childp = nllookup( rawp -> raw_selfpc ); 342 if ( parentp == 0 || childp == 0 ) 343 return; 344 if ( kflag 345 && onlist( kfromlist , parentp -> name ) 346 && onlist( ktolist , childp -> name ) ) { 347 return; 348 } 349 childp -> ncall += rawp -> raw_count; 350 # ifdef DEBUG 351 if ( debug & TALLYDEBUG ) { 352 printf( "[tally] arc from %s to %s traversed %ld times\n" , 353 parentp -> name , childp -> name , rawp -> raw_count ); 354 } 355 # endif /* DEBUG */ 356 addarc( parentp , childp , rawp -> raw_count ); 357 } 358 359 /* 360 * dump out the gmon.sum file 361 */ 362 void 363 dumpsum( sumfile ) 364 char *sumfile; 365 { 366 register nltype *nlp; 367 register arctype *arcp; 368 struct rawarc arc; 369 FILE *sfile; 370 371 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) 372 err( 1 , "%s" , sumfile ); 373 /* 374 * dump the header; use the last header read in 375 */ 376 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) 377 err( 1 , "%s" , sumfile ); 378 /* 379 * dump the samples 380 */ 381 if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples) 382 err( 1 , "%s" , sumfile ); 383 /* 384 * dump the normalized raw arc information 385 */ 386 for ( nlp = nl ; nlp < npe ; nlp++ ) { 387 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 388 arc.raw_frompc = arcp -> arc_parentp -> value; 389 arc.raw_selfpc = arcp -> arc_childp -> value; 390 arc.raw_count = arcp -> arc_count; 391 if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) 392 err( 1 , "%s" , sumfile ); 393 # ifdef DEBUG 394 if ( debug & SAMPLEDEBUG ) { 395 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 396 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 397 } 398 # endif /* DEBUG */ 399 } 400 } 401 fclose( sfile ); 402 } 403 404 static int 405 valcmp(v1, v2) 406 const void *v1; 407 const void *v2; 408 { 409 const nltype *p1 = (const nltype *)v1; 410 const nltype *p2 = (const nltype *)v2; 411 412 if ( p1 -> value < p2 -> value ) { 413 return LESSTHAN; 414 } 415 if ( p1 -> value > p2 -> value ) { 416 return GREATERTHAN; 417 } 418 return EQUALTO; 419 } 420 421 void 422 readsamples(pfile) 423 FILE *pfile; 424 { 425 register i; 426 intmax_t sample; 427 428 if (samples == 0) { 429 samples = (double *) calloc(nsamples, sizeof(double)); 430 if (samples == 0) 431 errx(0, "no room for %d sample pc's", nsamples); 432 } 433 for (i = 0; i < nsamples; i++) { 434 fread(&sample, histcounter_size, 1, pfile); 435 if (feof(pfile)) 436 break; 437 switch ( histcounter_type ) { 438 case -8: 439 samples[i] += *(int8_t *)&sample; 440 break; 441 case 8: 442 samples[i] += *(u_int8_t *)&sample; 443 break; 444 case -16: 445 samples[i] += *(int16_t *)&sample; 446 break; 447 case 16: 448 samples[i] += *(u_int16_t *)&sample; 449 break; 450 case -32: 451 samples[i] += *(int32_t *)&sample; 452 break; 453 case 32: 454 samples[i] += *(u_int32_t *)&sample; 455 break; 456 case -64: 457 samples[i] += *(int64_t *)&sample; 458 break; 459 case 64: 460 samples[i] += *(u_int64_t *)&sample; 461 break; 462 default: 463 err(1, "unsupported histogram counter type %d", histcounter_type); 464 } 465 } 466 if (i != nsamples) 467 errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples ); 468 } 469 470 /* 471 * Assign samples to the procedures to which they belong. 472 * 473 * There are three cases as to where pcl and pch can be 474 * with respect to the routine entry addresses svalue0 and svalue1 475 * as shown in the following diagram. overlap computes the 476 * distance between the arrows, the fraction of the sample 477 * that is to be credited to the routine which starts at svalue0. 478 * 479 * svalue0 svalue1 480 * | | 481 * v v 482 * 483 * +-----------------------------------------------+ 484 * | | 485 * | ->| |<- ->| |<- ->| |<- | 486 * | | | | | | 487 * +---------+ +---------+ +---------+ 488 * 489 * ^ ^ ^ ^ ^ ^ 490 * | | | | | | 491 * pcl pch pcl pch pcl pch 492 * 493 * For the vax we assert that samples will never fall in the first 494 * two bytes of any routine, since that is the entry mask, 495 * thus we give call alignentries() to adjust the entry points if 496 * the entry mask falls in one bucket but the code for the routine 497 * doesn't start until the next bucket. In conjunction with the 498 * alignment of routine addresses, this should allow us to have 499 * only one sample for every four bytes of text space and never 500 * have any overlap (the two end cases, above). 501 */ 502 void 503 asgnsamples() 504 { 505 register int j; 506 double ccnt; 507 double time; 508 unsigned long pcl, pch; 509 register int i; 510 unsigned long overlap; 511 unsigned long svalue0, svalue1; 512 513 /* read samples and assign to namelist symbols */ 514 scale = highpc - lowpc; 515 scale /= nsamples; 516 alignentries(); 517 for (i = 0, j = 1; i < nsamples; i++) { 518 ccnt = samples[i]; 519 if (ccnt == 0) 520 continue; 521 pcl = lowpc + (unsigned long)(scale * i); 522 pch = lowpc + (unsigned long)(scale * (i + 1)); 523 time = ccnt; 524 # ifdef DEBUG 525 if ( debug & SAMPLEDEBUG ) { 526 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" , 527 pcl , pch , ccnt ); 528 } 529 # endif /* DEBUG */ 530 totime += time; 531 for (j = j - 1; j < nname; j++) { 532 svalue0 = nl[j].svalue; 533 svalue1 = nl[j+1].svalue; 534 /* 535 * if high end of tick is below entry address, 536 * go for next tick. 537 */ 538 if (pch < svalue0) 539 break; 540 /* 541 * if low end of tick into next routine, 542 * go for next routine. 543 */ 544 if (pcl >= svalue1) 545 continue; 546 overlap = min(pch, svalue1) - max(pcl, svalue0); 547 if (overlap > 0) { 548 # ifdef DEBUG 549 if (debug & SAMPLEDEBUG) { 550 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n", 551 nl[j].value / HISTORICAL_SCALE_2, 552 svalue0, svalue1, nl[j].name, 553 overlap * time / scale, overlap); 554 } 555 # endif /* DEBUG */ 556 nl[j].time += overlap * time / scale; 557 } 558 } 559 } 560 # ifdef DEBUG 561 if (debug & SAMPLEDEBUG) { 562 printf("[asgnsamples] totime %f\n", totime); 563 } 564 # endif /* DEBUG */ 565 } 566 567 568 unsigned long 569 min(a, b) 570 unsigned long a,b; 571 { 572 if (a<b) 573 return(a); 574 return(b); 575 } 576 577 unsigned long 578 max(a, b) 579 unsigned long a,b; 580 { 581 if (a>b) 582 return(a); 583 return(b); 584 } 585 586 /* 587 * calculate scaled entry point addresses (to save time in asgnsamples), 588 * and possibly push the scaled entry points over the entry mask, 589 * if it turns out that the entry point is in one bucket and the code 590 * for a routine is in the next bucket. 591 */ 592 void 593 alignentries() 594 { 595 register struct nl *nlp; 596 unsigned long bucket_of_entry; 597 unsigned long bucket_of_code; 598 599 for (nlp = nl; nlp < npe; nlp++) { 600 nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2; 601 bucket_of_entry = (nlp->svalue - lowpc) / scale; 602 bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 - 603 lowpc) / scale; 604 if (bucket_of_entry < bucket_of_code) { 605 # ifdef DEBUG 606 if (debug & SAMPLEDEBUG) { 607 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 608 nlp->svalue, 609 nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2); 610 } 611 # endif /* DEBUG */ 612 nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2; 613 } 614 } 615 } 616