xref: /freebsd/usr.bin/gprof/gprof.c (revision 51a9219f5780e61e1437d25220bf8750d9df7f8b)
1 /*
2  * Copyright (c) 1983, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #ifndef lint
35 static const char copyright[] =
36 "@(#) Copyright (c) 1983, 1993\n\
37 	The Regents of the University of California.  All rights reserved.\n";
38 #endif /* not lint */
39 
40 #if 0
41 #ifndef lint
42 static char sccsid[] = "@(#)gprof.c	8.1 (Berkeley) 6/6/93";
43 #endif /* not lint */
44 #endif
45 
46 #include <sys/cdefs.h>
47 __FBSDID("$FreeBSD$");
48 
49 #include <err.h>
50 #include <limits.h>
51 #include <stdint.h>
52 #include "gprof.h"
53 
54 static int valcmp(const void *, const void *);
55 
56 
57 static struct gmonhdr	gmonhdr;
58 static int lflag;
59 static int Lflag;
60 
61 int
62 main(argc, argv)
63     int argc;
64     char **argv;
65 {
66     char	**sp;
67     nltype	**timesortnlp;
68     char	**defaultEs;
69 
70     --argc;
71     argv++;
72     debug = 0;
73     bflag = TRUE;
74     while ( *argv != 0 && **argv == '-' ) {
75 	(*argv)++;
76 	switch ( **argv ) {
77 	case 'a':
78 	    aflag = TRUE;
79 	    break;
80 	case 'b':
81 	    bflag = FALSE;
82 	    break;
83 	case 'C':
84 	    Cflag = TRUE;
85 	    cyclethreshold = atoi( *++argv );
86 	    break;
87 	case 'c':
88 #if defined(vax) || defined(tahoe)
89 	    cflag = TRUE;
90 #else
91 	    errx(1, "-c isn't supported on this architecture yet");
92 #endif
93 	    break;
94 	case 'd':
95 	    dflag = TRUE;
96 	    setlinebuf(stdout);
97 	    debug |= atoi( *++argv );
98 	    debug |= ANYDEBUG;
99 #	    ifdef DEBUG
100 		printf("[main] debug = %d\n", debug);
101 #	    else /* not DEBUG */
102 		printf("gprof: -d ignored\n");
103 #	    endif /* DEBUG */
104 	    break;
105 	case 'E':
106 	    ++argv;
107 	    addlist( Elist , *argv );
108 	    Eflag = TRUE;
109 	    addlist( elist , *argv );
110 	    eflag = TRUE;
111 	    break;
112 	case 'e':
113 	    addlist( elist , *++argv );
114 	    eflag = TRUE;
115 	    break;
116 	case 'F':
117 	    ++argv;
118 	    addlist( Flist , *argv );
119 	    Fflag = TRUE;
120 	    addlist( flist , *argv );
121 	    fflag = TRUE;
122 	    break;
123 	case 'f':
124 	    addlist( flist , *++argv );
125 	    fflag = TRUE;
126 	    break;
127 	case 'k':
128 	    addlist( kfromlist , *++argv );
129 	    addlist( ktolist , *++argv );
130 	    kflag = TRUE;
131 	    break;
132 	case 'K':
133 	    Kflag = TRUE;
134 	    break;
135 	case 'l':
136 	    lflag = 1;
137 	    Lflag = 0;
138 	    break;
139 	case 'L':
140 	    Lflag = 1;
141 	    lflag = 0;
142 	    break;
143 	case 's':
144 	    sflag = TRUE;
145 	    break;
146 	case 'u':
147 	    uflag = TRUE;
148 	    break;
149 	case 'z':
150 	    zflag = TRUE;
151 	    break;
152 	}
153 	argv++;
154     }
155     if ( *argv != 0 ) {
156 	a_outname  = *argv;
157 	argv++;
158     } else {
159 	a_outname  = A_OUTNAME;
160     }
161     if ( *argv != 0 ) {
162 	gmonname = *argv;
163 	argv++;
164     } else {
165 	gmonname = (char *) malloc(strlen(a_outname)+6);
166 	strcpy(gmonname, a_outname);
167 	strcat(gmonname, ".gmon");
168     }
169 	/*
170 	 *	get information from the executable file.
171 	 */
172     if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
173       (elf_getnfile(a_outname, &defaultEs) == -1 &&
174       aout_getnfile(a_outname, &defaultEs) == -1))
175 	errx(1, "%s: bad format", a_outname);
176 	/*
177 	 *	sort symbol table.
178 	 */
179     qsort(nl, nname, sizeof(nltype), valcmp);
180 	/*
181 	 *	turn off default functions
182 	 */
183     for ( sp = defaultEs ; *sp ; sp++ ) {
184 	Eflag = TRUE;
185 	addlist( Elist , *sp );
186 	eflag = TRUE;
187 	addlist( elist , *sp );
188     }
189 	/*
190 	 *	get information about mon.out file(s).
191 	 */
192     do	{
193 	getpfile( gmonname );
194 	if ( *argv != 0 ) {
195 	    gmonname = *argv;
196 	}
197     } while ( *argv++ != 0 );
198 	/*
199 	 *	how many ticks per second?
200 	 *	if we can't tell, report time in ticks.
201 	 */
202     if (hz == 0) {
203 	hz = 1;
204 	fprintf(stderr, "time is in ticks, not seconds\n");
205     }
206 	/*
207 	 *	dump out a gmon.sum file if requested
208 	 */
209     if ( sflag ) {
210 	dumpsum( GMONSUM );
211     }
212 	/*
213 	 *	assign samples to procedures
214 	 */
215     asgnsamples();
216 	/*
217 	 *	assemble the dynamic profile
218 	 */
219     timesortnlp = doarcs();
220 	/*
221 	 *	print the dynamic profile
222 	 */
223     if(!lflag) {
224 	    printgprof( timesortnlp );
225     }
226 	/*
227 	 *	print the flat profile
228 	 */
229     if(!Lflag) {
230 	    printprof();
231     }
232 	/*
233 	 *	print the index
234 	 */
235     printindex();
236     exit(0);
237 }
238 
239     /*
240      *	information from a gmon.out file is in two parts:
241      *	an array of sampling hits within pc ranges,
242      *	and the arcs.
243      */
244 void
245 getpfile(filename)
246     char *filename;
247 {
248     FILE		*pfile;
249     FILE		*openpfile();
250     struct rawarc	arc;
251 
252     pfile = openpfile(filename);
253     readsamples(pfile);
254 	/*
255 	 *	the rest of the file consists of
256 	 *	a bunch of <from,self,count> tuples.
257 	 */
258     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
259 #	ifdef DEBUG
260 	    if ( debug & SAMPLEDEBUG ) {
261 		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
262 			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
263 	    }
264 #	endif /* DEBUG */
265 	    /*
266 	     *	add this arc
267 	     */
268 	tally( &arc );
269     }
270     fclose(pfile);
271 }
272 
273 FILE *
274 openpfile(filename)
275     char *filename;
276 {
277     struct gmonhdr	tmp;
278     FILE		*pfile;
279     int			size;
280     int			rate;
281 
282     if((pfile = fopen(filename, "r")) == NULL)
283 	err(1, "%s", filename);
284     fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
285     if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
286 	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) )
287 	errx(1, "%s: incompatible with first gmon file", filename);
288     gmonhdr = tmp;
289     if ( gmonhdr.version == GMONVERSION ) {
290 	rate = gmonhdr.profrate;
291 	size = sizeof(struct gmonhdr);
292     } else {
293 	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
294 	size = sizeof(struct ophdr);
295 	gmonhdr.profrate = rate = hertz();
296 	gmonhdr.version = GMONVERSION;
297     }
298     if (hz == 0) {
299 	hz = rate;
300     } else if (hz != rate)
301 	errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file",
302 	    filename, rate, "incompatible with clock rate", hz);
303     if ( gmonhdr.histcounter_type == 0 ) {
304 	/* Historical case.  The type was u_short (2 bytes in practice). */
305 	histcounter_type = 16;
306 	histcounter_size = 2;
307     } else {
308 	histcounter_type = gmonhdr.histcounter_type;
309 	histcounter_size = abs(histcounter_type) / CHAR_BIT;
310     }
311     s_lowpc = (unsigned long) gmonhdr.lpc;
312     s_highpc = (unsigned long) gmonhdr.hpc;
313     lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
314     highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
315     sampbytes = gmonhdr.ncnt - size;
316     nsamples = sampbytes / histcounter_size;
317 #   ifdef DEBUG
318 	if ( debug & SAMPLEDEBUG ) {
319 	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
320 		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
321 	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
322 		s_lowpc , s_highpc );
323 	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
324 		lowpc , highpc );
325 	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
326 		sampbytes , nsamples );
327 	    printf( "[openpfile] sample rate %ld\n" , hz );
328 	}
329 #   endif /* DEBUG */
330     return(pfile);
331 }
332 
333 void
334 tally( rawp )
335     struct rawarc	*rawp;
336 {
337     nltype		*parentp;
338     nltype		*childp;
339 
340     parentp = nllookup( rawp -> raw_frompc );
341     childp = nllookup( rawp -> raw_selfpc );
342     if ( parentp == 0 || childp == 0 )
343 	return;
344     if ( kflag
345 	 && onlist( kfromlist , parentp -> name )
346 	 && onlist( ktolist , childp -> name ) ) {
347 	return;
348     }
349     childp -> ncall += rawp -> raw_count;
350 #   ifdef DEBUG
351 	if ( debug & TALLYDEBUG ) {
352 	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
353 		    parentp -> name , childp -> name , rawp -> raw_count );
354 	}
355 #   endif /* DEBUG */
356     addarc( parentp , childp , rawp -> raw_count );
357 }
358 
359 /*
360  * dump out the gmon.sum file
361  */
362 void
363 dumpsum( sumfile )
364     char *sumfile;
365 {
366     register nltype *nlp;
367     register arctype *arcp;
368     struct rawarc arc;
369     FILE *sfile;
370 
371     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
372 	err( 1 , "%s" , sumfile );
373     /*
374      * dump the header; use the last header read in
375      */
376     if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
377 	err( 1 , "%s" , sumfile );
378     /*
379      * dump the samples
380      */
381     if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples)
382 	err( 1 , "%s" , sumfile );
383     /*
384      * dump the normalized raw arc information
385      */
386     for ( nlp = nl ; nlp < npe ; nlp++ ) {
387 	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
388 	    arc.raw_frompc = arcp -> arc_parentp -> value;
389 	    arc.raw_selfpc = arcp -> arc_childp -> value;
390 	    arc.raw_count = arcp -> arc_count;
391 	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 )
392 		err( 1 , "%s" , sumfile );
393 #	    ifdef DEBUG
394 		if ( debug & SAMPLEDEBUG ) {
395 		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
396 			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
397 		}
398 #	    endif /* DEBUG */
399 	}
400     }
401     fclose( sfile );
402 }
403 
404 static int
405 valcmp(v1, v2)
406     const void *v1;
407     const void *v2;
408 {
409     const nltype *p1 = (const nltype *)v1;
410     const nltype *p2 = (const nltype *)v2;
411 
412     if ( p1 -> value < p2 -> value ) {
413 	return LESSTHAN;
414     }
415     if ( p1 -> value > p2 -> value ) {
416 	return GREATERTHAN;
417     }
418     return EQUALTO;
419 }
420 
421 void
422 readsamples(pfile)
423     FILE	*pfile;
424 {
425     register i;
426     intmax_t	sample;
427 
428     if (samples == 0) {
429 	samples = (double *) calloc(nsamples, sizeof(double));
430 	if (samples == 0)
431 	    errx(0, "no room for %d sample pc's", nsamples);
432     }
433     for (i = 0; i < nsamples; i++) {
434 	fread(&sample, histcounter_size, 1, pfile);
435 	if (feof(pfile))
436 		break;
437 	switch ( histcounter_type ) {
438 	case -8:
439 	    samples[i] += *(int8_t *)&sample;
440 	    break;
441 	case 8:
442 	    samples[i] += *(u_int8_t *)&sample;
443 	    break;
444 	case -16:
445 	    samples[i] += *(int16_t *)&sample;
446 	    break;
447 	case 16:
448 	    samples[i] += *(u_int16_t *)&sample;
449 	    break;
450 	case -32:
451 	    samples[i] += *(int32_t *)&sample;
452 	    break;
453 	case 32:
454 	    samples[i] += *(u_int32_t *)&sample;
455 	    break;
456 	case -64:
457 	    samples[i] += *(int64_t *)&sample;
458 	    break;
459 	case 64:
460 	    samples[i] += *(u_int64_t *)&sample;
461 	    break;
462 	default:
463 	    err(1, "unsupported histogram counter type %d", histcounter_type);
464 	}
465     }
466     if (i != nsamples)
467 	errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples );
468 }
469 
470 /*
471  *	Assign samples to the procedures to which they belong.
472  *
473  *	There are three cases as to where pcl and pch can be
474  *	with respect to the routine entry addresses svalue0 and svalue1
475  *	as shown in the following diagram.  overlap computes the
476  *	distance between the arrows, the fraction of the sample
477  *	that is to be credited to the routine which starts at svalue0.
478  *
479  *	    svalue0                                         svalue1
480  *	       |                                               |
481  *	       v                                               v
482  *
483  *	       +-----------------------------------------------+
484  *	       |					       |
485  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
486  *	  |         |		  |         |		  |         |
487  *	  +---------+		  +---------+		  +---------+
488  *
489  *	  ^         ^		  ^         ^		  ^         ^
490  *	  |         |		  |         |		  |         |
491  *	 pcl       pch		 pcl       pch		 pcl       pch
492  *
493  *	For the vax we assert that samples will never fall in the first
494  *	two bytes of any routine, since that is the entry mask,
495  *	thus we give call alignentries() to adjust the entry points if
496  *	the entry mask falls in one bucket but the code for the routine
497  *	doesn't start until the next bucket.  In conjunction with the
498  *	alignment of routine addresses, this should allow us to have
499  *	only one sample for every four bytes of text space and never
500  *	have any overlap (the two end cases, above).
501  */
502 void
503 asgnsamples()
504 {
505     register int	j;
506     double		ccnt;
507     double		time;
508     unsigned long	pcl, pch;
509     register int	i;
510     unsigned long	overlap;
511     unsigned long	svalue0, svalue1;
512 
513     /* read samples and assign to namelist symbols */
514     scale = highpc - lowpc;
515     scale /= nsamples;
516     alignentries();
517     for (i = 0, j = 1; i < nsamples; i++) {
518 	ccnt = samples[i];
519 	if (ccnt == 0)
520 		continue;
521 	pcl = lowpc + (unsigned long)(scale * i);
522 	pch = lowpc + (unsigned long)(scale * (i + 1));
523 	time = ccnt;
524 #	ifdef DEBUG
525 	    if ( debug & SAMPLEDEBUG ) {
526 		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
527 			pcl , pch , ccnt );
528 	    }
529 #	endif /* DEBUG */
530 	totime += time;
531 	for (j = j - 1; j < nname; j++) {
532 	    svalue0 = nl[j].svalue;
533 	    svalue1 = nl[j+1].svalue;
534 		/*
535 		 *	if high end of tick is below entry address,
536 		 *	go for next tick.
537 		 */
538 	    if (pch < svalue0)
539 		    break;
540 		/*
541 		 *	if low end of tick into next routine,
542 		 *	go for next routine.
543 		 */
544 	    if (pcl >= svalue1)
545 		    continue;
546 	    overlap = min(pch, svalue1) - max(pcl, svalue0);
547 	    if (overlap > 0) {
548 #		ifdef DEBUG
549 		    if (debug & SAMPLEDEBUG) {
550 			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
551 				nl[j].value / HISTORICAL_SCALE_2,
552 				svalue0, svalue1, nl[j].name,
553 				overlap * time / scale, overlap);
554 		    }
555 #		endif /* DEBUG */
556 		nl[j].time += overlap * time / scale;
557 	    }
558 	}
559     }
560 #   ifdef DEBUG
561 	if (debug & SAMPLEDEBUG) {
562 	    printf("[asgnsamples] totime %f\n", totime);
563 	}
564 #   endif /* DEBUG */
565 }
566 
567 
568 unsigned long
569 min(a, b)
570     unsigned long a,b;
571 {
572     if (a<b)
573 	return(a);
574     return(b);
575 }
576 
577 unsigned long
578 max(a, b)
579     unsigned long a,b;
580 {
581     if (a>b)
582 	return(a);
583     return(b);
584 }
585 
586     /*
587      *	calculate scaled entry point addresses (to save time in asgnsamples),
588      *	and possibly push the scaled entry points over the entry mask,
589      *	if it turns out that the entry point is in one bucket and the code
590      *	for a routine is in the next bucket.
591      */
592 void
593 alignentries()
594 {
595     register struct nl	*nlp;
596     unsigned long	bucket_of_entry;
597     unsigned long	bucket_of_code;
598 
599     for (nlp = nl; nlp < npe; nlp++) {
600 	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
601 	bucket_of_entry = (nlp->svalue - lowpc) / scale;
602 	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
603 	  lowpc) / scale;
604 	if (bucket_of_entry < bucket_of_code) {
605 #	    ifdef DEBUG
606 		if (debug & SAMPLEDEBUG) {
607 		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
608 			    nlp->svalue,
609 			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
610 		}
611 #	    endif /* DEBUG */
612 	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
613 	}
614     }
615 }
616