xref: /freebsd/usr.bin/gprof/gprof.c (revision 5b31cc94b10d4bb7109c6b27940a0fc76a44a331)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1983, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #ifndef lint
33 static const char copyright[] =
34 "@(#) Copyright (c) 1983, 1993\n\
35 	The Regents of the University of California.  All rights reserved.\n";
36 #endif /* not lint */
37 
38 #if 0
39 #endif
40 
41 #include <sys/cdefs.h>
42 #include <err.h>
43 #include <limits.h>
44 #include <stdint.h>
45 #include <string.h>
46 
47 #define	EXTERN
48 #include "gprof.h"
49 
50 static int valcmp(const void *, const void *);
51 
52 static struct gmonhdr	gmonhdr;
53 static int lflag;
54 static int Lflag;
55 
56 int
57 main(int argc, char **argv)
58 {
59     char	**sp;
60     nltype	**timesortnlp;
61     char	**defaultEs;
62 
63     --argc;
64     argv++;
65     debug = 0;
66     bflag = TRUE;
67     while ( *argv != 0 && **argv == '-' ) {
68 	(*argv)++;
69 	switch ( **argv ) {
70 	case 'a':
71 	    aflag = TRUE;
72 	    break;
73 	case 'b':
74 	    bflag = FALSE;
75 	    break;
76 	case 'C':
77 	    Cflag = TRUE;
78 	    cyclethreshold = atoi( *++argv );
79 	    break;
80 	case 'd':
81 	    dflag = TRUE;
82 	    setlinebuf(stdout);
83 	    debug |= atoi( *++argv );
84 	    debug |= ANYDEBUG;
85 #	    ifdef DEBUG
86 		printf("[main] debug = %d\n", debug);
87 #	    else /* not DEBUG */
88 		printf("gprof: -d ignored\n");
89 #	    endif /* DEBUG */
90 	    break;
91 	case 'E':
92 	    ++argv;
93 	    addlist( Elist , *argv );
94 	    Eflag = TRUE;
95 	    addlist( elist , *argv );
96 	    eflag = TRUE;
97 	    break;
98 	case 'e':
99 	    addlist( elist , *++argv );
100 	    eflag = TRUE;
101 	    break;
102 	case 'F':
103 	    ++argv;
104 	    addlist( Flist , *argv );
105 	    Fflag = TRUE;
106 	    addlist( flist , *argv );
107 	    fflag = TRUE;
108 	    break;
109 	case 'f':
110 	    addlist( flist , *++argv );
111 	    fflag = TRUE;
112 	    break;
113 	case 'k':
114 	    addlist( kfromlist , *++argv );
115 	    addlist( ktolist , *++argv );
116 	    kflag = TRUE;
117 	    break;
118 	case 'K':
119 	    Kflag = TRUE;
120 	    break;
121 	case 'l':
122 	    lflag = 1;
123 	    Lflag = 0;
124 	    break;
125 	case 'L':
126 	    Lflag = 1;
127 	    lflag = 0;
128 	    break;
129 	case 's':
130 	    sflag = TRUE;
131 	    break;
132 	case 'u':
133 	    uflag = TRUE;
134 	    break;
135 	case 'z':
136 	    zflag = TRUE;
137 	    break;
138 	}
139 	argv++;
140     }
141     if ( *argv != 0 ) {
142 	a_outname  = *argv;
143 	argv++;
144     } else {
145 	a_outname  = A_OUTNAME;
146     }
147     if ( *argv != 0 ) {
148 	gmonname = *argv;
149 	argv++;
150     } else {
151 	gmonname = (char *) malloc(strlen(a_outname)+6);
152 	strcpy(gmonname, a_outname);
153 	strcat(gmonname, ".gmon");
154     }
155 	/*
156 	 *	get information from the executable file.
157 	 */
158     if ((Kflag && kernel_getnfile(a_outname, &defaultEs) == -1) ||
159       (!Kflag && elf_getnfile(a_outname, &defaultEs) == -1))
160 	errx(1, "%s: bad format", a_outname);
161 	/*
162 	 *	sort symbol table.
163 	 */
164     qsort(nl, nname, sizeof(nltype), valcmp);
165 	/*
166 	 *	turn off default functions
167 	 */
168     for ( sp = defaultEs ; *sp ; sp++ ) {
169 	Eflag = TRUE;
170 	addlist( Elist , *sp );
171 	eflag = TRUE;
172 	addlist( elist , *sp );
173     }
174 	/*
175 	 *	get information about mon.out file(s).
176 	 */
177     do	{
178 	getpfile( gmonname );
179 	if ( *argv != 0 ) {
180 	    gmonname = *argv;
181 	}
182     } while ( *argv++ != 0 );
183 	/*
184 	 *	how many ticks per second?
185 	 *	if we can't tell, report time in ticks.
186 	 */
187     if (hz == 0) {
188 	hz = 1;
189 	fprintf(stderr, "time is in ticks, not seconds\n");
190     }
191 	/*
192 	 *	dump out a gmon.sum file if requested
193 	 */
194     if ( sflag ) {
195 	dumpsum( GMONSUM );
196     }
197 	/*
198 	 *	assign samples to procedures
199 	 */
200     asgnsamples();
201 	/*
202 	 *	assemble the dynamic profile
203 	 */
204     timesortnlp = doarcs();
205 	/*
206 	 *	print the dynamic profile
207 	 */
208     if(!lflag) {
209 	    printgprof( timesortnlp );
210     }
211 	/*
212 	 *	print the flat profile
213 	 */
214     if(!Lflag) {
215 	    printprof();
216     }
217 	/*
218 	 *	print the index
219 	 */
220     printindex();
221     exit(0);
222 }
223 
224     /*
225      *	information from a gmon.out file is in two parts:
226      *	an array of sampling hits within pc ranges,
227      *	and the arcs.
228      */
229 void
230 getpfile(char *filename)
231 {
232     FILE		*pfile;
233     struct rawarc	arc;
234 
235     pfile = openpfile(filename);
236     readsamples(pfile);
237 	/*
238 	 *	the rest of the file consists of
239 	 *	a bunch of <from,self,count> tuples.
240 	 */
241     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
242 #	ifdef DEBUG
243 	    if ( debug & SAMPLEDEBUG ) {
244 		printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
245 			arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
246 	    }
247 #	endif /* DEBUG */
248 	    /*
249 	     *	add this arc
250 	     */
251 	tally( &arc );
252     }
253     fclose(pfile);
254 }
255 
256 FILE *
257 openpfile(char *filename)
258 {
259     struct gmonhdr	tmp;
260     FILE		*pfile;
261     int			size;
262     int			rate;
263 
264     if((pfile = fopen(filename, "r")) == NULL)
265 	err(1, "%s", filename);
266     fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
267     if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
268 	 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) )
269 	errx(1, "%s: incompatible with first gmon file", filename);
270     gmonhdr = tmp;
271     if ( gmonhdr.version == GMONVERSION ) {
272 	rate = gmonhdr.profrate;
273 	size = sizeof(struct gmonhdr);
274     } else {
275 	fseek(pfile, sizeof(struct ophdr), SEEK_SET);
276 	size = sizeof(struct ophdr);
277 	gmonhdr.profrate = rate = hertz();
278 	gmonhdr.version = GMONVERSION;
279     }
280     if (hz == 0) {
281 	hz = rate;
282     } else if (hz != rate)
283 	errx(0, "%s: profile clock rate (%d) %s (%ld) in first gmon file",
284 	    filename, rate, "incompatible with clock rate", hz);
285     if ( gmonhdr.histcounter_type == 0 ) {
286 	/* Historical case.  The type was u_short (2 bytes in practice). */
287 	histcounter_type = 16;
288 	histcounter_size = 2;
289     } else {
290 	histcounter_type = gmonhdr.histcounter_type;
291 	histcounter_size = abs(histcounter_type) / CHAR_BIT;
292     }
293     s_lowpc = (unsigned long) gmonhdr.lpc;
294     s_highpc = (unsigned long) gmonhdr.hpc;
295     lowpc = (unsigned long)gmonhdr.lpc / HISTORICAL_SCALE_2;
296     highpc = (unsigned long)gmonhdr.hpc / HISTORICAL_SCALE_2;
297     sampbytes = gmonhdr.ncnt - size;
298     nsamples = sampbytes / histcounter_size;
299 #   ifdef DEBUG
300 	if ( debug & SAMPLEDEBUG ) {
301 	    printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n",
302 		gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
303 	    printf( "[openpfile]   s_lowpc 0x%lx   s_highpc 0x%lx\n" ,
304 		s_lowpc , s_highpc );
305 	    printf( "[openpfile]     lowpc 0x%lx     highpc 0x%lx\n" ,
306 		lowpc , highpc );
307 	    printf( "[openpfile] sampbytes %d nsamples %d\n" ,
308 		sampbytes , nsamples );
309 	    printf( "[openpfile] sample rate %ld\n" , hz );
310 	}
311 #   endif /* DEBUG */
312     return(pfile);
313 }
314 
315 void
316 tally(struct rawarc *rawp)
317 {
318     nltype		*parentp;
319     nltype		*childp;
320 
321     parentp = nllookup( rawp -> raw_frompc );
322     childp = nllookup( rawp -> raw_selfpc );
323     if ( parentp == 0 || childp == 0 )
324 	return;
325     if ( kflag
326 	 && onlist( kfromlist , parentp -> name )
327 	 && onlist( ktolist , childp -> name ) ) {
328 	return;
329     }
330     childp -> ncall += rawp -> raw_count;
331 #   ifdef DEBUG
332 	if ( debug & TALLYDEBUG ) {
333 	    printf( "[tally] arc from %s to %s traversed %ld times\n" ,
334 		    parentp -> name , childp -> name , rawp -> raw_count );
335 	}
336 #   endif /* DEBUG */
337     addarc( parentp , childp , rawp -> raw_count );
338 }
339 
340 /*
341  * dump out the gmon.sum file
342  */
343 void
344 dumpsum(const char *sumfile)
345 {
346     register nltype *nlp;
347     register arctype *arcp;
348     struct rawarc arc;
349     FILE *sfile;
350 
351     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL )
352 	err( 1 , "%s" , sumfile );
353     /*
354      * dump the header; use the last header read in
355      */
356     if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 )
357 	err( 1 , "%s" , sumfile );
358     /*
359      * dump the samples
360      */
361     if (fwrite(samples, histcounter_size, nsamples, sfile) != nsamples)
362 	err( 1 , "%s" , sumfile );
363     /*
364      * dump the normalized raw arc information
365      */
366     for ( nlp = nl ; nlp < npe ; nlp++ ) {
367 	for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
368 	    arc.raw_frompc = arcp -> arc_parentp -> value;
369 	    arc.raw_selfpc = arcp -> arc_childp -> value;
370 	    arc.raw_count = arcp -> arc_count;
371 	    if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 )
372 		err( 1 , "%s" , sumfile );
373 #	    ifdef DEBUG
374 		if ( debug & SAMPLEDEBUG ) {
375 		    printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" ,
376 			    arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
377 		}
378 #	    endif /* DEBUG */
379 	}
380     }
381     fclose( sfile );
382 }
383 
384 static int
385 valcmp(const void *v1, const void *v2)
386 {
387     const nltype *p1 = (const nltype *)v1;
388     const nltype *p2 = (const nltype *)v2;
389 
390     if ( p1 -> value < p2 -> value ) {
391 	return LESSTHAN;
392     }
393     if ( p1 -> value > p2 -> value ) {
394 	return GREATERTHAN;
395     }
396     return EQUALTO;
397 }
398 
399 void
400 readsamples(FILE *pfile)
401 {
402     int		i;
403     intmax_t	sample;
404 
405     if (samples == 0) {
406 	samples = (double *) calloc(nsamples, sizeof(double));
407 	if (samples == NULL)
408 	    errx(0, "no room for %d sample pc's", nsamples);
409     }
410     for (i = 0; i < nsamples; i++) {
411 	fread(&sample, histcounter_size, 1, pfile);
412 	if (feof(pfile))
413 		break;
414 	switch ( histcounter_type ) {
415 	case -8:
416 	    samples[i] += *(int8_t *)&sample;
417 	    break;
418 	case 8:
419 	    samples[i] += *(u_int8_t *)&sample;
420 	    break;
421 	case -16:
422 	    samples[i] += *(int16_t *)&sample;
423 	    break;
424 	case 16:
425 	    samples[i] += *(u_int16_t *)&sample;
426 	    break;
427 	case -32:
428 	    samples[i] += *(int32_t *)&sample;
429 	    break;
430 	case 32:
431 	    samples[i] += *(u_int32_t *)&sample;
432 	    break;
433 	case -64:
434 	    samples[i] += *(int64_t *)&sample;
435 	    break;
436 	case 64:
437 	    samples[i] += *(u_int64_t *)&sample;
438 	    break;
439 	default:
440 	    err(1, "unsupported histogram counter type %d", histcounter_type);
441 	}
442     }
443     if (i != nsamples)
444 	errx(1, "unexpected EOF after reading %d/%d samples", --i , nsamples );
445 }
446 
447 /*
448  *	Assign samples to the procedures to which they belong.
449  *
450  *	There are three cases as to where pcl and pch can be
451  *	with respect to the routine entry addresses svalue0 and svalue1
452  *	as shown in the following diagram.  overlap computes the
453  *	distance between the arrows, the fraction of the sample
454  *	that is to be credited to the routine which starts at svalue0.
455  *
456  *	    svalue0                                         svalue1
457  *	       |                                               |
458  *	       v                                               v
459  *
460  *	       +-----------------------------------------------+
461  *	       |					       |
462  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
463  *	  |         |		  |         |		  |         |
464  *	  +---------+		  +---------+		  +---------+
465  *
466  *	  ^         ^		  ^         ^		  ^         ^
467  *	  |         |		  |         |		  |         |
468  *	 pcl       pch		 pcl       pch		 pcl       pch
469  *
470  *	For the vax we assert that samples will never fall in the first
471  *	two bytes of any routine, since that is the entry mask,
472  *	thus we give call alignentries() to adjust the entry points if
473  *	the entry mask falls in one bucket but the code for the routine
474  *	doesn't start until the next bucket.  In conjunction with the
475  *	alignment of routine addresses, this should allow us to have
476  *	only one sample for every four bytes of text space and never
477  *	have any overlap (the two end cases, above).
478  */
479 void
480 asgnsamples(void)
481 {
482     register int	j;
483     double		ccnt;
484     double		thetime;
485     unsigned long	pcl, pch;
486     register int	i;
487     unsigned long	overlap;
488     unsigned long	svalue0, svalue1;
489 
490     /* read samples and assign to namelist symbols */
491     scale = highpc - lowpc;
492     scale /= nsamples;
493     alignentries();
494     for (i = 0, j = 1; i < nsamples; i++) {
495 	ccnt = samples[i];
496 	if (ccnt == 0)
497 		continue;
498 	pcl = lowpc + (unsigned long)(scale * i);
499 	pch = lowpc + (unsigned long)(scale * (i + 1));
500 	thetime = ccnt;
501 #	ifdef DEBUG
502 	    if ( debug & SAMPLEDEBUG ) {
503 		printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %.0f\n" ,
504 			pcl , pch , ccnt );
505 	    }
506 #	endif /* DEBUG */
507 	totime += thetime;
508 	for (j = j - 1; j < nname; j++) {
509 	    svalue0 = nl[j].svalue;
510 	    svalue1 = nl[j+1].svalue;
511 		/*
512 		 *	if high end of tick is below entry address,
513 		 *	go for next tick.
514 		 */
515 	    if (pch < svalue0)
516 		    break;
517 		/*
518 		 *	if low end of tick into next routine,
519 		 *	go for next routine.
520 		 */
521 	    if (pcl >= svalue1)
522 		    continue;
523 	    overlap = min(pch, svalue1) - max(pcl, svalue0);
524 	    if (overlap > 0) {
525 #		ifdef DEBUG
526 		    if (debug & SAMPLEDEBUG) {
527 			printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %lu overlap\n",
528 				nl[j].value / HISTORICAL_SCALE_2,
529 				svalue0, svalue1, nl[j].name,
530 				overlap * thetime / scale, overlap);
531 		    }
532 #		endif /* DEBUG */
533 		nl[j].time += overlap * thetime / scale;
534 	    }
535 	}
536     }
537 #   ifdef DEBUG
538 	if (debug & SAMPLEDEBUG) {
539 	    printf("[asgnsamples] totime %f\n", totime);
540 	}
541 #   endif /* DEBUG */
542 }
543 
544 
545 unsigned long
546 min(unsigned long a, unsigned long b)
547 {
548     if (a<b)
549 	return(a);
550     return(b);
551 }
552 
553 unsigned long
554 max(unsigned long a, unsigned long b)
555 {
556     if (a>b)
557 	return(a);
558     return(b);
559 }
560 
561     /*
562      *	calculate scaled entry point addresses (to save time in asgnsamples),
563      *	and possibly push the scaled entry points over the entry mask,
564      *	if it turns out that the entry point is in one bucket and the code
565      *	for a routine is in the next bucket.
566      */
567 void
568 alignentries(void)
569 {
570     register struct nl	*nlp;
571     unsigned long	bucket_of_entry;
572     unsigned long	bucket_of_code;
573 
574     for (nlp = nl; nlp < npe; nlp++) {
575 	nlp -> svalue = nlp -> value / HISTORICAL_SCALE_2;
576 	bucket_of_entry = (nlp->svalue - lowpc) / scale;
577 	bucket_of_code = (nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2 -
578 	  lowpc) / scale;
579 	if (bucket_of_entry < bucket_of_code) {
580 #	    ifdef DEBUG
581 		if (debug & SAMPLEDEBUG) {
582 		    printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n",
583 			    nlp->svalue,
584 			    nlp->svalue + OFFSET_OF_CODE / HISTORICAL_SCALE_2);
585 		}
586 #	    endif /* DEBUG */
587 	    nlp->svalue += OFFSET_OF_CODE / HISTORICAL_SCALE_2;
588 	}
589     }
590 }
591