xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/gprof.c (revision 326c1baf095e4fca67a632261ea9d20f32ead20c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include	<sysexits.h>
27 #include	<stdlib.h>
28 #include	<stdio.h>
29 #include	<unistd.h>
30 #include	"gprof.h"
31 #include	"profile.h"
32 
/*
 * Program-wide state.
 *
 * NOTE(review): the single-letter *flag variables presumably mirror the
 * command-line options of the same letter; option parsing is outside this
 * section, so confirm against main().  Only the uses visible in the code
 * below are described here.
 */
bool		aflag;
bool		bflag;
/* set: dumpsum_ostyle() writes struct hdr/rawarc, else hdr32/rawarc32 */
bool		Bflag;
bool		cflag;
bool		Cflag;
bool		dflag;
/* set: tally() subtracts the arc counts of every file but the first */
bool		Dflag;
bool		eflag;
bool		Eflag;
bool		fflag;
bool		Fflag;
bool		lflag;
bool		sflag;
bool		zflag;
bool		nflag;
bool		rflag;
/* consulted by tally() together with Dflag */
bool		first_file;
/* set: dumpsum() hands the work to dumpsum_ostyle() */
bool		old_style;
/* (highpc - lowpc) / nsamples, computed by asgnsamples() */
double		scale;
/* running total of ticks credited so far */
double		totime;
/* total number of pcsamples read so far, see process_pcsamples() */
Size		n_pcsamples;
/* head of the module list; the first entry is the program executable */
mod_info_t	modules;
pctype		s_lowpc;
pctype		s_highpc;
/* dump_modules() writes n_modules - 1 ProfModule records */
sztype		n_modules;
sztype		sampbytes;
/* number of entries in samples[] */
sztype		nsamples;
/* old-style histogram counters, one per sample bucket */
unsigned short	*samples;
/* dev/ino of the executable, compared by is_same_as_aout() */
fl_info_t	aout_info;
fl_info_t	gmonout_info;
long		hz;
/* last old-style header read in; rewritten by dumpsum_ostyle() */
struct hdr	h;
unsigned char	*textspace;
/* bit mask of the *DEBUG categories tested under #ifdef DEBUG */
int		debug;
int		number_funcs_toprint;
char		*a_outname;
char		*prog_name;
char		*gmonname;
/* program name used as the prefix of error messages */
char		*whoami = "gprof";
static pctype	lowpc, highpc;		/* range profiled, in UNIT's */
73 
/*
 *	Routines that are treated as if named with -E unless told
 *	otherwise.  The list is NULL-terminated.
 */
static char *defaultEs[] = { "mcount", "__mcleanup", NULL };
82 
#ifdef DEBUG

/*
 * Printable names of the profile object types, compiled in for debug
 * output only.  The table is NULL-terminated.
 * NOTE(review): presumably indexed by an object's type value, with slot 0
 * standing in for anything unrecognized -- confirm at the use site.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};
/* index of the last real name in objname[] */
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
95 
/*
 * Normal termination: leave the program reporting success.  Never returns.
 */
void
done(void)
{
	exit(EX_OK);
}
102 
103 static pctype
max(pctype a,pctype b)104 max(pctype a, pctype b)
105 {
106 	if (a > b)
107 		return (a);
108 	return (b);
109 }
110 
111 static pctype
min(pctype a,pctype b)112 min(pctype a, pctype b)
113 {
114 	if (a < b)
115 		return (a);
116 	return (b);
117 }
118 
119 /*
120  *	calculate scaled entry point addresses (to save time in asgnsamples),
121  *	and possibly push the scaled entry points over the entry mask,
122  *	if it turns out that the entry point is in one bucket and the code
123  *	for a routine is in the next bucket.
124  *
125  */
126 static void
alignentries(void)127 alignentries(void)
128 {
129 	struct nl *nlp;
130 #ifdef DEBUG
131 	pctype			bucket_of_entry;
132 	pctype			bucket_of_code;
133 #endif /* DEBUG */
134 
135 	/* for old-style gmon.out, nameslist is only in modules.nl */
136 
137 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
138 		nlp->svalue = nlp->value / sizeof (UNIT);
139 #ifdef DEBUG
140 		bucket_of_entry = (nlp->svalue - lowpc) / scale;
141 		bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
142 		if (bucket_of_entry < bucket_of_code) {
143 			if (debug & SAMPLEDEBUG) {
144 				(void) printf(
145 				    "[alignentries] pushing svalue 0x%llx "
146 				    "to 0x%llx\n", nlp->svalue,
147 				    nlp->svalue + UNITS_TO_CODE);
148 			}
149 		}
150 #endif /* DEBUG */
151 	}
152 }
153 
154 /*
155  *	old-style gmon.out
156  *	------------------
157  *
158  *	Assign samples to the procedures to which they belong.
159  *
160  *	There are three cases as to where pcl and pch can be
161  *	with respect to the routine entry addresses svalue0 and svalue1
162  *	as shown in the following diagram.  overlap computes the
163  *	distance between the arrows, the fraction of the sample
164  *	that is to be credited to the routine which starts at svalue0.
165  *
166  *	    svalue0                                         svalue1
167  *	       |                                               |
168  *	       v                                               v
169  *
170  *	       +-----------------------------------------------+
171  *	       |					       |
172  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
173  *	  |         |		  |         |		  |         |
174  *	  +---------+		  +---------+		  +---------+
175  *
176  *	  ^         ^		  ^         ^		  ^         ^
177  *	  |         |		  |         |		  |         |
178  *	 pcl       pch		 pcl       pch		 pcl       pch
179  *
180  *	For the vax we assert that samples will never fall in the first
181  *	two bytes of any routine, since that is the entry mask,
182  *	thus we give call alignentries() to adjust the entry points if
183  *	the entry mask falls in one bucket but the code for the routine
184  *	doesn't start until the next bucket.  In conjunction with the
185  *	alignment of routine addresses, this should allow us to have
186  *	only one sample for every four bytes of text space and never
187  *	have any overlap (the two end cases, above).
188  */
189 static void
asgnsamples(void)190 asgnsamples(void)
191 {
192 	sztype		i, j;
193 	unsigned_UNIT	ccnt;
194 	double		time;
195 	pctype		pcl, pch;
196 	pctype		overlap;
197 	pctype		svalue0, svalue1;
198 
199 	extern mod_info_t	modules;
200 	nltype		*nl = modules.nl;
201 	sztype		nname = modules.nname;
202 
203 	/* read samples and assign to namelist symbols */
204 	scale = highpc - lowpc;
205 	scale /= nsamples;
206 	alignentries();
207 	for (i = 0, j = 1; i < nsamples; i++) {
208 		ccnt = samples[i];
209 		if (ccnt == 0)
210 			continue;
211 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
212 		pcl = lowpc + scale * i;
213 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
214 		pch = lowpc + scale * (i + 1);
215 		time = ccnt;
216 #ifdef DEBUG
217 		if (debug & SAMPLEDEBUG) {
218 			(void) printf(
219 			    "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
220 			    pcl, pch, ccnt);
221 		}
222 #endif /* DEBUG */
223 		totime += time;
224 		for (j = (j ? j - 1 : 0); j < nname; j++) {
225 			svalue0 = nl[j].svalue;
226 			svalue1 = nl[j+1].svalue;
227 			/*
228 			 *	if high end of tick is below entry address,
229 			 *	go for next tick.
230 			 */
231 			if (pch < svalue0)
232 				break;
233 			/*
234 			 *	if low end of tick into next routine,
235 			 *	go for next routine.
236 			 */
237 			if (pcl >= svalue1)
238 				continue;
239 			overlap = min(pch, svalue1) - max(pcl, svalue0);
240 			if (overlap != 0) {
241 #ifdef DEBUG
242 				if (debug & SAMPLEDEBUG) {
243 					(void) printf("[asgnsamples] "
244 					    "(0x%llx->0x%llx-0x%llx) %s gets "
245 					    "%f ticks %lld overlap\n",
246 					    nl[j].value/sizeof (UNIT), svalue0,
247 					    svalue1, nl[j].name,
248 					    overlap * time / scale, overlap);
249 				}
250 #endif /* DEBUG */
251 				nl[j].time += overlap * time / scale;
252 			}
253 		}
254 	}
255 #ifdef DEBUG
256 	if (debug & SAMPLEDEBUG) {
257 		(void) printf("[asgnsamples] totime %f\n", totime);
258 	}
259 #endif /* DEBUG */
260 }
261 
262 
263 static void
dump_callgraph(FILE * fp,char * filename,unsigned long tarcs,unsigned long ncallees)264 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
265     unsigned long ncallees)
266 {
267 	ProfCallGraph		prof_cgraph;
268 	ProfFunction		prof_func;
269 	arctype	*arcp;
270 	mod_info_t		*mi;
271 	nltype			*nlp;
272 	size_t			cur_offset;
273 	unsigned long		caller_id = 0, callee_id = 0;
274 
275 	/*
276 	 * Write the callgraph header
277 	 */
278 	prof_cgraph.type = PROF_CALLGRAPH_T;
279 	prof_cgraph.version = PROF_CALLGRAPH_VER;
280 	prof_cgraph.functions = PROFCGRAPH_SZ;
281 	prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
282 	if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
283 		perror(filename);
284 		exit(EX_IOERR);
285 	}
286 	/* CONSTCOND */
287 	if (CGRAPH_FILLER)
288 		(void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);
289 
290 	/* Current offset inside the callgraph object */
291 	cur_offset = prof_cgraph.functions;
292 
293 	for (mi = &modules; mi; mi = mi->next) {
294 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
295 			if (nlp->ncallers == 0)
296 				continue;
297 
298 			/* If this is the last callee, set next_to to 0 */
299 			callee_id++;
300 			if (callee_id == ncallees)
301 				prof_func.next_to = 0;
302 			else {
303 				prof_func.next_to = cur_offset +
304 				    nlp->ncallers * PROFFUNC_SZ;
305 			}
306 
307 			/*
308 			 * Dump this callee's raw arc information with all
309 			 * its callers
310 			 */
311 			caller_id = 1;
312 			for (arcp = nlp->parents; arcp;
313 			    arcp = arcp->arc_parentlist) {
314 				/*
315 				 * If no more callers for this callee, set
316 				 * next_from to 0
317 				 */
318 				if (caller_id == nlp->ncallers)
319 					prof_func.next_from = 0;
320 				else {
321 					prof_func.next_from = cur_offset +
322 					    PROFFUNC_SZ;
323 				}
324 
325 				prof_func.frompc =
326 				    arcp->arc_parentp->module->load_base +
327 				    (arcp->arc_parentp->value -
328 				    arcp->arc_parentp->module->txt_origin);
329 				prof_func.topc = mi->load_base +
330 				    (nlp->value - mi->txt_origin);
331 				prof_func.count = arcp->arc_count;
332 
333 
334 				if (fwrite(&prof_func, sizeof (ProfFunction),
335 				    1, fp) != 1) {
336 					perror(filename);
337 					exit(EX_IOERR);
338 				}
339 				/* CONSTCOND */
340 				if (FUNC_FILLER)
341 					(void) fseek(fp, FUNC_FILLER, SEEK_CUR);
342 
343 				cur_offset += PROFFUNC_SZ;
344 				caller_id++;
345 			}
346 		} /* for nlp... */
347 	} /* for mi... */
348 }
349 
350 /*
351  * To save all pc-hits in all the gmon.out's is infeasible, as this
352  * may become quite huge even with a small number of files to sum.
353  * Instead, we'll dump *fictitious hits* to correct functions
354  * by scanning module namelists. Again, since this is summing
355  * pc-hits, we may have to dump the pcsamples out in chunks if the
356  * number of pc-hits is high.
357  */
358 static void
dump_hits(FILE * fp,char * filename,nltype * nlp)359 dump_hits(FILE *fp, char *filename, nltype *nlp)
360 {
361 	Address		*p, hitpc;
362 	size_t		i, nelem, ntowrite;
363 
364 	if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
365 		nelem = PROF_BUFFER_SIZE;
366 
367 	if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
368 		(void) fprintf(stderr, "%s: no room for %d pcsamples\n",
369 		    whoami, nelem);
370 		exit(EX_OSERR);
371 	}
372 
373 	/*
374 	 * Set up *fictitious* hits (to function entry) buffer
375 	 */
376 	hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
377 	for (i = 0; i < nelem; i++)
378 		p[i] = hitpc;
379 
380 	for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
381 		if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
382 			perror(filename);
383 			exit(EX_IOERR);
384 		}
385 	}
386 
387 	if (ntowrite) {
388 		if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
389 			perror(filename);
390 			exit(EX_IOERR);
391 		}
392 	}
393 
394 	free(p);
395 }
396 
397 static void
dump_pcsamples(FILE * fp,char * filename,unsigned long * tarcs,unsigned long * ncallees)398 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
399     unsigned long *ncallees)
400 {
401 	ProfBuffer		prof_buffer;
402 	arctype	*arcp;
403 	mod_info_t		*mi;
404 	nltype			*nlp;
405 
406 	prof_buffer.type = PROF_BUFFER_T;
407 	prof_buffer.version = PROF_BUFFER_VER;
408 	prof_buffer.buffer = PROFBUF_SZ;
409 	prof_buffer.bufsize = n_pcsamples;
410 	prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
411 	if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
412 		perror(filename);
413 		exit(EX_IOERR);
414 	}
415 	/* CONSTCOND */
416 	if (BUF_FILLER)
417 		(void) fseek(fp, BUF_FILLER, SEEK_CUR);
418 
419 	*tarcs = 0;
420 	*ncallees = 0;
421 	for (mi = &modules; mi; mi = mi->next) {
422 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
423 			if (nlp->nticks)
424 				dump_hits(fp, filename, nlp);
425 
426 			nlp->ncallers = 0;
427 			for (arcp = nlp->parents; arcp;
428 			    arcp = arcp->arc_parentlist) {
429 				(nlp->ncallers)++;
430 			}
431 
432 			if (nlp->ncallers) {
433 				(*tarcs) += nlp->ncallers;
434 				(*ncallees)++;
435 			}
436 		}
437 	}
438 }
439 
440 static void
dump_modules(FILE * fp,char * filename,size_t pbuf_sz)441 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
442 {
443 	char		*pbuf, *p;
444 	size_t		namelen;
445 	Index		off_nxt, off_path;
446 	mod_info_t	*mi;
447 
448 	ProfModuleList	prof_modlist;
449 	ProfModule	prof_mod;
450 
451 	/* Allocate for path strings buffer */
452 	pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
453 	if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
454 		(void) fprintf(stderr, "%s: no room for %d bytes\n",
455 		    whoami, pbuf_sz * sizeof (char));
456 		exit(EX_OSERR);
457 	}
458 
459 	/* Dump out PROF_MODULE_T info for all non-aout modules */
460 	prof_modlist.type = PROF_MODULES_T;
461 	prof_modlist.version = PROF_MODULES_VER;
462 	prof_modlist.modules = PROFMODLIST_SZ;
463 	prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
464 	    pbuf_sz;
465 	if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
466 		perror(filename);
467 		exit(EX_IOERR);
468 	}
469 	/* CONSTCOND */
470 	if (MODLIST_FILLER)
471 		(void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
472 
473 	/*
474 	 * Initialize offsets for ProfModule elements.
475 	 */
476 	off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
477 	off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
478 
479 	for (mi = modules.next; mi; mi = mi->next) {
480 		if (mi->next)
481 			prof_mod.next = off_nxt;
482 		else
483 			prof_mod.next = 0;
484 		prof_mod.path = off_path;
485 		prof_mod.startaddr = mi->load_base;
486 		prof_mod.endaddr = mi->load_end;
487 
488 		if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
489 			perror(filename);
490 			exit(EX_IOERR);
491 		}
492 
493 		/* CONSTCOND */
494 		if (MOD_FILLER)
495 			(void) fseek(fp, MOD_FILLER, SEEK_CUR);
496 
497 		(void) strcpy(p, mi->name);
498 		namelen = strlen(mi->name);
499 		p += namelen + 1;
500 
501 		/* Note that offset to every path str need not be aligned */
502 		off_nxt += PROFMOD_SZ;
503 		off_path += namelen + 1;
504 	}
505 
506 	/* Write out the module path strings */
507 	if (pbuf_sz) {
508 		if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
509 			perror(filename);
510 			exit(EX_IOERR);
511 		}
512 
513 		free(pbuf);
514 	}
515 }
516 
517 /*
518  * If we have inactive modules, their current load addresses may overlap with
519  * active ones, and so we've to assign fictitious, non-overlapping addresses
520  * to all modules before we dump them.
521  */
522 static void
fixup_maps(size_t * pathsz)523 fixup_maps(size_t *pathsz)
524 {
525 	unsigned int	n_inactive = 0;
526 	Address		lbase = 0, lend;
527 	mod_info_t	*mi;
528 
529 	/* Pick the lowest load address among modules */
530 	*pathsz = 0;
531 	for (mi = &modules; mi; mi = mi->next) {
532 
533 		if (mi->active == FALSE)
534 			n_inactive++;
535 
536 		if (mi == &modules || mi->load_base < lbase)
537 			lbase = mi->load_base;
538 
539 		/*
540 		 * Return total path size of non-aout modules only
541 		 */
542 		if (mi != &modules)
543 			*pathsz = (*pathsz) + strlen(mi->name) + 1;
544 	}
545 
546 	/*
547 	 * All module info is in fine shape already if there are no
548 	 * inactive modules
549 	 */
550 	if (n_inactive == 0)
551 		return;
552 
553 	/*
554 	 * Assign fictitious load addresses to all (non-aout) modules so
555 	 * that sum info can be dumped out.
556 	 */
557 	for (mi = modules.next; mi; mi = mi->next) {
558 		lend = lbase + (mi->data_end - mi->txt_origin);
559 		if ((lbase < modules.load_base && lend < modules.load_base) ||
560 		    (lbase > modules.load_end && lend > modules.load_end)) {
561 
562 			mi->load_base = lbase;
563 			mi->load_end = lend;
564 
565 			/* just to give an appearance of reality */
566 			lbase = CEIL(lend + PGSZ, PGSZ);
567 		} else {
568 			/*
569 			 * can't use this lbase & lend pair, as it
570 			 * overlaps with aout's addresses
571 			 */
572 			mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
573 			mi->load_end = mi->load_base + (lend - lbase);
574 
575 			lbase = CEIL(mi->load_end + PGSZ, PGSZ);
576 		}
577 	}
578 }
579 
580 static void
dump_gprofhdr(FILE * fp,char * filename)581 dump_gprofhdr(FILE *fp, char *filename)
582 {
583 	ProfHeader	prof_hdr;
584 
585 	prof_hdr.h_magic = PROF_MAGIC;
586 	prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
587 	prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
588 	prof_hdr.size = PROFHDR_SZ;
589 	if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
590 		perror(filename);
591 		exit(EX_IOERR);
592 	}
593 
594 	/* CONSTCOND */
595 	if (HDR_FILLER)
596 		(void) fseek(fp, HDR_FILLER, SEEK_CUR);
597 }
598 
599 static void
dumpsum_ostyle(char * sumfile)600 dumpsum_ostyle(char *sumfile)
601 {
602 	nltype *nlp;
603 	arctype *arcp;
604 	struct rawarc arc;
605 	struct rawarc32 arc32;
606 	FILE *sfile;
607 
608 	if ((sfile = fopen(sumfile, "w")) == NULL) {
609 		perror(sumfile);
610 		exit(EX_IOERR);
611 	}
612 	/*
613 	 * dump the header; use the last header read in
614 	 */
615 	if (Bflag) {
616 		if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
617 			perror(sumfile);
618 			exit(EX_IOERR);
619 		}
620 	} else {
621 		struct hdr32 hdr;
622 		hdr.lowpc  = (pctype32)h.lowpc;
623 		hdr.highpc = (pctype32)h.highpc;
624 		hdr.ncnt   = (pctype32)h.ncnt;
625 		if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
626 			perror(sumfile);
627 			exit(EX_IOERR);
628 		}
629 	}
630 	/*
631 	 * dump the samples
632 	 */
633 	if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
634 	    nsamples) {
635 		perror(sumfile);
636 		exit(EX_IOERR);
637 	}
638 	/*
639 	 * dump the normalized raw arc information. For old-style dumping,
640 	 * the only namelist is in modules.nl
641 	 */
642 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
643 		for (arcp = nlp->children; arcp;
644 		    arcp = arcp->arc_childlist) {
645 			if (Bflag) {
646 				arc.raw_frompc = arcp->arc_parentp->value;
647 				arc.raw_selfpc = arcp->arc_childp->value;
648 				arc.raw_count = arcp->arc_count;
649 				if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
650 					perror(sumfile);
651 					exit(EX_IOERR);
652 				}
653 			} else {
654 				arc32.raw_frompc =
655 				    (pctype32)arcp->arc_parentp->value;
656 				arc32.raw_selfpc =
657 				    (pctype32)arcp->arc_childp->value;
658 				arc32.raw_count = (actype32)arcp->arc_count;
659 				if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
660 				    1) {
661 					perror(sumfile);
662 					exit(EX_IOERR);
663 				}
664 			}
665 #ifdef DEBUG
666 			if (debug & SAMPLEDEBUG) {
667 				(void) printf(
668 				    "[dumpsum_ostyle] frompc 0x%llx selfpc "
669 				    "0x%llx count %lld\n", arc.raw_frompc,
670 				    arc.raw_selfpc, arc.raw_count);
671 			}
672 #endif /* DEBUG */
673 		}
674 	}
675 	(void) fclose(sfile);
676 }
677 
678 /*
679  * dump out the gmon.sum file
680  */
681 static void
dumpsum(char * sumfile)682 dumpsum(char *sumfile)
683 {
684 	FILE		*sfile;
685 	size_t		pathbuf_sz;
686 	unsigned long	total_arcs;	/* total number of arcs in all */
687 	unsigned long	ncallees;	/* no. of callees with parents */
688 
689 	if (old_style) {
690 		dumpsum_ostyle(sumfile);
691 		return;
692 	}
693 
694 	if ((sfile = fopen(sumfile, "w")) == NULL) {
695 		perror(sumfile);
696 		exit(EX_IOERR);
697 	}
698 
699 	/*
700 	 * Dump the new-style gprof header. Even if one of the original
701 	 * profiled-files was of a older version, the summed file is of
702 	 * current version only.
703 	 */
704 	dump_gprofhdr(sfile, sumfile);
705 
706 	/*
707 	 * Fix up load-maps and dump out modules info
708 	 *
709 	 * Fix up module load maps so inactive modules get *some* address
710 	 * (and btw, could you get the total size of non-aout module path
711 	 * strings please ?)
712 	 */
713 	fixup_maps(&pathbuf_sz);
714 	dump_modules(sfile, sumfile, pathbuf_sz);
715 
716 
717 	/*
718 	 * Dump out the summ'd pcsamples
719 	 *
720 	 * For dumping call graph information later, we need certain
721 	 * statistics (like total arcs, number of callers for each node);
722 	 * collect these also while we are at it.
723 	 */
724 	dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
725 
726 	/*
727 	 * Dump out the summ'd call graph information
728 	 */
729 	dump_callgraph(sfile, sumfile, total_arcs, ncallees);
730 
731 
732 	(void) fclose(sfile);
733 }
734 
735 static void
tally(mod_info_t * caller_mod,mod_info_t * callee_mod,struct rawarc * rawp)736 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
737 {
738 	nltype		*parentp;
739 	nltype		*childp;
740 
741 	/*
742 	 * if count == 0 this is a null arc and
743 	 * we don't need to tally it.
744 	 */
745 	if (rawp->raw_count == 0)
746 		return;
747 
748 	/*
749 	 * Lookup the caller and callee pcs in namelists of
750 	 * appropriate modules
751 	 */
752 	parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
753 	childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
754 	if (childp && parentp) {
755 		if (!Dflag)
756 			childp->ncall += rawp->raw_count;
757 		else {
758 			if (first_file)
759 				childp->ncall += rawp->raw_count;
760 			else {
761 				childp->ncall -= rawp->raw_count;
762 				if (childp->ncall < 0)
763 					childp->ncall = 0;
764 			}
765 		}
766 
767 #ifdef DEBUG
768 		if (debug & TALLYDEBUG) {
769 			(void) printf("[tally] arc from %s to %s traversed "
770 			    "%lld times\n", parentp->name,
771 			    childp->name, rawp->raw_count);
772 		}
773 #endif /* DEBUG */
774 		addarc(parentp, childp, rawp->raw_count);
775 	}
776 }
777 
778 /*
779  * Look up a module's base address in a sorted list of pc-hits. Unlike
780  * nllookup(), this deals with misses by mapping them to the next *higher*
781  * pc-hit. This is so that we get into the module's first pc-hit rightaway,
782  * even if the module's entry-point (load_base) itself is not a hit.
783  */
784 static Address *
locate(Address * pclist,size_t nelem,Address keypc)785 locate(Address	*pclist, size_t nelem, Address keypc)
786 {
787 	size_t	low = 0, middle, high = nelem - 1;
788 
789 	if (keypc <= pclist[low])
790 		return (pclist);
791 
792 	if (keypc > pclist[high])
793 		return (NULL);
794 
795 	while (low != high) {
796 		middle = (high + low) >> 1;
797 
798 		if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
799 			return (&pclist[middle + 1]);
800 
801 		if (pclist[middle] >= keypc)
802 			high = middle;
803 		else
804 			low = middle + 1;
805 	}
806 
807 	/* must never reach here! */
808 	return (NULL);
809 }
810 
811 static void
assign_pcsamples(mod_info_t * module,Address * pcsmpl,size_t n_samples)812 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
813 {
814 	Address		*pcptr, *pcse = pcsmpl + n_samples;
815 	pctype		nxt_func;
816 	nltype		*fnl;
817 	size_t		func_nticks;
818 #ifdef DEBUG
819 	size_t		n_hits_in_module = 0;
820 #endif /* DEBUG */
821 
822 	/* Locate the first pc-hit for this module */
823 	if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
824 #ifdef DEBUG
825 		if (debug & PCSMPLDEBUG) {
826 			(void) printf("[assign_pcsamples] no pc-hits in\n");
827 			(void) printf(
828 			    "                   `%s'\n", module->name);
829 		}
830 #endif /* DEBUG */
831 		return;			/* no pc-hits in this module */
832 	}
833 
834 	/* Assign all pc-hits in this module to appropriate functions */
835 	while ((pcptr < pcse) && (*pcptr < module->load_end)) {
836 
837 		/* Update the corresponding function's time */
838 		fnl = nllookup(module, (pctype) *pcptr, &nxt_func);
839 		if (fnl != NULL) {
840 			/*
841 			 * Collect all pc-hits in this function. Each
842 			 * pc-hit counts as 1 tick.
843 			 */
844 			func_nticks = 0;
845 			while ((pcptr < pcse) && (*pcptr < nxt_func)) {
846 				func_nticks++;
847 				pcptr++;
848 			}
849 
850 			if (func_nticks == 0)
851 				pcptr++;
852 			else {
853 				fnl->nticks += func_nticks;
854 				fnl->time += func_nticks;
855 				totime += func_nticks;
856 			}
857 
858 #ifdef DEBUG
859 			n_hits_in_module += func_nticks;
860 #endif /* DEBUG */
861 		} else {
862 			/*
863 			 * pc sample could not be assigned to function;
864 			 * probably in a PLT
865 			 */
866 			pcptr++;
867 		}
868 	}
869 
870 #ifdef DEBUG
871 	if (debug & PCSMPLDEBUG) {
872 		(void) printf(
873 		    "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
874 		(void) printf("                   `%s'\n", module->name);
875 	}
876 #endif /* DEBUG */
877 }
878 
879 int
pc_cmp(const void * arg1,const void * arg2)880 pc_cmp(const void *arg1, const void *arg2)
881 {
882 	Address *pc1 = (Address *)arg1;
883 	Address *pc2 = (Address *)arg2;
884 
885 	if (*pc1 > *pc2)
886 		return (1);
887 
888 	if (*pc1 < *pc2)
889 		return (-1);
890 
891 	return (0);
892 }
893 
894 static void
process_pcsamples(ProfBuffer * bufp)895 process_pcsamples(ProfBuffer *bufp)
896 {
897 	Address		*pc_samples;
898 	mod_info_t	*mi;
899 	caddr_t		p;
900 	size_t		chunk_size, nelem_read, nelem_to_read;
901 
902 #ifdef DEBUG
903 	if (debug & PCSMPLDEBUG) {
904 		(void) printf(
905 		    "[process_pcsamples] number of pcsamples = %lld\n",
906 		    bufp->bufsize);
907 	}
908 #endif /* DEBUG */
909 
910 	/* buffer with no pc samples ? */
911 	if (bufp->bufsize == 0)
912 		return;
913 
914 	/*
915 	 * If we're processing pcsamples of a profile sum, we could have
916 	 * more than PROF_BUFFER_SIZE number of samples. In such a case,
917 	 * we must read the pcsamples in chunks.
918 	 */
919 	if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
920 		chunk_size = PROF_BUFFER_SIZE;
921 
922 	/* Allocate for the pcsample chunk */
923 	pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
924 	if (pc_samples == NULL) {
925 		(void) fprintf(stderr, "%s: no room for %d sample pc's\n",
926 		    whoami, chunk_size);
927 		exit(EX_OSERR);
928 	}
929 
930 	/* Copy the current set of pcsamples */
931 	nelem_read = 0;
932 	nelem_to_read = bufp->bufsize;
933 	p = (char *)bufp + bufp->buffer;
934 
935 	while (nelem_read < nelem_to_read) {
936 		(void) memcpy((void *) pc_samples, p,
937 		    chunk_size * sizeof (Address));
938 
939 		/* Sort the pc samples */
940 		qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
941 
942 		/*
943 		 * Assign pcsamples to functions in the currently active
944 		 * module list
945 		 */
946 		for (mi = &modules; mi; mi = mi->next) {
947 			if (mi->active == FALSE)
948 				continue;
949 			assign_pcsamples(mi, pc_samples, chunk_size);
950 		}
951 
952 		p += (chunk_size * sizeof (Address));
953 		nelem_read += chunk_size;
954 
955 		if ((nelem_to_read - nelem_read) < chunk_size)
956 			chunk_size = nelem_to_read - nelem_read;
957 	}
958 
959 	free(pc_samples);
960 
961 	/* Update total number of pcsamples read so far */
962 	n_pcsamples += bufp->bufsize;
963 }
964 
965 static mod_info_t *
find_module(Address addr)966 find_module(Address addr)
967 {
968 	mod_info_t	*mi;
969 
970 	for (mi = &modules; mi; mi = mi->next) {
971 		if (mi->active == FALSE)
972 			continue;
973 
974 		if (addr >= mi->load_base && addr < mi->load_end)
975 			return (mi);
976 	}
977 
978 	return (NULL);
979 }
980 
981 static void
process_cgraph(ProfCallGraph * cgp)982 process_cgraph(ProfCallGraph *cgp)
983 {
984 	struct rawarc	arc;
985 	mod_info_t	*callee_mi, *caller_mi;
986 	ProfFunction	*calleep, *callerp;
987 	Index		caller_off, callee_off;
988 
989 	/*
990 	 * Note that *callee_off* increment in the for loop below
991 	 * uses *calleep* and *calleep* doesn't get set until the for loop
992 	 * is entered. We don't expect the increment to be executed before
993 	 * the loop body is executed atleast once, so this should be ok.
994 	 */
995 	for (callee_off = cgp->functions; callee_off;
996 	    callee_off = calleep->next_to) {
997 
998 		/* LINTED: pointer cast */
999 		calleep = (ProfFunction *)((char *)cgp + callee_off);
1000 
1001 		/*
1002 		 * We could choose either to sort the {caller, callee}
1003 		 * list twice and assign callee/caller to modules or inspect
1004 		 * each callee/caller in the active modules list. Since
1005 		 * the modules list is usually very small, we'l choose the
1006 		 * latter.
1007 		 */
1008 
1009 		/*
1010 		 * If we cannot identify a callee with a module, there's
1011 		 * no use worrying about who called it.
1012 		 */
1013 		if ((callee_mi = find_module(calleep->topc)) == NULL) {
1014 #ifdef DEBUG
1015 			if (debug & CGRAPHDEBUG) {
1016 				(void) printf(
1017 				    "[process_cgraph] callee %#llx missed\n",
1018 				    calleep->topc);
1019 			}
1020 #endif /* DEBUG */
1021 			continue;
1022 		} else
1023 			arc.raw_selfpc = calleep->topc;
1024 
1025 		for (caller_off = callee_off; caller_off;
1026 		    caller_off = callerp->next_from)  {
1027 
1028 			/* LINTED: pointer cast */
1029 			callerp = (ProfFunction *)((char *)cgp + caller_off);
1030 			if ((caller_mi = find_module(callerp->frompc)) ==
1031 			    NULL) {
1032 #ifdef DEBUG
1033 				if (debug & CGRAPHDEBUG) {
1034 					(void) printf(
1035 					    "[process_cgraph] caller %#llx "
1036 					    "missed\n", callerp->frompc);
1037 				}
1038 #endif /* DEBUG */
1039 				continue;
1040 			}
1041 
1042 			arc.raw_frompc = callerp->frompc;
1043 			arc.raw_count = callerp->count;
1044 
1045 #ifdef DEBUG
1046 			if (debug & CGRAPHDEBUG) {
1047 				(void) printf(
1048 				    "[process_cgraph] arc <%#llx, %#llx, "
1049 				    "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
1050 				    arc.raw_count);
1051 			}
1052 #endif /* DEBUG */
1053 			tally(caller_mi, callee_mi, &arc);
1054 		}
1055 	}
1056 
1057 #ifdef DEBUG
1058 	(void) puts("\n");
1059 #endif /* DEBUG */
1060 }
1061 
1062 /*
1063  * Two modules overlap each other if they don't lie completely *outside*
1064  * each other.
1065  */
1066 static bool
does_overlap(ProfModule * new,mod_info_t * old)1067 does_overlap(ProfModule *new, mod_info_t *old)
1068 {
1069 	/* case 1: new module lies completely *before* the old one */
1070 	if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1071 		return (FALSE);
1072 
1073 	/* case 2: new module lies completely *after* the old one */
1074 	if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1075 		return (FALSE);
1076 
1077 	/* probably a dlopen: the modules overlap each other */
1078 	return (TRUE);
1079 }
1080 
1081 static bool
is_same_as_aout(char * modpath,struct stat * buf)1082 is_same_as_aout(char *modpath, struct stat *buf)
1083 {
1084 	if (stat(modpath, buf) == -1) {
1085 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1086 		    whoami, modpath);
1087 		exit(EX_NOINPUT);
1088 	}
1089 
1090 	if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1091 		return (TRUE);
1092 	else
1093 		return (FALSE);
1094 }
1095 
1096 static void
process_modules(ProfModuleList * modlp)1097 process_modules(ProfModuleList *modlp)
1098 {
1099 	ProfModule	*newmodp;
1100 	mod_info_t	*mi, *last, *new_module;
1101 	char		*so_path;
1102 	bool		more_modules = TRUE;
1103 	struct stat	so_statbuf;
1104 
1105 #ifdef DEBUG
1106 	if (debug & MODULEDEBUG) {
1107 		(void) printf("[process_modules] module obj version %u\n",
1108 		    modlp->version);
1109 	}
1110 #endif /* DEBUG */
1111 
1112 	/* Check version of module type object */
1113 	if (modlp->version > PROF_MODULES_VER) {
1114 		(void) fprintf(stderr, "%s: version %d for module type objects"
1115 		    "is not supported\n", whoami, modlp->version);
1116 		exit(EX_SOFTWARE);
1117 	}
1118 
1119 
1120 	/*
1121 	 * Scan the PROF_MODULES_T list and add modules to current list
1122 	 * of modules, if they're not present already
1123 	 */
1124 	/* LINTED: pointer cast */
1125 	newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1126 	do {
1127 		/*
1128 		 * Since the prog could've been renamed after its run, we
1129 		 * should see if this overlaps a.out. If it does, it is
1130 		 * probably the renamed aout. We should also skip any other
1131 		 * non-sharedobj's that we see (or should we report an error ?)
1132 		 */
1133 		so_path = (caddr_t)modlp + newmodp->path;
1134 		if (does_overlap(newmodp, &modules) ||
1135 		    is_same_as_aout(so_path, &so_statbuf) ||
1136 		    (!is_shared_obj(so_path))) {
1137 
1138 			if (!newmodp->next)
1139 				more_modules = FALSE;
1140 
1141 			/* LINTED: pointer cast */
1142 			newmodp = (ProfModule *)
1143 			    ((caddr_t)modlp + newmodp->next);
1144 #ifdef DEBUG
1145 			if (debug & MODULEDEBUG) {
1146 				(void) printf(
1147 				    "[process_modules] `%s'\n", so_path);
1148 				(void) printf("                  skipped\n");
1149 			}
1150 #endif /* DEBUG */
1151 			continue;
1152 		}
1153 #ifdef DEBUG
1154 		if (debug & MODULEDEBUG)
1155 			(void) printf("[process_modules] `%s'...\n", so_path);
1156 #endif /* DEBUG */
1157 
1158 		/*
1159 		 * Check all modules (leave the first one, 'cos that
1160 		 * is the program executable info). If this module is already
1161 		 * there in the list, update the load addresses and proceed.
1162 		 */
1163 		last = &modules;
1164 		while ((mi = last->next) != NULL) {
1165 			/*
1166 			 * We expect the full pathname for all shared objects
1167 			 * needed by the program executable. In this case, we
1168 			 * simply need to compare the paths to see if they are
1169 			 * the same file.
1170 			 */
1171 			if (strcmp(mi->name, so_path) == 0)
1172 				break;
1173 
1174 			/*
1175 			 * Check if this new shared object will overlap
1176 			 * any existing module. If yes, remove the old one
1177 			 * from the linked list (but don't free it, 'cos
1178 			 * there may be symbols referring to this module
1179 			 * still)
1180 			 */
1181 			if (does_overlap(newmodp, mi)) {
1182 #ifdef DEBUG
1183 				if (debug & MODULEDEBUG) {
1184 					(void) printf(
1185 					    "[process_modules] `%s'\n",
1186 					    so_path);
1187 					(void) printf(
1188 					    "                  overlaps\n");
1189 					(void) printf(
1190 					    "                  `%s'\n",
1191 					    mi->name);
1192 				}
1193 #endif /* DEBUG */
1194 				mi->active = FALSE;
1195 			}
1196 
1197 			last = mi;
1198 		}
1199 
1200 		/* Module already there, skip it */
1201 		if (mi != NULL) {
1202 			mi->load_base = newmodp->startaddr;
1203 			mi->load_end = newmodp->endaddr;
1204 			mi->active = TRUE;
1205 			if (!newmodp->next)
1206 				more_modules = FALSE;
1207 
1208 			/* LINTED: pointer cast */
1209 			newmodp = (ProfModule *)
1210 			    ((caddr_t)modlp + newmodp->next);
1211 
1212 #ifdef DEBUG
1213 			if (debug & MODULEDEBUG) {
1214 				(void) printf("[process_modules] base=%#llx, "
1215 				    "end=%#llx\n", mi->load_base, mi->load_end);
1216 			}
1217 #endif /* DEBUG */
1218 			continue;
1219 		}
1220 
1221 		/*
1222 		 * Check if gmon.out is outdated with respect to the new
1223 		 * module we want to add
1224 		 */
1225 		if (gmonout_info.mtime < so_statbuf.st_mtime) {
1226 			(void) fprintf(stderr,
1227 			    "%s: shared obj outdates prof info\n", whoami);
1228 			(void) fprintf(stderr, "\t(newer %s)\n", so_path);
1229 			exit(EX_NOINPUT);
1230 		}
1231 
1232 		/* Create a new module element */
1233 		new_module = malloc(sizeof (mod_info_t));
1234 		if (new_module == NULL) {
1235 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1236 			    whoami, sizeof (mod_info_t));
1237 			exit(EX_OSERR);
1238 		}
1239 
1240 		/* and fill in info... */
1241 		new_module->id = n_modules + 1;
1242 		new_module->load_base = newmodp->startaddr;
1243 		new_module->load_end = newmodp->endaddr;
1244 		new_module->name = malloc(strlen(so_path) + 1);
1245 		if (new_module->name == NULL) {
1246 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1247 			    whoami, strlen(so_path) + 1);
1248 			exit(EX_OSERR);
1249 		}
1250 		(void) strcpy(new_module->name, so_path);
1251 #ifdef DEBUG
1252 		if (debug & MODULEDEBUG) {
1253 			(void) printf(
1254 			    "[process_modules] base=%#llx, end=%#llx\n",
1255 			    new_module->load_base, new_module->load_end);
1256 		}
1257 #endif /* DEBUG */
1258 
1259 		/* Create this module's nameslist */
1260 		process_namelist(new_module);
1261 
1262 		/* Add it to the tail of active module list */
1263 		last->next = new_module;
1264 		n_modules++;
1265 
1266 #ifdef DEBUG
1267 		if (debug & MODULEDEBUG) {
1268 			(void) printf(
1269 			    "[process_modules] total shared objects = %ld\n",
1270 			    n_modules - 1);
1271 		}
1272 #endif /* DEBUG */
1273 		/*
1274 		 * Move to the next module in the PROF_MODULES_T list
1275 		 * (if present)
1276 		 */
1277 		if (!newmodp->next)
1278 			more_modules = FALSE;
1279 
1280 		/* LINTED: pointer cast */
1281 		newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1282 
1283 	} while (more_modules);
1284 }
1285 
1286 static void
reset_active_modules(void)1287 reset_active_modules(void)
1288 {
1289 	mod_info_t	*mi;
1290 
1291 	/* Except the executable, no other module should remain active */
1292 	for (mi = modules.next; mi; mi = mi->next)
1293 		mi->active = FALSE;
1294 }
1295 
1296 static void
getpfiledata(caddr_t memp,size_t fsz)1297 getpfiledata(caddr_t memp, size_t fsz)
1298 {
1299 	ProfObject	*objp;
1300 	caddr_t		file_end;
1301 	bool		found_pcsamples = FALSE, found_cgraph = FALSE;
1302 
1303 	/*
1304 	 * Before processing a new gmon.out, all modules except the
1305 	 * program executable must be made inactive, so that symbols
1306 	 * are searched only in the program executable, if we don't
1307 	 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1308 	 * because we need the active module data after we're done with
1309 	 * the last gmon.out, if we're doing summing.
1310 	 */
1311 	reset_active_modules();
1312 
1313 	file_end = memp + fsz;
1314 	/* LINTED: pointer cast */
1315 	objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1316 	while ((caddr_t)objp < file_end) {
1317 #ifdef DEBUG
1318 		{
1319 			unsigned int	type = 0;
1320 
1321 			if (debug & MONOUTDEBUG) {
1322 				if (objp->type <= MAX_OBJTYPES)
1323 					type = objp->type;
1324 
1325 				(void) printf(
1326 				    "\n[getpfiledata] object %s [%#lx]\n",
1327 				    objname[type], objp->type);
1328 			}
1329 		}
1330 #endif /* DEBUG */
1331 		switch (objp->type) {
1332 			case PROF_MODULES_T :
1333 				process_modules((ProfModuleList *) objp);
1334 				break;
1335 
1336 			case PROF_CALLGRAPH_T :
1337 				process_cgraph((ProfCallGraph *) objp);
1338 				found_cgraph = TRUE;
1339 				break;
1340 
1341 			case PROF_BUFFER_T :
1342 				process_pcsamples((ProfBuffer *) objp);
1343 				found_pcsamples = TRUE;
1344 				break;
1345 
1346 			default :
1347 				(void) fprintf(stderr,
1348 				    "%s: unknown prof object type=%d\n",
1349 				    whoami, objp->type);
1350 				exit(EX_SOFTWARE);
1351 		}
1352 		/* LINTED: pointer cast */
1353 		objp = (ProfObject *)((caddr_t)objp + objp->size);
1354 	}
1355 
1356 	if (!found_cgraph || !found_pcsamples) {
1357 		(void) fprintf(stderr,
1358 		    "%s: missing callgraph/pcsamples object\n", whoami);
1359 		exit(EX_SOFTWARE);
1360 	}
1361 
1362 	if ((caddr_t)objp > file_end) {
1363 		(void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1364 		exit(EX_SOFTWARE);
1365 	}
1366 
1367 	if (first_file)
1368 		first_file = FALSE;
1369 }
1370 
1371 static void
readarcs(FILE * pfile)1372 readarcs(FILE *pfile)
1373 {
1374 	/*
1375 	 *	the rest of the file consists of
1376 	 *	a bunch of <from,self,count> tuples.
1377 	 */
1378 	/* CONSTCOND */
1379 	while (1) {
1380 		struct rawarc	arc;
1381 
1382 		if (rflag) {
1383 			if (Bflag) {
1384 				L_cgarc64		rtld_arc64;
1385 
1386 				/*
1387 				 * If rflag is set then this is an profiled
1388 				 * image generated by rtld.  It needs to be
1389 				 * 'converted' to the standard data format.
1390 				 */
1391 				if (fread(&rtld_arc64,
1392 				    sizeof (L_cgarc64), 1, pfile) != 1)
1393 					break;
1394 
1395 				if (rtld_arc64.cg_from == PRF_OUTADDR64)
1396 					arc.raw_frompc = s_highpc + 0x10;
1397 				else
1398 					arc.raw_frompc =
1399 					    (pctype)rtld_arc64.cg_from;
1400 				arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1401 				arc.raw_count = (actype)rtld_arc64.cg_count;
1402 			} else {
1403 				L_cgarc		rtld_arc;
1404 
1405 				/*
1406 				 * If rflag is set then this is an profiled
1407 				 * image generated by rtld.  It needs to be
1408 				 * 'converted' to the standard data format.
1409 				 */
1410 				if (fread(&rtld_arc,
1411 				    sizeof (L_cgarc), 1, pfile) != 1)
1412 					break;
1413 
1414 				if (rtld_arc.cg_from == PRF_OUTADDR)
1415 					arc.raw_frompc = s_highpc + 0x10;
1416 				else
1417 					arc.raw_frompc = (pctype)
1418 					    (uintptr_t)rtld_arc.cg_from;
1419 				arc.raw_selfpc = (pctype)
1420 				    (uintptr_t)rtld_arc.cg_to;
1421 				arc.raw_count = (actype)rtld_arc.cg_count;
1422 			}
1423 		} else {
1424 			if (Bflag) {
1425 				if (fread(&arc, sizeof (struct rawarc), 1,
1426 				    pfile) != 1) {
1427 					break;
1428 				}
1429 			} else {
1430 				/*
1431 				 * If these aren't big %pc's, we need to read
1432 				 * into the 32-bit raw arc structure, and
1433 				 * assign the members into the actual arc.
1434 				 */
1435 				struct rawarc32 arc32;
1436 				if (fread(&arc32, sizeof (struct rawarc32),
1437 				    1, pfile) != 1)
1438 					break;
1439 				arc.raw_frompc = (pctype)arc32.raw_frompc;
1440 				arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1441 				arc.raw_count  = (actype)arc32.raw_count;
1442 			}
1443 		}
1444 
1445 #ifdef DEBUG
1446 		if (debug & SAMPLEDEBUG) {
1447 			(void) printf("[getpfile] frompc 0x%llx selfpc "
1448 			    "0x%llx count %lld\n", arc.raw_frompc,
1449 			    arc.raw_selfpc, arc.raw_count);
1450 		}
1451 #endif /* DEBUG */
1452 		/*
1453 		 *	add this arc
1454 		 */
1455 		tally(&modules, &modules, &arc);
1456 	}
1457 	if (first_file)
1458 		first_file = FALSE;
1459 }
1460 
1461 static void
readsamples(FILE * pfile)1462 readsamples(FILE *pfile)
1463 {
1464 	sztype		i;
1465 	unsigned_UNIT	sample;
1466 
1467 	if (samples == 0) {
1468 		samples = (unsigned_UNIT *) calloc(nsamples,
1469 		    sizeof (unsigned_UNIT));
1470 		if (samples == 0) {
1471 			(void) fprintf(stderr,
1472 			    "%s: No room for %d sample pc's\n",
1473 			    whoami, sampbytes / sizeof (unsigned_UNIT));
1474 			exit(EX_OSERR);
1475 		}
1476 	}
1477 
1478 	for (i = 0; i < nsamples; i++) {
1479 		(void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1480 		if (feof(pfile))
1481 			break;
1482 		samples[i] += sample;
1483 	}
1484 	if (i != nsamples) {
1485 		(void) fprintf(stderr,
1486 		    "%s: unexpected EOF after reading %d/%d samples\n",
1487 		    whoami, --i, nsamples);
1488 		exit(EX_IOERR);
1489 	}
1490 }
1491 
1492 static void *
handle_versioned(FILE * pfile,char * filename,size_t * fsz)1493 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1494 {
1495 	int		fd;
1496 	bool		invalid_version;
1497 	caddr_t		fmem;
1498 	struct stat	buf;
1499 	ProfHeader	prof_hdr;
1500 	off_t		lret;
1501 
1502 	/*
1503 	 * Check versioning info. For now, let's say we provide
1504 	 * backward compatibility, so we accept all older versions.
1505 	 */
1506 	if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1507 		perror("fread()");
1508 		exit(EX_IOERR);
1509 	}
1510 
1511 	invalid_version = FALSE;
1512 	if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1513 		invalid_version = TRUE;
1514 	else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1515 		if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1516 			invalid_version = FALSE;
1517 	}
1518 
1519 	if (invalid_version) {
1520 		(void) fprintf(stderr, "%s: version %d.%d not supported\n",
1521 		    whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1522 		exit(EX_SOFTWARE);
1523 	}
1524 
1525 	/*
1526 	 * Map gmon.out onto memory.
1527 	 */
1528 	(void) fclose(pfile);
1529 	if ((fd = open(filename, O_RDONLY)) == -1) {
1530 		perror(filename);
1531 		exit(EX_IOERR);
1532 	}
1533 
1534 	if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1535 		perror(filename);
1536 		exit(EX_IOERR);
1537 	}
1538 	*fsz = lret;
1539 
1540 	fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1541 	if (fmem == MAP_FAILED) {
1542 		(void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1543 		exit(EX_IOERR);
1544 	}
1545 
1546 	/*
1547 	 * Before we close this fd, save this gmon.out's info to later verify
1548 	 * if the shared objects it references have changed since the time
1549 	 * they were used to generate this gmon.out
1550 	 */
1551 	if (fstat(fd, &buf) == -1) {
1552 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1553 		    whoami, filename);
1554 		exit(EX_NOINPUT);
1555 	}
1556 	gmonout_info.dev = buf.st_dev;
1557 	gmonout_info.ino = buf.st_ino;
1558 	gmonout_info.mtime = buf.st_mtime;
1559 	gmonout_info.size = buf.st_size;
1560 
1561 	(void) close(fd);
1562 
1563 	return ((void *) fmem);
1564 }
1565 
1566 static void *
openpfile(char * filename,size_t * fsz)1567 openpfile(char *filename, size_t *fsz)
1568 {
1569 	struct hdr	tmp;
1570 	FILE		*pfile;
1571 	unsigned long	magic_num;
1572 	size_t		hdrsize;
1573 	static bool	first_time = TRUE;
1574 	extern bool	old_style;
1575 
1576 	if ((pfile = fopen(filename, "r")) == NULL) {
1577 		perror(filename);
1578 		exit(EX_IOERR);
1579 	}
1580 
1581 	/*
1582 	 * Read in the magic. Note that we changed the cast "unsigned long"
1583 	 * to "unsigned int" because that's how h_magic is defined in the
1584 	 * new format ProfHeader.
1585 	 */
1586 	if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1587 		perror("fread()");
1588 		exit(EX_IOERR);
1589 	}
1590 
1591 	rewind(pfile);
1592 
1593 	/*
1594 	 * First check if this is versioned or *old-style* gmon.out
1595 	 */
1596 	if (magic_num == (unsigned int)PROF_MAGIC) {
1597 		if ((!first_time) && (old_style == TRUE)) {
1598 			(void) fprintf(stderr, "%s: can't mix old & new format "
1599 			    "profiled files\n", whoami);
1600 			exit(EX_SOFTWARE);
1601 		}
1602 		first_time = FALSE;
1603 		old_style = FALSE;
1604 		return (handle_versioned(pfile, filename, fsz));
1605 	}
1606 
1607 	if ((!first_time) && (old_style == FALSE)) {
1608 		(void) fprintf(stderr, "%s: can't mix old & new format "
1609 		    "profiled files\n", whoami);
1610 		exit(EX_SOFTWARE);
1611 	}
1612 
1613 	first_time = FALSE;
1614 	old_style = TRUE;
1615 	fsz = 0;
1616 
1617 	/*
1618 	 * Now, we need to determine if this is a run-time linker
1619 	 * profiled file or if it is a standard gmon.out.
1620 	 *
1621 	 * We do this by checking if magic matches PRF_MAGIC. If it
1622 	 * does, then this is a run-time linker profiled file, if it
1623 	 * doesn't, it must be a gmon.out file.
1624 	 */
1625 	if (magic_num == (unsigned long)PRF_MAGIC)
1626 		rflag = TRUE;
1627 	else
1628 		rflag = FALSE;
1629 
1630 	hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1631 
1632 	if (rflag) {
1633 		if (Bflag) {
1634 			L_hdr64		l_hdr64;
1635 
1636 			/*
1637 			 * If the rflag is set then the input file is
1638 			 * rtld profiled data, we'll read it in and convert
1639 			 * it to the standard format (ie: make it look like
1640 			 * a gmon.out file).
1641 			 */
1642 			if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1643 				perror("fread()");
1644 				exit(EX_IOERR);
1645 			}
1646 			if (l_hdr64.hd_version != PRF_VERSION_64) {
1647 				(void) fprintf(stderr,
1648 				    "%s: expected version %d, "
1649 				    "got version %d when processing 64-bit "
1650 				    "run-time linker profiled file.\n",
1651 				    whoami, PRF_VERSION_64, l_hdr64.hd_version);
1652 				exit(EX_SOFTWARE);
1653 			}
1654 			tmp.lowpc = 0;
1655 			tmp.highpc = (pctype)l_hdr64.hd_hpc;
1656 			tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1657 		} else {
1658 			L_hdr		l_hdr;
1659 
1660 			/*
1661 			 * If the rflag is set then the input file is
1662 			 * rtld profiled data, we'll read it in and convert
1663 			 * it to the standard format (ie: make it look like
1664 			 * a gmon.out file).
1665 			 */
1666 			if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1667 				perror("fread()");
1668 				exit(EX_IOERR);
1669 			}
1670 			if (l_hdr.hd_version != PRF_VERSION) {
1671 				(void) fprintf(stderr,
1672 				    "%s: expected version %d, "
1673 				    "got version %d when processing "
1674 				    "run-time linker profiled file.\n",
1675 				    whoami, PRF_VERSION, l_hdr.hd_version);
1676 				exit(EX_SOFTWARE);
1677 			}
1678 			tmp.lowpc = 0;
1679 			tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1680 			tmp.ncnt = hdrsize + l_hdr.hd_psize;
1681 		}
1682 	} else {
1683 		if (Bflag) {
1684 			if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1685 				perror("fread()");
1686 				exit(EX_IOERR);
1687 			}
1688 		} else {
1689 			/*
1690 			 * If we're not reading big %pc's, we need to read
1691 			 * the 32-bit header, and assign the members to
1692 			 * the actual header.
1693 			 */
1694 			struct hdr32 hdr32;
1695 			if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1696 				perror("fread()");
1697 				exit(EX_IOERR);
1698 			}
1699 			tmp.lowpc = hdr32.lowpc;
1700 			tmp.highpc = hdr32.highpc;
1701 			tmp.ncnt = hdr32.ncnt;
1702 		}
1703 	}
1704 
1705 	/*
1706 	 * perform sanity check on profiled file we've opened.
1707 	 */
1708 	if (tmp.lowpc >= tmp.highpc) {
1709 		if (rflag)
1710 			(void) fprintf(stderr,
1711 			    "%s: badly formed profiled data.\n",
1712 			    filename);
1713 		else
1714 			(void) fprintf(stderr,
1715 			    "%s: badly formed gmon.out file.\n",
1716 			    filename);
1717 		exit(EX_SOFTWARE);
1718 	}
1719 
1720 	if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1721 	    tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1722 		(void) fprintf(stderr,
1723 		    "%s: incompatible with first gmon file\n",
1724 		    filename);
1725 		exit(EX_IOERR);
1726 	}
1727 	h = tmp;
1728 	s_lowpc = h.lowpc;
1729 	s_highpc = h.highpc;
1730 	lowpc = h.lowpc / sizeof (UNIT);
1731 	highpc = h.highpc / sizeof (UNIT);
1732 	sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1733 	nsamples = sampbytes / sizeof (unsigned_UNIT);
1734 
1735 #ifdef DEBUG
1736 	if (debug & SAMPLEDEBUG) {
1737 		(void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1738 		    "0x%llx hdr.ncnt %lld\n",
1739 		    h.lowpc, h.highpc, h.ncnt);
1740 		(void) printf(
1741 		    "[openpfile]   s_lowpc 0x%llx   s_highpc 0x%llx\n",
1742 		    s_lowpc, s_highpc);
1743 		(void) printf(
1744 		    "[openpfile]     lowpc 0x%llx     highpc 0x%llx\n",
1745 		    lowpc, highpc);
1746 		(void) printf("[openpfile] sampbytes %d nsamples %d\n",
1747 		    sampbytes, nsamples);
1748 	}
1749 #endif /* DEBUG */
1750 
1751 	return ((void *) pfile);
1752 }
1753 
1754 /*
1755  * Information from a gmon.out file depends on whether it's versioned
1756  * or non-versioned, *old style* gmon.out. If old-style, it is in two
1757  * parts : an array of sampling hits within pc ranges, and the arcs. If
1758  * versioned, it contains a header, followed by any number of
1759  * modules/callgraph/pcsample_buffer objects.
1760  */
1761 static void
getpfile(char * filename)1762 getpfile(char *filename)
1763 {
1764 	void		*handle;
1765 	size_t		fsz;
1766 
1767 	handle = openpfile(filename, &fsz);
1768 
1769 	if (old_style) {
1770 		readsamples((FILE *)handle);
1771 		readarcs((FILE *)handle);
1772 		(void) fclose((FILE *)handle);
1773 		return;
1774 	}
1775 
1776 	getpfiledata((caddr_t)handle, fsz);
1777 	(void) munmap(handle, fsz);
1778 }
1779 
1780 int
main(int argc,char ** argv)1781 main(int argc, char **argv)
1782 {
1783 	char	**sp;
1784 	nltype	**timesortnlp;
1785 	int		c;
1786 	int		errflg;
1787 
1788 	prog_name = *argv;  /* preserve program name */
1789 	debug = 0;
1790 	nflag = FALSE;
1791 	bflag = TRUE;
1792 	lflag = FALSE;
1793 	Cflag = FALSE;
1794 	first_file = TRUE;
1795 	rflag = FALSE;
1796 	Bflag = FALSE;
1797 	errflg = FALSE;
1798 
1799 	while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1800 		switch (c) {
1801 		case 'a':
1802 			aflag = TRUE;
1803 			break;
1804 		case 'b':
1805 			bflag = FALSE;
1806 			break;
1807 		case 'c':
1808 			cflag = TRUE;
1809 			break;
1810 		case 'C':
1811 			Cflag = TRUE;
1812 			break;
1813 		case 'd':
1814 			dflag = TRUE;
1815 			debug |= atoi(optarg);
1816 			(void) printf("[main] debug = 0x%x\n", debug);
1817 			break;
1818 		case 'D':
1819 			Dflag = TRUE;
1820 			break;
1821 		case 'E':
1822 			addlist(Elist, optarg);
1823 			Eflag = TRUE;
1824 			addlist(elist, optarg);
1825 			eflag = TRUE;
1826 			break;
1827 		case 'e':
1828 			addlist(elist, optarg);
1829 			eflag = TRUE;
1830 			break;
1831 		case 'F':
1832 			addlist(Flist, optarg);
1833 			Fflag = TRUE;
1834 			addlist(flist, optarg);
1835 			fflag = TRUE;
1836 			break;
1837 		case 'f':
1838 			addlist(flist, optarg);
1839 			fflag = TRUE;
1840 			break;
1841 		case 'l':
1842 			lflag = TRUE;
1843 			break;
1844 		case 'n':
1845 			nflag = TRUE;
1846 			number_funcs_toprint = atoi(optarg);
1847 			break;
1848 		case 's':
1849 			sflag = TRUE;
1850 			break;
1851 		case 'z':
1852 			zflag = TRUE;
1853 			break;
1854 		case '?':
1855 			errflg++;
1856 
1857 		}
1858 
1859 	if (errflg) {
1860 		(void) fprintf(stderr,
1861 		    "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1862 		    "[ -E function-name ]\n\t[ -f function-name ] "
1863 		    "[ -F function-name  ]\n\t[  image-file  "
1864 		    "[ profile-file ... ] ]\n");
1865 		exit(EX_USAGE);
1866 	}
1867 
1868 	if (optind < argc) {
1869 		a_outname  = argv[optind++];
1870 	} else {
1871 		a_outname  = A_OUTNAME;
1872 	}
1873 	if (optind < argc) {
1874 		gmonname = argv[optind++];
1875 	} else {
1876 		gmonname = GMONNAME;
1877 	}
1878 	/*
1879 	 *	turn off default functions
1880 	 */
1881 	for (sp = &defaultEs[0]; *sp; sp++) {
1882 		Eflag = TRUE;
1883 		addlist(Elist, *sp);
1884 		eflag = TRUE;
1885 		addlist(elist, *sp);
1886 	}
1887 	/*
1888 	 *	how many ticks per second?
1889 	 *	if we can't tell, report time in ticks.
1890 	 */
1891 	hz = sysconf(_SC_CLK_TCK);
1892 	if (hz == -1) {
1893 		hz = 1;
1894 		(void) fprintf(stderr, "time is in ticks, not seconds\n");
1895 	}
1896 
1897 	getnfile(a_outname);
1898 
1899 	/*
1900 	 *	get information about mon.out file(s).
1901 	 */
1902 	do {
1903 		getpfile(gmonname);
1904 		if (optind < argc)
1905 			gmonname = argv[optind++];
1906 		else
1907 			optind++;
1908 	} while (optind <= argc);
1909 	/*
1910 	 *	dump out a gmon.sum file if requested
1911 	 */
1912 	if (sflag || Dflag)
1913 		dumpsum(GMONSUM);
1914 
1915 	if (old_style) {
1916 		/*
1917 		 *	assign samples to procedures
1918 		 */
1919 		asgnsamples();
1920 	}
1921 
1922 	/*
1923 	 *	assemble the dynamic profile
1924 	 */
1925 	timesortnlp = doarcs();
1926 
1927 	/*
1928 	 *	print the dynamic profile
1929 	 */
1930 #ifdef DEBUG
1931 	if (debug & ANYDEBUG) {
1932 		/* raw output of all symbols in all their glory */
1933 		int i;
1934 		(void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1935 		    "#calls, selfcalls, index \n");
1936 		for (i = 0; i < modules.nname; i++) {	/* Print each symbol */
1937 			if (timesortnlp[i]->name)
1938 				(void) printf(" %s ", timesortnlp[i]->name);
1939 			else
1940 				(void) printf(" <cycle> ");
1941 			(void) printf(" %lld ", timesortnlp[i]->value);
1942 			(void) printf(" %lld ", timesortnlp[i]->svalue);
1943 			(void) printf(" %f ", timesortnlp[i]->time);
1944 			(void) printf(" %lld ", timesortnlp[i]->ncall);
1945 			(void) printf(" %lld ", timesortnlp[i]->selfcalls);
1946 			(void) printf(" %d ", timesortnlp[i]->index);
1947 			(void) printf(" \n");
1948 		}
1949 	}
1950 #endif /* DEBUG */
1951 
1952 	printgprof(timesortnlp);
1953 	/*
1954 	 *	print the flat profile
1955 	 */
1956 	printprof();
1957 	/*
1958 	 *	print the index
1959 	 */
1960 	printindex();
1961 
1962 	/*
1963 	 * print the modules
1964 	 */
1965 	printmodules();
1966 
1967 	done();
1968 	/* NOTREACHED */
1969 	return (0);
1970 }
1971