xref: /illumos-gate/usr/src/cmd/sgs/gprof/common/gprof.c (revision dd72704bd9e794056c558153663c739e2012d721)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include	<sysexits.h>
27 #include	<stdlib.h>
28 #include	<stdio.h>
29 #include	<unistd.h>
30 #include	"gprof.h"
31 #include	"profile.h"
32 
33 bool		aflag;
34 bool		bflag;
35 bool		Bflag;
36 bool		cflag;
37 bool		Cflag;
38 bool		dflag;
39 bool		Dflag;
40 bool		eflag;
41 bool		Eflag;
42 bool		fflag;
43 bool		Fflag;
44 bool		lflag;
45 bool		sflag;
46 bool		zflag;
47 bool		nflag;
48 bool		rflag;
49 bool		first_file;
50 bool		old_style;
51 double		scale;
52 double		totime;
53 Size		n_pcsamples;
54 mod_info_t	modules;
55 pctype		s_lowpc;
56 pctype		s_highpc;
57 sztype		n_modules;
58 sztype		sampbytes;
59 sztype		nsamples;
60 unsigned short	*samples;
61 fl_info_t	aout_info;
62 fl_info_t	gmonout_info;
63 long		hz;
64 struct hdr	h;
65 unsigned char	*textspace;
66 int		debug;
67 int		number_funcs_toprint;
68 char		*a_outname;
69 char		*prog_name;
70 char		*gmonname;
71 char		*whoami = "gprof";
72 static pctype	lowpc, highpc;		/* range profiled, in UNIT's */
73 
/*
 *	Routine names that are -E excluded by default (NULL-terminated).
 */
static char *defaultEs[] = { "mcount", "__mcleanup", NULL };
82 
#ifdef DEBUG

/*
 * Printable names for the profile object types, used in debug output.
 * Slot 0 is the placeholder for an unrecognised type; presumably the
 * table is indexed by object type, with MAX_OBJTYPES as the highest
 * valid index (the users are outside this part of the file).
 */
#define	MAX_OBJTYPES	3
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};

#endif /* DEBUG */
95 
/*
 * Terminate the program, reporting success (EX_OK) to the caller's
 * environment.  Does not return.
 */
void
done(void)
{
	exit(EX_OK);
}
102 
103 static pctype
104 max(pctype a, pctype b)
105 {
106 	if (a > b)
107 		return (a);
108 	return (b);
109 }
110 
111 static pctype
112 min(pctype a, pctype b)
113 {
114 	if (a < b)
115 		return (a);
116 	return (b);
117 }
118 
119 /*
120  *	calculate scaled entry point addresses (to save time in asgnsamples),
121  *	and possibly push the scaled entry points over the entry mask,
122  *	if it turns out that the entry point is in one bucket and the code
123  *	for a routine is in the next bucket.
124  *
125  */
126 static void
127 alignentries(void)
128 {
129 	struct nl *nlp;
130 #ifdef DEBUG
131 	pctype			bucket_of_entry;
132 	pctype			bucket_of_code;
133 #endif /* DEBUG */
134 
135 	/* for old-style gmon.out, nameslist is only in modules.nl */
136 
137 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
138 		nlp->svalue = nlp->value / sizeof (UNIT);
139 #ifdef DEBUG
140 		bucket_of_entry = (nlp->svalue - lowpc) / scale;
141 		bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
142 		if (bucket_of_entry < bucket_of_code) {
143 			if (debug & SAMPLEDEBUG) {
144 				(void) printf(
145 				    "[alignentries] pushing svalue 0x%llx "
146 				    "to 0x%llx\n", nlp->svalue,
147 				    nlp->svalue + UNITS_TO_CODE);
148 			}
149 		}
150 #endif /* DEBUG */
151 	}
152 }
153 
154 /*
155  *	old-style gmon.out
156  *	------------------
157  *
158  *	Assign samples to the procedures to which they belong.
159  *
160  *	There are three cases as to where pcl and pch can be
161  *	with respect to the routine entry addresses svalue0 and svalue1
162  *	as shown in the following diagram.  overlap computes the
163  *	distance between the arrows, the fraction of the sample
164  *	that is to be credited to the routine which starts at svalue0.
165  *
166  *	    svalue0                                         svalue1
167  *	       |                                               |
168  *	       v                                               v
169  *
170  *	       +-----------------------------------------------+
171  *	       |					       |
172  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
173  *	  |         |		  |         |		  |         |
174  *	  +---------+		  +---------+		  +---------+
175  *
176  *	  ^         ^		  ^         ^		  ^         ^
177  *	  |         |		  |         |		  |         |
178  *	 pcl       pch		 pcl       pch		 pcl       pch
179  *
 *	For the vax we assert that samples will never fall in the first
 *	two bytes of any routine, since that is the entry mask;
 *	thus we call alignentries() to adjust the entry points if
 *	the entry mask falls in one bucket but the code for the routine
 *	doesn't start until the next bucket.  In conjunction with the
 *	alignment of routine addresses, this should allow us to have
 *	only one sample for every four bytes of text space and never
 *	have any overlap (the two end cases, above).
188  */
189 static void
190 asgnsamples(void)
191 {
192 	sztype		i, j;
193 	unsigned_UNIT	ccnt;
194 	double		time;
195 	pctype		pcl, pch;
196 	pctype		overlap;
197 	pctype		svalue0, svalue1;
198 
199 	extern mod_info_t	modules;
200 	nltype		*nl = modules.nl;
201 	sztype		nname = modules.nname;
202 
203 	/* read samples and assign to namelist symbols */
204 	scale = highpc - lowpc;
205 	scale /= nsamples;
206 	alignentries();
207 	for (i = 0, j = 1; i < nsamples; i++) {
208 		ccnt = samples[i];
209 		if (ccnt == 0)
210 			continue;
211 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
212 		pcl = lowpc + scale * i;
213 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
214 		pch = lowpc + scale * (i + 1);
215 		time = ccnt;
216 #ifdef DEBUG
217 		if (debug & SAMPLEDEBUG) {
218 			(void) printf(
219 			    "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
220 			    pcl, pch, ccnt);
221 		}
222 #endif /* DEBUG */
223 		totime += time;
224 		for (j = (j ? j - 1 : 0); j < nname; j++) {
225 			svalue0 = nl[j].svalue;
226 			svalue1 = nl[j+1].svalue;
227 			/*
228 			 *	if high end of tick is below entry address,
229 			 *	go for next tick.
230 			 */
231 			if (pch < svalue0)
232 				break;
233 			/*
234 			 *	if low end of tick into next routine,
235 			 *	go for next routine.
236 			 */
237 			if (pcl >= svalue1)
238 				continue;
239 			overlap = min(pch, svalue1) - max(pcl, svalue0);
240 			if (overlap != 0) {
241 #ifdef DEBUG
242 				if (debug & SAMPLEDEBUG) {
243 					(void) printf("[asgnsamples] "
244 					    "(0x%llx->0x%llx-0x%llx) %s gets "
245 					    "%f ticks %lld overlap\n",
246 					    nl[j].value/sizeof (UNIT), svalue0,
247 					    svalue1, nl[j].name,
248 					    overlap * time / scale, overlap);
249 				}
250 #endif /* DEBUG */
251 				nl[j].time += overlap * time / scale;
252 			}
253 		}
254 	}
255 #ifdef DEBUG
256 	if (debug & SAMPLEDEBUG) {
257 		(void) printf("[asgnsamples] totime %f\n", totime);
258 	}
259 #endif /* DEBUG */
260 }
261 
262 
263 static void
264 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
265     unsigned long ncallees)
266 {
267 	ProfCallGraph		prof_cgraph;
268 	ProfFunction		prof_func;
269 	arctype	*arcp;
270 	mod_info_t		*mi;
271 	nltype			*nlp;
272 	size_t			cur_offset;
273 	unsigned long		caller_id = 0, callee_id = 0;
274 
275 	/*
276 	 * Write the callgraph header
277 	 */
278 	prof_cgraph.type = PROF_CALLGRAPH_T;
279 	prof_cgraph.version = PROF_CALLGRAPH_VER;
280 	prof_cgraph.functions = PROFCGRAPH_SZ;
281 	prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
282 	if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
283 		perror(filename);
284 		exit(EX_IOERR);
285 	}
286 	/* CONSTCOND */
287 	if (CGRAPH_FILLER)
288 		(void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);
289 
290 	/* Current offset inside the callgraph object */
291 	cur_offset = prof_cgraph.functions;
292 
293 	for (mi = &modules; mi; mi = mi->next) {
294 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
295 			if (nlp->ncallers == 0)
296 				continue;
297 
298 			/* If this is the last callee, set next_to to 0 */
299 			callee_id++;
300 			if (callee_id == ncallees)
301 				prof_func.next_to = 0;
302 			else {
303 				prof_func.next_to = cur_offset +
304 				    nlp->ncallers * PROFFUNC_SZ;
305 			}
306 
307 			/*
308 			 * Dump this callee's raw arc information with all
309 			 * its callers
310 			 */
311 			caller_id = 1;
312 			for (arcp = nlp->parents; arcp;
313 			    arcp = arcp->arc_parentlist) {
314 				/*
315 				 * If no more callers for this callee, set
316 				 * next_from to 0
317 				 */
318 				if (caller_id == nlp->ncallers)
319 					prof_func.next_from = 0;
320 				else {
321 					prof_func.next_from = cur_offset +
322 					    PROFFUNC_SZ;
323 				}
324 
325 				prof_func.frompc =
326 				    arcp->arc_parentp->module->load_base +
327 				    (arcp->arc_parentp->value -
328 				    arcp->arc_parentp->module->txt_origin);
329 				prof_func.topc = mi->load_base +
330 				    (nlp->value - mi->txt_origin);
331 				prof_func.count = arcp->arc_count;
332 
333 
334 				if (fwrite(&prof_func, sizeof (ProfFunction),
335 				    1, fp) != 1) {
336 					perror(filename);
337 					exit(EX_IOERR);
338 				}
339 				/* CONSTCOND */
340 				if (FUNC_FILLER)
341 					(void) fseek(fp, FUNC_FILLER, SEEK_CUR);
342 
343 				cur_offset += PROFFUNC_SZ;
344 				caller_id++;
345 			}
346 		} /* for nlp... */
347 	} /* for mi... */
348 }
349 
350 /*
351  * To save all pc-hits in all the gmon.out's is infeasible, as this
352  * may become quite huge even with a small number of files to sum.
353  * Instead, we'll dump *fictitious hits* to correct functions
354  * by scanning module namelists. Again, since this is summing
355  * pc-hits, we may have to dump the pcsamples out in chunks if the
356  * number of pc-hits is high.
357  */
358 static void
359 dump_hits(FILE *fp, char *filename, nltype *nlp)
360 {
361 	Address		*p, hitpc;
362 	size_t		i, nelem, ntowrite;
363 
364 	if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
365 		nelem = PROF_BUFFER_SIZE;
366 
367 	if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
368 		(void) fprintf(stderr, "%s: no room for %d pcsamples\n",
369 		    whoami, nelem);
370 		exit(EX_OSERR);
371 	}
372 
373 	/*
374 	 * Set up *fictitious* hits (to function entry) buffer
375 	 */
376 	hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
377 	for (i = 0; i < nelem; i++)
378 		p[i] = hitpc;
379 
380 	for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
381 		if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
382 			perror(filename);
383 			exit(EX_IOERR);
384 		}
385 	}
386 
387 	if (ntowrite) {
388 		if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
389 			perror(filename);
390 			exit(EX_IOERR);
391 		}
392 	}
393 
394 	free(p);
395 }
396 
397 static void
398 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
399     unsigned long *ncallees)
400 {
401 	ProfBuffer		prof_buffer;
402 	arctype	*arcp;
403 	mod_info_t		*mi;
404 	nltype			*nlp;
405 
406 	prof_buffer.type = PROF_BUFFER_T;
407 	prof_buffer.version = PROF_BUFFER_VER;
408 	prof_buffer.buffer = PROFBUF_SZ;
409 	prof_buffer.bufsize = n_pcsamples;
410 	prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
411 	if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
412 		perror(filename);
413 		exit(EX_IOERR);
414 	}
415 	/* CONSTCOND */
416 	if (BUF_FILLER)
417 		(void) fseek(fp, BUF_FILLER, SEEK_CUR);
418 
419 	*tarcs = 0;
420 	*ncallees = 0;
421 	for (mi = &modules; mi; mi = mi->next) {
422 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
423 			if (nlp->nticks)
424 				dump_hits(fp, filename, nlp);
425 
426 			nlp->ncallers = 0;
427 			for (arcp = nlp->parents; arcp;
428 			    arcp = arcp->arc_parentlist) {
429 				(nlp->ncallers)++;
430 			}
431 
432 			if (nlp->ncallers) {
433 				(*tarcs) += nlp->ncallers;
434 				(*ncallees)++;
435 			}
436 		}
437 	}
438 }
439 
440 static void
441 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
442 {
443 	char		*pbuf, *p;
444 	size_t		namelen;
445 	Index		off_nxt, off_path;
446 	mod_info_t	*mi;
447 
448 	ProfModuleList	prof_modlist;
449 	ProfModule	prof_mod;
450 
451 	/* Allocate for path strings buffer */
452 	pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
453 	if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
454 		(void) fprintf(stderr, "%s: no room for %d bytes\n",
455 		    whoami, pbuf_sz * sizeof (char));
456 		exit(EX_OSERR);
457 	}
458 
459 	/* Dump out PROF_MODULE_T info for all non-aout modules */
460 	prof_modlist.type = PROF_MODULES_T;
461 	prof_modlist.version = PROF_MODULES_VER;
462 	prof_modlist.modules = PROFMODLIST_SZ;
463 	prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
464 	    pbuf_sz;
465 	if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
466 		perror(filename);
467 		exit(EX_IOERR);
468 	}
469 	/* CONSTCOND */
470 	if (MODLIST_FILLER)
471 		(void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
472 
473 	/*
474 	 * Initialize offsets for ProfModule elements.
475 	 */
476 	off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
477 	off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
478 
479 	for (mi = modules.next; mi; mi = mi->next) {
480 		if (mi->next)
481 			prof_mod.next = off_nxt;
482 		else
483 			prof_mod.next = 0;
484 		prof_mod.path = off_path;
485 		prof_mod.startaddr = mi->load_base;
486 		prof_mod.endaddr = mi->load_end;
487 
488 		if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
489 			perror(filename);
490 			exit(EX_IOERR);
491 		}
492 
493 		/* CONSTCOND */
494 		if (MOD_FILLER)
495 			(void) fseek(fp, MOD_FILLER, SEEK_CUR);
496 
497 		(void) strcpy(p, mi->name);
498 		namelen = strlen(mi->name);
499 		p += namelen + 1;
500 
501 		/* Note that offset to every path str need not be aligned */
502 		off_nxt += PROFMOD_SZ;
503 		off_path += namelen + 1;
504 	}
505 
506 	/* Write out the module path strings */
507 	if (pbuf_sz) {
508 		if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
509 			perror(filename);
510 			exit(EX_IOERR);
511 		}
512 
513 		free(pbuf);
514 	}
515 }
516 
517 /*
518  * If we have inactive modules, their current load addresses may overlap with
519  * active ones, and so we've to assign fictitious, non-overlapping addresses
520  * to all modules before we dump them.
521  */
522 static void
523 fixup_maps(size_t *pathsz)
524 {
525 	unsigned int	n_inactive = 0;
526 	Address		lbase = 0, lend;
527 	mod_info_t	*mi;
528 
529 	/* Pick the lowest load address among modules */
530 	*pathsz = 0;
531 	for (mi = &modules; mi; mi = mi->next) {
532 
533 		if (mi->active == FALSE)
534 			n_inactive++;
535 
536 		if (mi == &modules || mi->load_base < lbase)
537 			lbase = mi->load_base;
538 
539 		/*
540 		 * Return total path size of non-aout modules only
541 		 */
542 		if (mi != &modules)
543 			*pathsz = (*pathsz) + strlen(mi->name) + 1;
544 	}
545 
546 	/*
547 	 * All module info is in fine shape already if there are no
548 	 * inactive modules
549 	 */
550 	if (n_inactive == 0)
551 		return;
552 
553 	/*
554 	 * Assign fictitious load addresses to all (non-aout) modules so
555 	 * that sum info can be dumped out.
556 	 */
557 	for (mi = modules.next; mi; mi = mi->next) {
558 		lend = lbase + (mi->data_end - mi->txt_origin);
559 		if ((lbase < modules.load_base && lend < modules.load_base) ||
560 		    (lbase > modules.load_end && lend > modules.load_end)) {
561 
562 			mi->load_base = lbase;
563 			mi->load_end = lend;
564 
565 			/* just to give an appearance of reality */
566 			lbase = CEIL(lend + PGSZ, PGSZ);
567 		} else {
568 			/*
569 			 * can't use this lbase & lend pair, as it
570 			 * overlaps with aout's addresses
571 			 */
572 			mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
573 			mi->load_end = mi->load_base + (lend - lbase);
574 
575 			lbase = CEIL(mi->load_end + PGSZ, PGSZ);
576 		}
577 	}
578 }
579 
580 static void
581 dump_gprofhdr(FILE *fp, char *filename)
582 {
583 	ProfHeader	prof_hdr;
584 
585 	prof_hdr.h_magic = PROF_MAGIC;
586 	prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
587 	prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
588 	prof_hdr.size = PROFHDR_SZ;
589 	if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
590 		perror(filename);
591 		exit(EX_IOERR);
592 	}
593 
594 	/* CONSTCOND */
595 	if (HDR_FILLER)
596 		(void) fseek(fp, HDR_FILLER, SEEK_CUR);
597 }
598 
599 static void
600 dumpsum_ostyle(char *sumfile)
601 {
602 	nltype *nlp;
603 	arctype *arcp;
604 	struct rawarc arc;
605 	struct rawarc32 arc32;
606 	FILE *sfile;
607 
608 	if ((sfile = fopen(sumfile, "w")) == NULL) {
609 		perror(sumfile);
610 		exit(EX_IOERR);
611 	}
612 	/*
613 	 * dump the header; use the last header read in
614 	 */
615 	if (Bflag) {
616 		if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
617 			perror(sumfile);
618 			exit(EX_IOERR);
619 		}
620 	} else {
621 		struct hdr32 hdr;
622 		hdr.lowpc  = (pctype32)h.lowpc;
623 		hdr.highpc = (pctype32)h.highpc;
624 		hdr.ncnt   = (pctype32)h.ncnt;
625 		if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
626 			perror(sumfile);
627 			exit(EX_IOERR);
628 		}
629 	}
630 	/*
631 	 * dump the samples
632 	 */
633 	if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
634 	    nsamples) {
635 		perror(sumfile);
636 		exit(EX_IOERR);
637 	}
638 	/*
639 	 * dump the normalized raw arc information. For old-style dumping,
640 	 * the only namelist is in modules.nl
641 	 */
642 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
643 		for (arcp = nlp->children; arcp;
644 		    arcp = arcp->arc_childlist) {
645 			if (Bflag) {
646 				arc.raw_frompc = arcp->arc_parentp->value;
647 				arc.raw_selfpc = arcp->arc_childp->value;
648 				arc.raw_count = arcp->arc_count;
649 				if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
650 					perror(sumfile);
651 					exit(EX_IOERR);
652 				}
653 			} else {
654 				arc32.raw_frompc =
655 				    (pctype32)arcp->arc_parentp->value;
656 				arc32.raw_selfpc =
657 				    (pctype32)arcp->arc_childp->value;
658 				arc32.raw_count = (actype32)arcp->arc_count;
659 				if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
660 				    1) {
661 					perror(sumfile);
662 					exit(EX_IOERR);
663 				}
664 			}
665 #ifdef DEBUG
666 			if (debug & SAMPLEDEBUG) {
667 				(void) printf(
668 				    "[dumpsum_ostyle] frompc 0x%llx selfpc "
669 				    "0x%llx count %lld\n", arc.raw_frompc,
670 				    arc.raw_selfpc, arc.raw_count);
671 			}
672 #endif /* DEBUG */
673 		}
674 	}
675 	(void) fclose(sfile);
676 }
677 
678 /*
679  * dump out the gmon.sum file
680  */
681 static void
682 dumpsum(char *sumfile)
683 {
684 	FILE		*sfile;
685 	size_t		pathbuf_sz;
686 	unsigned long	total_arcs;	/* total number of arcs in all */
687 	unsigned long	ncallees;	/* no. of callees with parents */
688 
689 	if (old_style) {
690 		dumpsum_ostyle(sumfile);
691 		return;
692 	}
693 
694 	if ((sfile = fopen(sumfile, "w")) == NULL) {
695 		perror(sumfile);
696 		exit(EX_IOERR);
697 	}
698 
699 	/*
700 	 * Dump the new-style gprof header. Even if one of the original
701 	 * profiled-files was of a older version, the summed file is of
702 	 * current version only.
703 	 */
704 	dump_gprofhdr(sfile, sumfile);
705 
706 	/*
707 	 * Fix up load-maps and dump out modules info
708 	 *
709 	 * Fix up module load maps so inactive modules get *some* address
710 	 * (and btw, could you get the total size of non-aout module path
711 	 * strings please ?)
712 	 */
713 	fixup_maps(&pathbuf_sz);
714 	dump_modules(sfile, sumfile, pathbuf_sz);
715 
716 
717 	/*
718 	 * Dump out the summ'd pcsamples
719 	 *
720 	 * For dumping call graph information later, we need certain
721 	 * statistics (like total arcs, number of callers for each node);
722 	 * collect these also while we are at it.
723 	 */
724 	dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
725 
726 	/*
727 	 * Dump out the summ'd call graph information
728 	 */
729 	dump_callgraph(sfile, sumfile, total_arcs, ncallees);
730 
731 
732 	(void) fclose(sfile);
733 }
734 
735 static void
736 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
737 {
738 	nltype		*parentp;
739 	nltype		*childp;
740 
741 	/*
742 	 * if count == 0 this is a null arc and
743 	 * we don't need to tally it.
744 	 */
745 	if (rawp->raw_count == 0)
746 		return;
747 
748 	/*
749 	 * Lookup the caller and callee pcs in namelists of
750 	 * appropriate modules
751 	 */
752 	parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
753 	childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
754 	if (childp && parentp) {
755 		if (!Dflag)
756 			childp->ncall += rawp->raw_count;
757 		else {
758 			if (first_file)
759 				childp->ncall += rawp->raw_count;
760 			else {
761 				childp->ncall -= rawp->raw_count;
762 				if (childp->ncall < 0)
763 					childp->ncall = 0;
764 			}
765 		}
766 
767 #ifdef DEBUG
768 		if (debug & TALLYDEBUG) {
769 			(void) printf("[tally] arc from %s to %s traversed "
770 			    "%lld times\n", parentp->name,
771 			    childp->name, rawp->raw_count);
772 		}
773 #endif /* DEBUG */
774 		addarc(parentp, childp, rawp->raw_count);
775 	}
776 }
777 
778 /*
779  * Look up a module's base address in a sorted list of pc-hits. Unlike
780  * nllookup(), this deals with misses by mapping them to the next *higher*
781  * pc-hit. This is so that we get into the module's first pc-hit rightaway,
782  * even if the module's entry-point (load_base) itself is not a hit.
783  */
784 static Address *
785 locate(Address	*pclist, size_t nelem, Address keypc)
786 {
787 	size_t	low = 0, middle, high = nelem - 1;
788 
789 	if (keypc <= pclist[low])
790 		return (pclist);
791 
792 	if (keypc > pclist[high])
793 		return (NULL);
794 
795 	while (low != high) {
796 		middle = (high + low) >> 1;
797 
798 		if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
799 			return (&pclist[middle + 1]);
800 
801 		if (pclist[middle] >= keypc)
802 			high = middle;
803 		else
804 			low = middle + 1;
805 	}
806 
807 	/* must never reach here! */
808 	return (NULL);
809 }
810 
811 static void
812 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
813 {
814 	Address		*pcptr, *pcse = pcsmpl + n_samples;
815 	pctype		nxt_func;
816 	nltype		*fnl;
817 	size_t		func_nticks;
818 #ifdef DEBUG
819 	size_t		n_hits_in_module = 0;
820 #endif /* DEBUG */
821 
822 	/* Locate the first pc-hit for this module */
823 	if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
824 #ifdef DEBUG
825 		if (debug & PCSMPLDEBUG) {
826 			(void) printf("[assign_pcsamples] no pc-hits in\n");
827 			(void) printf(
828 			    "                   `%s'\n", module->name);
829 		}
830 #endif /* DEBUG */
831 		return;			/* no pc-hits in this module */
832 	}
833 
834 	/* Assign all pc-hits in this module to appropriate functions */
835 	while ((pcptr < pcse) && (*pcptr < module->load_end)) {
836 
837 		/* Update the corresponding function's time */
838 		if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) {
839 			/*
840 			 * Collect all pc-hits in this function. Each
841 			 * pc-hit counts as 1 tick.
842 			 */
843 			func_nticks = 0;
844 			while ((pcptr < pcse) && (*pcptr < nxt_func)) {
845 				func_nticks++;
846 				pcptr++;
847 			}
848 
849 			if (func_nticks == 0)
850 				pcptr++;
851 			else {
852 				fnl->nticks += func_nticks;
853 				fnl->time += func_nticks;
854 				totime += func_nticks;
855 			}
856 
857 #ifdef DEBUG
858 			n_hits_in_module += func_nticks;
859 #endif /* DEBUG */
860 		} else {
861 			/*
862 			 * pc sample could not be assigned to function;
863 			 * probably in a PLT
864 			 */
865 			pcptr++;
866 		}
867 	}
868 
869 #ifdef DEBUG
870 	if (debug & PCSMPLDEBUG) {
871 		(void) printf(
872 		    "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
873 		(void) printf("                   `%s'\n", module->name);
874 	}
875 #endif /* DEBUG */
876 }
877 
878 int
879 pc_cmp(const void *arg1, const void *arg2)
880 {
881 	Address *pc1 = (Address *)arg1;
882 	Address *pc2 = (Address *)arg2;
883 
884 	if (*pc1 > *pc2)
885 		return (1);
886 
887 	if (*pc1 < *pc2)
888 		return (-1);
889 
890 	return (0);
891 }
892 
893 static void
894 process_pcsamples(ProfBuffer *bufp)
895 {
896 	Address		*pc_samples;
897 	mod_info_t	*mi;
898 	caddr_t		p;
899 	size_t		chunk_size, nelem_read, nelem_to_read;
900 
901 #ifdef DEBUG
902 	if (debug & PCSMPLDEBUG) {
903 		(void) printf(
904 		    "[process_pcsamples] number of pcsamples = %lld\n",
905 		    bufp->bufsize);
906 	}
907 #endif /* DEBUG */
908 
909 	/* buffer with no pc samples ? */
910 	if (bufp->bufsize == 0)
911 		return;
912 
913 	/*
914 	 * If we're processing pcsamples of a profile sum, we could have
915 	 * more than PROF_BUFFER_SIZE number of samples. In such a case,
916 	 * we must read the pcsamples in chunks.
917 	 */
918 	if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
919 		chunk_size = PROF_BUFFER_SIZE;
920 
921 	/* Allocate for the pcsample chunk */
922 	pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
923 	if (pc_samples == NULL) {
924 		(void) fprintf(stderr, "%s: no room for %d sample pc's\n",
925 		    whoami, chunk_size);
926 		exit(EX_OSERR);
927 	}
928 
929 	/* Copy the current set of pcsamples */
930 	nelem_read = 0;
931 	nelem_to_read = bufp->bufsize;
932 	p = (char *)bufp + bufp->buffer;
933 
934 	while (nelem_read < nelem_to_read) {
935 		(void) memcpy((void *) pc_samples, p,
936 		    chunk_size * sizeof (Address));
937 
938 		/* Sort the pc samples */
939 		qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
940 
941 		/*
942 		 * Assign pcsamples to functions in the currently active
943 		 * module list
944 		 */
945 		for (mi = &modules; mi; mi = mi->next) {
946 			if (mi->active == FALSE)
947 				continue;
948 			assign_pcsamples(mi, pc_samples, chunk_size);
949 		}
950 
951 		p += (chunk_size * sizeof (Address));
952 		nelem_read += chunk_size;
953 
954 		if ((nelem_to_read - nelem_read) < chunk_size)
955 			chunk_size = nelem_to_read - nelem_read;
956 	}
957 
958 	free(pc_samples);
959 
960 	/* Update total number of pcsamples read so far */
961 	n_pcsamples += bufp->bufsize;
962 }
963 
964 static mod_info_t *
965 find_module(Address addr)
966 {
967 	mod_info_t	*mi;
968 
969 	for (mi = &modules; mi; mi = mi->next) {
970 		if (mi->active == FALSE)
971 			continue;
972 
973 		if (addr >= mi->load_base && addr < mi->load_end)
974 			return (mi);
975 	}
976 
977 	return (NULL);
978 }
979 
980 static void
981 process_cgraph(ProfCallGraph *cgp)
982 {
983 	struct rawarc	arc;
984 	mod_info_t	*callee_mi, *caller_mi;
985 	ProfFunction	*calleep, *callerp;
986 	Index		caller_off, callee_off;
987 
988 	/*
989 	 * Note that *callee_off* increment in the for loop below
990 	 * uses *calleep* and *calleep* doesn't get set until the for loop
991 	 * is entered. We don't expect the increment to be executed before
992 	 * the loop body is executed atleast once, so this should be ok.
993 	 */
994 	for (callee_off = cgp->functions; callee_off;
995 	    callee_off = calleep->next_to) {
996 
997 		/* LINTED: pointer cast */
998 		calleep = (ProfFunction *)((char *)cgp + callee_off);
999 
1000 		/*
1001 		 * We could choose either to sort the {caller, callee}
1002 		 * list twice and assign callee/caller to modules or inspect
1003 		 * each callee/caller in the active modules list. Since
1004 		 * the modules list is usually very small, we'l choose the
1005 		 * latter.
1006 		 */
1007 
1008 		/*
1009 		 * If we cannot identify a callee with a module, there's
1010 		 * no use worrying about who called it.
1011 		 */
1012 		if ((callee_mi = find_module(calleep->topc)) == NULL) {
1013 #ifdef DEBUG
1014 			if (debug & CGRAPHDEBUG) {
1015 				(void) printf(
1016 				    "[process_cgraph] callee %#llx missed\n",
1017 				    calleep->topc);
1018 			}
1019 #endif /* DEBUG */
1020 			continue;
1021 		} else
1022 			arc.raw_selfpc = calleep->topc;
1023 
1024 		for (caller_off = callee_off; caller_off;
1025 		    caller_off = callerp->next_from)  {
1026 
1027 			/* LINTED: pointer cast */
1028 			callerp = (ProfFunction *)((char *)cgp + caller_off);
1029 			if ((caller_mi = find_module(callerp->frompc)) ==
1030 			    NULL) {
1031 #ifdef DEBUG
1032 				if (debug & CGRAPHDEBUG) {
1033 					(void) printf(
1034 					    "[process_cgraph] caller %#llx "
1035 					    "missed\n", callerp->frompc);
1036 				}
1037 #endif /* DEBUG */
1038 				continue;
1039 			}
1040 
1041 			arc.raw_frompc = callerp->frompc;
1042 			arc.raw_count = callerp->count;
1043 
1044 #ifdef DEBUG
1045 			if (debug & CGRAPHDEBUG) {
1046 				(void) printf(
1047 				    "[process_cgraph] arc <%#llx, %#llx, "
1048 				    "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
1049 				    arc.raw_count);
1050 			}
1051 #endif /* DEBUG */
1052 			tally(caller_mi, callee_mi, &arc);
1053 		}
1054 	}
1055 
1056 #ifdef DEBUG
1057 	puts("\n");
1058 #endif /* DEBUG */
1059 }
1060 
1061 /*
1062  * Two modules overlap each other if they don't lie completely *outside*
1063  * each other.
1064  */
1065 static bool
1066 does_overlap(ProfModule *new, mod_info_t *old)
1067 {
1068 	/* case 1: new module lies completely *before* the old one */
1069 	if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1070 		return (FALSE);
1071 
1072 	/* case 2: new module lies completely *after* the old one */
1073 	if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1074 		return (FALSE);
1075 
1076 	/* probably a dlopen: the modules overlap each other */
1077 	return (TRUE);
1078 }
1079 
1080 static bool
1081 is_same_as_aout(char *modpath, struct stat *buf)
1082 {
1083 	if (stat(modpath, buf) == -1) {
1084 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1085 		    whoami, modpath);
1086 		exit(EX_NOINPUT);
1087 	}
1088 
1089 	if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1090 		return (TRUE);
1091 	else
1092 		return (FALSE);
1093 }
1094 
1095 static void
1096 process_modules(ProfModuleList *modlp)
1097 {
1098 	ProfModule	*newmodp;
1099 	mod_info_t	*mi, *last, *new_module;
1100 	char		*so_path;
1101 	bool		more_modules = TRUE;
1102 	struct stat	so_statbuf;
1103 
1104 #ifdef DEBUG
1105 	if (debug & MODULEDEBUG) {
1106 		(void) printf("[process_modules] module obj version %u\n",
1107 		    modlp->version);
1108 	}
1109 #endif /* DEBUG */
1110 
1111 	/* Check version of module type object */
1112 	if (modlp->version > PROF_MODULES_VER) {
1113 		(void) fprintf(stderr, "%s: version %d for module type objects"
1114 		    "is not supported\n", whoami, modlp->version);
1115 		exit(EX_SOFTWARE);
1116 	}
1117 
1118 
1119 	/*
1120 	 * Scan the PROF_MODULES_T list and add modules to current list
1121 	 * of modules, if they're not present already
1122 	 */
1123 	/* LINTED: pointer cast */
1124 	newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1125 	do {
1126 		/*
1127 		 * Since the prog could've been renamed after its run, we
1128 		 * should see if this overlaps a.out. If it does, it is
1129 		 * probably the renamed aout. We should also skip any other
1130 		 * non-sharedobj's that we see (or should we report an error ?)
1131 		 */
1132 		so_path = (caddr_t)modlp + newmodp->path;
1133 		if (does_overlap(newmodp, &modules) ||
1134 		    is_same_as_aout(so_path, &so_statbuf) ||
1135 		    (!is_shared_obj(so_path))) {
1136 
1137 			if (!newmodp->next)
1138 				more_modules = FALSE;
1139 
1140 			/* LINTED: pointer cast */
1141 			newmodp = (ProfModule *)
1142 			    ((caddr_t)modlp + newmodp->next);
1143 #ifdef DEBUG
1144 			if (debug & MODULEDEBUG) {
1145 				(void) printf(
1146 				    "[process_modules] `%s'\n", so_path);
1147 				(void) printf("                  skipped\n");
1148 			}
1149 #endif /* DEBUG */
1150 			continue;
1151 		}
1152 #ifdef DEBUG
1153 		if (debug & MODULEDEBUG)
1154 			(void) printf("[process_modules] `%s'...\n", so_path);
1155 #endif /* DEBUG */
1156 
1157 		/*
1158 		 * Check all modules (leave the first one, 'cos that
1159 		 * is the program executable info). If this module is already
1160 		 * there in the list, update the load addresses and proceed.
1161 		 */
1162 		last = &modules;
1163 		while ((mi = last->next) != NULL) {
1164 			/*
1165 			 * We expect the full pathname for all shared objects
1166 			 * needed by the program executable. In this case, we
1167 			 * simply need to compare the paths to see if they are
1168 			 * the same file.
1169 			 */
1170 			if (strcmp(mi->name, so_path) == 0)
1171 				break;
1172 
1173 			/*
1174 			 * Check if this new shared object will overlap
1175 			 * any existing module. If yes, remove the old one
1176 			 * from the linked list (but don't free it, 'cos
1177 			 * there may be symbols referring to this module
1178 			 * still)
1179 			 */
1180 			if (does_overlap(newmodp, mi)) {
1181 #ifdef DEBUG
1182 				if (debug & MODULEDEBUG) {
1183 					(void) printf(
1184 					    "[process_modules] `%s'\n",
1185 					    so_path);
1186 					(void) printf(
1187 					    "                  overlaps\n");
1188 					(void) printf(
1189 					    "                  `%s'\n",
1190 					    mi->name);
1191 				}
1192 #endif /* DEBUG */
1193 				mi->active = FALSE;
1194 			}
1195 
1196 			last = mi;
1197 		}
1198 
1199 		/* Module already there, skip it */
1200 		if (mi != NULL) {
1201 			mi->load_base = newmodp->startaddr;
1202 			mi->load_end = newmodp->endaddr;
1203 			mi->active = TRUE;
1204 			if (!newmodp->next)
1205 				more_modules = FALSE;
1206 
1207 			/* LINTED: pointer cast */
1208 			newmodp = (ProfModule *)
1209 			    ((caddr_t)modlp + newmodp->next);
1210 
1211 #ifdef DEBUG
1212 			if (debug & MODULEDEBUG) {
1213 				(void) printf("[process_modules] base=%#llx, "
1214 				    "end=%#llx\n", mi->load_base, mi->load_end);
1215 			}
1216 #endif /* DEBUG */
1217 			continue;
1218 		}
1219 
1220 		/*
1221 		 * Check if gmon.out is outdated with respect to the new
1222 		 * module we want to add
1223 		 */
1224 		if (gmonout_info.mtime < so_statbuf.st_mtime) {
1225 			(void) fprintf(stderr,
1226 			    "%s: shared obj outdates prof info\n", whoami);
1227 			(void) fprintf(stderr, "\t(newer %s)\n", so_path);
1228 			exit(EX_NOINPUT);
1229 		}
1230 
1231 		/* Create a new module element */
1232 		new_module = malloc(sizeof (mod_info_t));
1233 		if (new_module == NULL) {
1234 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1235 			    whoami, sizeof (mod_info_t));
1236 			exit(EX_OSERR);
1237 		}
1238 
1239 		/* and fill in info... */
1240 		new_module->id = n_modules + 1;
1241 		new_module->load_base = newmodp->startaddr;
1242 		new_module->load_end = newmodp->endaddr;
1243 		new_module->name = malloc(strlen(so_path) + 1);
1244 		if (new_module->name == NULL) {
1245 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1246 			    whoami, strlen(so_path) + 1);
1247 			exit(EX_OSERR);
1248 		}
1249 		(void) strcpy(new_module->name, so_path);
1250 #ifdef DEBUG
1251 		if (debug & MODULEDEBUG) {
1252 			(void) printf(
1253 			    "[process_modules] base=%#llx, end=%#llx\n",
1254 			    new_module->load_base, new_module->load_end);
1255 		}
1256 #endif /* DEBUG */
1257 
1258 		/* Create this module's nameslist */
1259 		process_namelist(new_module);
1260 
1261 		/* Add it to the tail of active module list */
1262 		last->next = new_module;
1263 		n_modules++;
1264 
1265 #ifdef DEBUG
1266 		if (debug & MODULEDEBUG) {
1267 			(void) printf(
1268 			    "[process_modules] total shared objects = %ld\n",
1269 			    n_modules - 1);
1270 		}
1271 #endif /* DEBUG */
1272 		/*
1273 		 * Move to the next module in the PROF_MODULES_T list
1274 		 * (if present)
1275 		 */
1276 		if (!newmodp->next)
1277 			more_modules = FALSE;
1278 
1279 		/* LINTED: pointer cast */
1280 		newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1281 
1282 	} while (more_modules);
1283 }
1284 
1285 static void
1286 reset_active_modules(void)
1287 {
1288 	mod_info_t	*mi;
1289 
1290 	/* Except the executable, no other module should remain active */
1291 	for (mi = modules.next; mi; mi = mi->next)
1292 		mi->active = FALSE;
1293 }
1294 
1295 static void
1296 getpfiledata(caddr_t memp, size_t fsz)
1297 {
1298 	ProfObject	*objp;
1299 	caddr_t		file_end;
1300 	bool		found_pcsamples = FALSE, found_cgraph = FALSE;
1301 
1302 	/*
1303 	 * Before processing a new gmon.out, all modules except the
1304 	 * program executable must be made inactive, so that symbols
1305 	 * are searched only in the program executable, if we don't
1306 	 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1307 	 * because we need the active module data after we're done with
1308 	 * the last gmon.out, if we're doing summing.
1309 	 */
1310 	reset_active_modules();
1311 
1312 	file_end = memp + fsz;
1313 	/* LINTED: pointer cast */
1314 	objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1315 	while ((caddr_t)objp < file_end) {
1316 #ifdef DEBUG
1317 		{
1318 			unsigned int	type = 0;
1319 
1320 			if (debug & MONOUTDEBUG) {
1321 				if (objp->type <= MAX_OBJTYPES)
1322 					type = objp->type;
1323 
1324 				(void) printf(
1325 				    "\n[getpfiledata] object %s [%#lx]\n",
1326 				    objname[type], objp->type);
1327 			}
1328 		}
1329 #endif /* DEBUG */
1330 		switch (objp->type) {
1331 			case PROF_MODULES_T :
1332 				process_modules((ProfModuleList *) objp);
1333 				break;
1334 
1335 			case PROF_CALLGRAPH_T :
1336 				process_cgraph((ProfCallGraph *) objp);
1337 				found_cgraph = TRUE;
1338 				break;
1339 
1340 			case PROF_BUFFER_T :
1341 				process_pcsamples((ProfBuffer *) objp);
1342 				found_pcsamples = TRUE;
1343 				break;
1344 
1345 			default :
1346 				(void) fprintf(stderr,
1347 				    "%s: unknown prof object type=%d\n",
1348 				    whoami, objp->type);
1349 				exit(EX_SOFTWARE);
1350 		}
1351 		/* LINTED: pointer cast */
1352 		objp = (ProfObject *)((caddr_t)objp + objp->size);
1353 	}
1354 
1355 	if (!found_cgraph || !found_pcsamples) {
1356 		(void) fprintf(stderr,
1357 		    "%s: missing callgraph/pcsamples object\n", whoami);
1358 		exit(EX_SOFTWARE);
1359 	}
1360 
1361 	if ((caddr_t)objp > file_end) {
1362 		(void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1363 		exit(EX_SOFTWARE);
1364 	}
1365 
1366 	if (first_file)
1367 		first_file = FALSE;
1368 }
1369 
1370 static void
1371 readarcs(FILE *pfile)
1372 {
1373 	/*
1374 	 *	the rest of the file consists of
1375 	 *	a bunch of <from,self,count> tuples.
1376 	 */
1377 	/* CONSTCOND */
1378 	while (1) {
1379 		struct rawarc	arc;
1380 
1381 		if (rflag) {
1382 			if (Bflag) {
1383 				L_cgarc64		rtld_arc64;
1384 
1385 				/*
1386 				 * If rflag is set then this is an profiled
1387 				 * image generated by rtld.  It needs to be
1388 				 * 'converted' to the standard data format.
1389 				 */
1390 				if (fread(&rtld_arc64,
1391 				    sizeof (L_cgarc64), 1, pfile) != 1)
1392 					break;
1393 
1394 				if (rtld_arc64.cg_from == PRF_OUTADDR64)
1395 					arc.raw_frompc = s_highpc + 0x10;
1396 				else
1397 					arc.raw_frompc =
1398 					    (pctype)rtld_arc64.cg_from;
1399 				arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1400 				arc.raw_count = (actype)rtld_arc64.cg_count;
1401 			} else {
1402 				L_cgarc		rtld_arc;
1403 
1404 				/*
1405 				 * If rflag is set then this is an profiled
1406 				 * image generated by rtld.  It needs to be
1407 				 * 'converted' to the standard data format.
1408 				 */
1409 				if (fread(&rtld_arc,
1410 				    sizeof (L_cgarc), 1, pfile) != 1)
1411 					break;
1412 
1413 				if (rtld_arc.cg_from == PRF_OUTADDR)
1414 					arc.raw_frompc = s_highpc + 0x10;
1415 				else
1416 					arc.raw_frompc = (pctype)
1417 					    (uintptr_t)rtld_arc.cg_from;
1418 				arc.raw_selfpc = (pctype)
1419 				    (uintptr_t)rtld_arc.cg_to;
1420 				arc.raw_count = (actype)rtld_arc.cg_count;
1421 			}
1422 		} else {
1423 			if (Bflag) {
1424 				if (fread(&arc, sizeof (struct rawarc), 1,
1425 				    pfile) != 1) {
1426 					break;
1427 				}
1428 			} else {
1429 				/*
1430 				 * If these aren't big %pc's, we need to read
1431 				 * into the 32-bit raw arc structure, and
1432 				 * assign the members into the actual arc.
1433 				 */
1434 				struct rawarc32 arc32;
1435 				if (fread(&arc32, sizeof (struct rawarc32),
1436 				    1, pfile) != 1)
1437 					break;
1438 				arc.raw_frompc = (pctype)arc32.raw_frompc;
1439 				arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1440 				arc.raw_count  = (actype)arc32.raw_count;
1441 			}
1442 		}
1443 
1444 #ifdef DEBUG
1445 		if (debug & SAMPLEDEBUG) {
1446 			(void) printf("[getpfile] frompc 0x%llx selfpc "
1447 			    "0x%llx count %lld\n", arc.raw_frompc,
1448 			    arc.raw_selfpc, arc.raw_count);
1449 		}
1450 #endif /* DEBUG */
1451 		/*
1452 		 *	add this arc
1453 		 */
1454 		tally(&modules, &modules, &arc);
1455 	}
1456 	if (first_file)
1457 		first_file = FALSE;
1458 }
1459 
1460 static void
1461 readsamples(FILE *pfile)
1462 {
1463 	sztype		i;
1464 	unsigned_UNIT	sample;
1465 
1466 	if (samples == 0) {
1467 		samples = (unsigned_UNIT *) calloc(nsamples,
1468 		    sizeof (unsigned_UNIT));
1469 		if (samples == 0) {
1470 			(void) fprintf(stderr,
1471 			    "%s: No room for %d sample pc's\n",
1472 			    whoami, sampbytes / sizeof (unsigned_UNIT));
1473 			exit(EX_OSERR);
1474 		}
1475 	}
1476 
1477 	for (i = 0; i < nsamples; i++) {
1478 		(void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1479 		if (feof(pfile))
1480 			break;
1481 		samples[i] += sample;
1482 	}
1483 	if (i != nsamples) {
1484 		(void) fprintf(stderr,
1485 		    "%s: unexpected EOF after reading %d/%d samples\n",
1486 		    whoami, --i, nsamples);
1487 		exit(EX_IOERR);
1488 	}
1489 }
1490 
1491 static void *
1492 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1493 {
1494 	int		fd;
1495 	bool		invalid_version;
1496 	caddr_t		fmem;
1497 	struct stat	buf;
1498 	ProfHeader	prof_hdr;
1499 	off_t		lret;
1500 
1501 	/*
1502 	 * Check versioning info. For now, let's say we provide
1503 	 * backward compatibility, so we accept all older versions.
1504 	 */
1505 	if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1506 		perror("fread()");
1507 		exit(EX_IOERR);
1508 	}
1509 
1510 	invalid_version = FALSE;
1511 	if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1512 		invalid_version = TRUE;
1513 	else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1514 		if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1515 			invalid_version = FALSE;
1516 	}
1517 
1518 	if (invalid_version) {
1519 		(void) fprintf(stderr, "%s: version %d.%d not supported\n",
1520 		    whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1521 		exit(EX_SOFTWARE);
1522 	}
1523 
1524 	/*
1525 	 * Map gmon.out onto memory.
1526 	 */
1527 	(void) fclose(pfile);
1528 	if ((fd = open(filename, O_RDONLY)) == -1) {
1529 		perror(filename);
1530 		exit(EX_IOERR);
1531 	}
1532 
1533 	if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1534 		perror(filename);
1535 		exit(EX_IOERR);
1536 	}
1537 	*fsz = lret;
1538 
1539 	fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1540 	if (fmem == MAP_FAILED) {
1541 		(void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1542 		exit(EX_IOERR);
1543 	}
1544 
1545 	/*
1546 	 * Before we close this fd, save this gmon.out's info to later verify
1547 	 * if the shared objects it references have changed since the time
1548 	 * they were used to generate this gmon.out
1549 	 */
1550 	if (fstat(fd, &buf) == -1) {
1551 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1552 		    whoami, filename);
1553 		exit(EX_NOINPUT);
1554 	}
1555 	gmonout_info.dev = buf.st_dev;
1556 	gmonout_info.ino = buf.st_ino;
1557 	gmonout_info.mtime = buf.st_mtime;
1558 	gmonout_info.size = buf.st_size;
1559 
1560 	(void) close(fd);
1561 
1562 	return ((void *) fmem);
1563 }
1564 
1565 static void *
1566 openpfile(char *filename, size_t *fsz)
1567 {
1568 	struct hdr	tmp;
1569 	FILE		*pfile;
1570 	unsigned long	magic_num;
1571 	size_t		hdrsize;
1572 	static bool	first_time = TRUE;
1573 	extern bool	old_style;
1574 
1575 	if ((pfile = fopen(filename, "r")) == NULL) {
1576 		perror(filename);
1577 		exit(EX_IOERR);
1578 	}
1579 
1580 	/*
1581 	 * Read in the magic. Note that we changed the cast "unsigned long"
1582 	 * to "unsigned int" because that's how h_magic is defined in the
1583 	 * new format ProfHeader.
1584 	 */
1585 	if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1586 		perror("fread()");
1587 		exit(EX_IOERR);
1588 	}
1589 
1590 	rewind(pfile);
1591 
1592 	/*
1593 	 * First check if this is versioned or *old-style* gmon.out
1594 	 */
1595 	if (magic_num == (unsigned int)PROF_MAGIC) {
1596 		if ((!first_time) && (old_style == TRUE)) {
1597 			(void) fprintf(stderr, "%s: can't mix old & new format "
1598 			    "profiled files\n", whoami);
1599 			exit(EX_SOFTWARE);
1600 		}
1601 		first_time = FALSE;
1602 		old_style = FALSE;
1603 		return (handle_versioned(pfile, filename, fsz));
1604 	}
1605 
1606 	if ((!first_time) && (old_style == FALSE)) {
1607 		(void) fprintf(stderr, "%s: can't mix old & new format "
1608 		    "profiled files\n", whoami);
1609 		exit(EX_SOFTWARE);
1610 	}
1611 
1612 	first_time = FALSE;
1613 	old_style = TRUE;
1614 	fsz = 0;
1615 
1616 	/*
1617 	 * Now, we need to determine if this is a run-time linker
1618 	 * profiled file or if it is a standard gmon.out.
1619 	 *
1620 	 * We do this by checking if magic matches PRF_MAGIC. If it
1621 	 * does, then this is a run-time linker profiled file, if it
1622 	 * doesn't, it must be a gmon.out file.
1623 	 */
1624 	if (magic_num == (unsigned long)PRF_MAGIC)
1625 		rflag = TRUE;
1626 	else
1627 		rflag = FALSE;
1628 
1629 	hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1630 
1631 	if (rflag) {
1632 		if (Bflag) {
1633 			L_hdr64		l_hdr64;
1634 
1635 			/*
1636 			 * If the rflag is set then the input file is
1637 			 * rtld profiled data, we'll read it in and convert
1638 			 * it to the standard format (ie: make it look like
1639 			 * a gmon.out file).
1640 			 */
1641 			if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1642 				perror("fread()");
1643 				exit(EX_IOERR);
1644 			}
1645 			if (l_hdr64.hd_version != PRF_VERSION_64) {
1646 				(void) fprintf(stderr,
1647 				    "%s: expected version %d, "
1648 				    "got version %d when processing 64-bit "
1649 				    "run-time linker profiled file.\n",
1650 				    whoami, PRF_VERSION_64, l_hdr64.hd_version);
1651 				exit(EX_SOFTWARE);
1652 			}
1653 			tmp.lowpc = 0;
1654 			tmp.highpc = (pctype)l_hdr64.hd_hpc;
1655 			tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1656 		} else {
1657 			L_hdr		l_hdr;
1658 
1659 			/*
1660 			 * If the rflag is set then the input file is
1661 			 * rtld profiled data, we'll read it in and convert
1662 			 * it to the standard format (ie: make it look like
1663 			 * a gmon.out file).
1664 			 */
1665 			if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1666 				perror("fread()");
1667 				exit(EX_IOERR);
1668 			}
1669 			if (l_hdr.hd_version != PRF_VERSION) {
1670 				(void) fprintf(stderr,
1671 				    "%s: expected version %d, "
1672 				    "got version %d when processing "
1673 				    "run-time linker profiled file.\n",
1674 				    whoami, PRF_VERSION, l_hdr.hd_version);
1675 				exit(EX_SOFTWARE);
1676 			}
1677 			tmp.lowpc = 0;
1678 			tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1679 			tmp.ncnt = hdrsize + l_hdr.hd_psize;
1680 		}
1681 	} else {
1682 		if (Bflag) {
1683 			if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1684 				perror("fread()");
1685 				exit(EX_IOERR);
1686 			}
1687 		} else {
1688 			/*
1689 			 * If we're not reading big %pc's, we need to read
1690 			 * the 32-bit header, and assign the members to
1691 			 * the actual header.
1692 			 */
1693 			struct hdr32 hdr32;
1694 			if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1695 				perror("fread()");
1696 				exit(EX_IOERR);
1697 			}
1698 			tmp.lowpc = hdr32.lowpc;
1699 			tmp.highpc = hdr32.highpc;
1700 			tmp.ncnt = hdr32.ncnt;
1701 		}
1702 	}
1703 
1704 	/*
1705 	 * perform sanity check on profiled file we've opened.
1706 	 */
1707 	if (tmp.lowpc >= tmp.highpc) {
1708 		if (rflag)
1709 			(void) fprintf(stderr,
1710 			    "%s: badly formed profiled data.\n",
1711 			    filename);
1712 		else
1713 			(void) fprintf(stderr,
1714 			    "%s: badly formed gmon.out file.\n",
1715 			    filename);
1716 		exit(EX_SOFTWARE);
1717 	}
1718 
1719 	if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1720 	    tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1721 		(void) fprintf(stderr,
1722 		    "%s: incompatible with first gmon file\n",
1723 		    filename);
1724 		exit(EX_IOERR);
1725 	}
1726 	h = tmp;
1727 	s_lowpc = h.lowpc;
1728 	s_highpc = h.highpc;
1729 	lowpc = h.lowpc / sizeof (UNIT);
1730 	highpc = h.highpc / sizeof (UNIT);
1731 	sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1732 	nsamples = sampbytes / sizeof (unsigned_UNIT);
1733 
1734 #ifdef DEBUG
1735 	if (debug & SAMPLEDEBUG) {
1736 		(void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1737 		    "0x%llx hdr.ncnt %lld\n",
1738 		    h.lowpc, h.highpc, h.ncnt);
1739 		(void) printf(
1740 		    "[openpfile]   s_lowpc 0x%llx   s_highpc 0x%llx\n",
1741 		    s_lowpc, s_highpc);
1742 		(void) printf(
1743 		    "[openpfile]     lowpc 0x%llx     highpc 0x%llx\n",
1744 		    lowpc, highpc);
1745 		(void) printf("[openpfile] sampbytes %d nsamples %d\n",
1746 		    sampbytes, nsamples);
1747 	}
1748 #endif /* DEBUG */
1749 
1750 	return ((void *) pfile);
1751 }
1752 
1753 /*
1754  * Information from a gmon.out file depends on whether it's versioned
1755  * or non-versioned, *old style* gmon.out. If old-style, it is in two
1756  * parts : an array of sampling hits within pc ranges, and the arcs. If
1757  * versioned, it contains a header, followed by any number of
1758  * modules/callgraph/pcsample_buffer objects.
1759  */
1760 static void
1761 getpfile(char *filename)
1762 {
1763 	void		*handle;
1764 	size_t		fsz;
1765 
1766 	handle = openpfile(filename, &fsz);
1767 
1768 	if (old_style) {
1769 		readsamples((FILE *)handle);
1770 		readarcs((FILE *)handle);
1771 		(void) fclose((FILE *)handle);
1772 		return;
1773 	}
1774 
1775 	getpfiledata((caddr_t)handle, fsz);
1776 	(void) munmap(handle, fsz);
1777 }
1778 
1779 int
1780 main(int argc, char **argv)
1781 {
1782 	char	**sp;
1783 	nltype	**timesortnlp;
1784 	int		c;
1785 	int		errflg;
1786 
1787 	prog_name = *argv;  /* preserve program name */
1788 	debug = 0;
1789 	nflag = FALSE;
1790 	bflag = TRUE;
1791 	lflag = FALSE;
1792 	Cflag = FALSE;
1793 	first_file = TRUE;
1794 	rflag = FALSE;
1795 	Bflag = FALSE;
1796 	errflg = FALSE;
1797 
1798 	while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1799 		switch (c) {
1800 		case 'a':
1801 			aflag = TRUE;
1802 			break;
1803 		case 'b':
1804 			bflag = FALSE;
1805 			break;
1806 		case 'c':
1807 			cflag = TRUE;
1808 			break;
1809 		case 'C':
1810 			Cflag = TRUE;
1811 			break;
1812 		case 'd':
1813 			dflag = TRUE;
1814 			debug |= atoi(optarg);
1815 			(void) printf("[main] debug = 0x%x\n", debug);
1816 			break;
1817 		case 'D':
1818 			Dflag = TRUE;
1819 			break;
1820 		case 'E':
1821 			addlist(Elist, optarg);
1822 			Eflag = TRUE;
1823 			addlist(elist, optarg);
1824 			eflag = TRUE;
1825 			break;
1826 		case 'e':
1827 			addlist(elist, optarg);
1828 			eflag = TRUE;
1829 			break;
1830 		case 'F':
1831 			addlist(Flist, optarg);
1832 			Fflag = TRUE;
1833 			addlist(flist, optarg);
1834 			fflag = TRUE;
1835 			break;
1836 		case 'f':
1837 			addlist(flist, optarg);
1838 			fflag = TRUE;
1839 			break;
1840 		case 'l':
1841 			lflag = TRUE;
1842 			break;
1843 		case 'n':
1844 			nflag = TRUE;
1845 			number_funcs_toprint = atoi(optarg);
1846 			break;
1847 		case 's':
1848 			sflag = TRUE;
1849 			break;
1850 		case 'z':
1851 			zflag = TRUE;
1852 			break;
1853 		case '?':
1854 			errflg++;
1855 
1856 		}
1857 
1858 	if (errflg) {
1859 		(void) fprintf(stderr,
1860 		    "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1861 		    "[ -E function-name ]\n\t[ -f function-name ] "
1862 		    "[ -F function-name  ]\n\t[  image-file  "
1863 		    "[ profile-file ... ] ]\n");
1864 		exit(EX_USAGE);
1865 	}
1866 
1867 	if (optind < argc) {
1868 		a_outname  = argv[optind++];
1869 	} else {
1870 		a_outname  = A_OUTNAME;
1871 	}
1872 	if (optind < argc) {
1873 		gmonname = argv[optind++];
1874 	} else {
1875 		gmonname = GMONNAME;
1876 	}
1877 	/*
1878 	 *	turn off default functions
1879 	 */
1880 	for (sp = &defaultEs[0]; *sp; sp++) {
1881 		Eflag = TRUE;
1882 		addlist(Elist, *sp);
1883 		eflag = TRUE;
1884 		addlist(elist, *sp);
1885 	}
1886 	/*
1887 	 *	how many ticks per second?
1888 	 *	if we can't tell, report time in ticks.
1889 	 */
1890 	hz = sysconf(_SC_CLK_TCK);
1891 	if (hz == -1) {
1892 		hz = 1;
1893 		(void) fprintf(stderr, "time is in ticks, not seconds\n");
1894 	}
1895 
1896 	getnfile(a_outname);
1897 
1898 	/*
1899 	 *	get information about mon.out file(s).
1900 	 */
1901 	do {
1902 		getpfile(gmonname);
1903 		if (optind < argc)
1904 			gmonname = argv[optind++];
1905 		else
1906 			optind++;
1907 	} while (optind <= argc);
1908 	/*
1909 	 *	dump out a gmon.sum file if requested
1910 	 */
1911 	if (sflag || Dflag)
1912 		dumpsum(GMONSUM);
1913 
1914 	if (old_style) {
1915 		/*
1916 		 *	assign samples to procedures
1917 		 */
1918 		asgnsamples();
1919 	}
1920 
1921 	/*
1922 	 *	assemble the dynamic profile
1923 	 */
1924 	timesortnlp = doarcs();
1925 
1926 	/*
1927 	 *	print the dynamic profile
1928 	 */
1929 #ifdef DEBUG
1930 	if (debug & ANYDEBUG) {
1931 		/* raw output of all symbols in all their glory */
1932 		int i;
1933 		(void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1934 		    "#calls, selfcalls, index \n");
1935 		for (i = 0; i < modules.nname; i++) {	/* Print each symbol */
1936 			if (timesortnlp[i]->name)
1937 				(void) printf(" %s ", timesortnlp[i]->name);
1938 			else
1939 				(void) printf(" <cycle> ");
1940 			(void) printf(" %lld ", timesortnlp[i]->value);
1941 			(void) printf(" %lld ", timesortnlp[i]->svalue);
1942 			(void) printf(" %f ", timesortnlp[i]->time);
1943 			(void) printf(" %lld ", timesortnlp[i]->ncall);
1944 			(void) printf(" %lld ", timesortnlp[i]->selfcalls);
1945 			(void) printf(" %d ", timesortnlp[i]->index);
1946 			(void) printf(" \n");
1947 		}
1948 	}
1949 #endif /* DEBUG */
1950 
1951 	printgprof(timesortnlp);
1952 	/*
1953 	 *	print the flat profile
1954 	 */
1955 	printprof();
1956 	/*
1957 	 *	print the index
1958 	 */
1959 	printindex();
1960 
1961 	/*
1962 	 * print the modules
1963 	 */
1964 	printmodules();
1965 
1966 	done();
1967 	/* NOTREACHED */
1968 	return (0);
1969 }
1970