xref: /titanic_50/usr/src/cmd/sgs/gprof/common/gprof.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include	<sysexits.h>
30 #include	<stdlib.h>
31 #include	<unistd.h>
32 #include	"gprof.h"
33 #include	"profile.h"
34 
35 char		*whoami = "gprof";
36 static pctype	lowpc, highpc;		/* range profiled, in UNIT's */
37 
/*
 *	Names of the routines which get -E excluded by default.
 *	The list is terminated by a null pointer.
 */
static char *defaultEs[] = {
	"mcount",
	"__mcleanup",
	NULL
};
46 
#ifdef DEBUG

/*
 * Printable names of the profile object types, only built into DEBUG
 * binaries.  Slot 0 is the name used for an invalid type and the list
 * is terminated by a null pointer.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	0
};
/* number of real object types named above (slots 1..3) */
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
59 
/*
 * Terminate the program with the success status EX_OK.
 */
void
done()
{
	exit(EX_OK);
	/* NOTREACHED */
}
66 
67 static pctype
68 max(pctype a, pctype b)
69 {
70 	if (a > b)
71 		return (a);
72 	return (b);
73 }
74 
75 static pctype
76 min(pctype a, pctype b)
77 {
78 	if (a < b)
79 		return (a);
80 	return (b);
81 }
82 
83 /*
84  *	calculate scaled entry point addresses (to save time in asgnsamples),
85  *	and possibly push the scaled entry points over the entry mask,
86  *	if it turns out that the entry point is in one bucket and the code
87  *	for a routine is in the next bucket.
88  *
89  */
90 static void
91 alignentries()
92 {
93 	register struct nl *	nlp;
94 #ifdef DEBUG
95 	pctype			bucket_of_entry;
96 	pctype			bucket_of_code;
97 #endif DEBUG
98 
99 	/* for old-style gmon.out, nameslist is only in modules.nl */
100 
101 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
102 		nlp->svalue = nlp->value / sizeof (UNIT);
103 #ifdef DEBUG
104 		bucket_of_entry = (nlp->svalue - lowpc) / scale;
105 		bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
106 		if (bucket_of_entry < bucket_of_code) {
107 			if (debug & SAMPLEDEBUG) {
108 				printf("[alignentries] pushing svalue 0x%llx "
109 				"to 0x%llx\n", nlp->svalue,
110 				nlp->svalue + UNITS_TO_CODE);
111 			}
112 		}
113 #endif DEBUG
114 	}
115 }
116 
117 /*
118  *	old-style gmon.out
119  *	------------------
120  *
121  *	Assign samples to the procedures to which they belong.
122  *
123  *	There are three cases as to where pcl and pch can be
124  *	with respect to the routine entry addresses svalue0 and svalue1
125  *	as shown in the following diagram.  overlap computes the
126  *	distance between the arrows, the fraction of the sample
127  *	that is to be credited to the routine which starts at svalue0.
128  *
129  *	    svalue0                                         svalue1
130  *	       |                                               |
131  *	       v                                               v
132  *
133  *	       +-----------------------------------------------+
134  *	       |					       |
135  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
136  *	  |         |		  |         |		  |         |
137  *	  +---------+		  +---------+		  +---------+
138  *
139  *	  ^         ^		  ^         ^		  ^         ^
140  *	  |         |		  |         |		  |         |
141  *	 pcl       pch		 pcl       pch		 pcl       pch
142  *
143  *	For the vax we assert that samples will never fall in the first
144  *	two bytes of any routine, since that is the entry mask,
145  *	thus we give call alignentries() to adjust the entry points if
146  *	the entry mask falls in one bucket but the code for the routine
147  *	doesn't start until the next bucket.  In conjunction with the
148  *	alignment of routine addresses, this should allow us to have
149  *	only one sample for every four bytes of text space and never
150  *	have any overlap (the two end cases, above).
151  */
152 static void
153 asgnsamples()
154 {
155 	sztype		i, j;
156 	unsigned_UNIT	ccnt;
157 	double		time;
158 	pctype		pcl, pch;
159 	pctype		overlap;
160 	pctype		svalue0, svalue1;
161 
162 	extern mod_info_t	modules;
163 	nltype		*nl = modules.nl;
164 	sztype		nname = modules.nname;
165 
166 	/* read samples and assign to namelist symbols */
167 	scale = highpc - lowpc;
168 	scale /= nsamples;
169 	alignentries();
170 	for (i = 0, j = 1; i < nsamples; i++) {
171 		ccnt = samples[i];
172 		if (ccnt == 0)
173 			continue;
174 		pcl = lowpc + scale * i;
175 		pch = lowpc + scale * (i + 1);
176 		time = ccnt;
177 #ifdef DEBUG
178 		if (debug & SAMPLEDEBUG) {
179 			printf("[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
180 			    pcl, pch, ccnt);
181 		}
182 #endif DEBUG
183 		totime += time;
184 		for (j = (j ? j - 1 : 0); j < nname; j++) {
185 			svalue0 = nl[j].svalue;
186 			svalue1 = nl[j+1].svalue;
187 			/*
188 			 *	if high end of tick is below entry address,
189 			 *	go for next tick.
190 			 */
191 			if (pch < svalue0)
192 				break;
193 			/*
194 			 *	if low end of tick into next routine,
195 			 *	go for next routine.
196 			 */
197 			if (pcl >= svalue1)
198 				continue;
199 			overlap = min(pch, svalue1) - max(pcl, svalue0);
200 			if (overlap != 0) {
201 #ifdef DEBUG
202 				if (debug & SAMPLEDEBUG) {
203 					printf("[asgnsamples] "
204 					    "(0x%llx->0x%llx-0x%llx) %s gets "
205 					    "%f ticks %lld overlap\n",
206 					    nl[j].value/sizeof (UNIT), svalue0,
207 					    svalue1, nl[j].name,
208 					    overlap * time / scale, overlap);
209 				}
210 #endif DEBUG
211 				nl[j].time += overlap * time / scale;
212 			}
213 		}
214 	}
215 #ifdef DEBUG
216 	if (debug & SAMPLEDEBUG) {
217 		printf("[asgnsamples] totime %f\n", totime);
218 	}
219 #endif DEBUG
220 }
221 
222 
223 static void
224 dump_callgraph(FILE *fp, char *filename,
225 				unsigned long tarcs, unsigned long ncallees)
226 {
227 	ProfCallGraph		prof_cgraph;
228 	ProfFunction		prof_func;
229 	register arctype	*arcp;
230 	mod_info_t		*mi;
231 	nltype			*nlp;
232 	size_t			cur_offset;
233 	unsigned long		caller_id = 0, callee_id = 0;
234 
235 	/*
236 	 * Write the callgraph header
237 	 */
238 	prof_cgraph.type = PROF_CALLGRAPH_T;
239 	prof_cgraph.version = PROF_CALLGRAPH_VER;
240 	prof_cgraph.functions = PROFCGRAPH_SZ;
241 	prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
242 	if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
243 		perror(filename);
244 		exit(EX_IOERR);
245 	}
246 	if (CGRAPH_FILLER)
247 		fseek(fp, CGRAPH_FILLER, SEEK_CUR);
248 
249 	/* Current offset inside the callgraph object */
250 	cur_offset = prof_cgraph.functions;
251 
252 	for (mi = &modules; mi; mi = mi->next) {
253 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
254 			if (nlp->ncallers == 0)
255 				continue;
256 
257 			/* If this is the last callee, set next_to to 0 */
258 			callee_id++;
259 			if (callee_id == ncallees)
260 				prof_func.next_to = 0;
261 			else {
262 				prof_func.next_to = cur_offset +
263 						    nlp->ncallers * PROFFUNC_SZ;
264 			}
265 
266 			/*
267 			 * Dump this callee's raw arc information with all
268 			 * its callers
269 			 */
270 			caller_id = 1;
271 			for (arcp = nlp->parents; arcp;
272 					    arcp = arcp->arc_parentlist) {
273 				/*
274 				 * If no more callers for this callee, set
275 				 * next_from to 0
276 				 */
277 				if (caller_id == nlp->ncallers)
278 					prof_func.next_from = 0;
279 				else {
280 					prof_func.next_from = cur_offset +
281 								PROFFUNC_SZ;
282 				}
283 
284 				prof_func.frompc =
285 					arcp->arc_parentp->module->load_base +
286 					(arcp->arc_parentp->value -
287 					arcp->arc_parentp->module->txt_origin);
288 				prof_func.topc =
289 					mi->load_base +
290 						(nlp->value - mi->txt_origin);
291 				prof_func.count = arcp->arc_count;
292 
293 
294 				if (fwrite(&prof_func, sizeof (ProfFunction),
295 								1, fp) != 1) {
296 					perror(filename);
297 					exit(EX_IOERR);
298 				}
299 				if (FUNC_FILLER)
300 					fseek(fp, FUNC_FILLER, SEEK_CUR);
301 
302 				cur_offset += PROFFUNC_SZ;
303 				caller_id++;
304 			}
305 		} /* for nlp... */
306 	} /* for mi... */
307 }
308 
309 /*
310  * To save all pc-hits in all the gmon.out's is infeasible, as this
311  * may become quite huge even with a small number of files to sum.
312  * Instead, we'll dump *fictitious hits* to correct functions
313  * by scanning module namelists. Again, since this is summing
314  * pc-hits, we may have to dump the pcsamples out in chunks if the
315  * number of pc-hits is high.
316  */
317 static void
318 dump_hits(FILE *fp, char *filename, nltype *nlp)
319 {
320 	Address		*p, hitpc;
321 	size_t		i, nelem, ntowrite;
322 
323 	if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
324 		nelem = PROF_BUFFER_SIZE;
325 
326 	if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
327 		fprintf(stderr, "%s: no room for %ld pcsamples\n",
328 							    whoami, nelem);
329 		exit(EX_OSERR);
330 	}
331 
332 	/*
333 	 * Set up *fictitious* hits (to function entry) buffer
334 	 */
335 	hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
336 	for (i = 0; i < nelem; i++)
337 		p[i] = hitpc;
338 
339 	for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
340 		if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
341 			perror(filename);
342 			exit(EX_IOERR);
343 		}
344 	}
345 
346 	if (ntowrite) {
347 		if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
348 			perror(filename);
349 			exit(EX_IOERR);
350 		}
351 	}
352 
353 	free(p);
354 }
355 
356 static void
357 dump_pcsamples(FILE *fp, char *filename,
358 				unsigned long *tarcs, unsigned long *ncallees)
359 {
360 	ProfBuffer		prof_buffer;
361 	register arctype	*arcp;
362 	mod_info_t		*mi;
363 	nltype			*nlp;
364 
365 	prof_buffer.type = PROF_BUFFER_T;
366 	prof_buffer.version = PROF_BUFFER_VER;
367 	prof_buffer.buffer = PROFBUF_SZ;
368 	prof_buffer.bufsize = n_pcsamples;
369 	prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
370 	if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
371 		perror(filename);
372 		exit(EX_IOERR);
373 	}
374 	if (BUF_FILLER)
375 		fseek(fp, BUF_FILLER, SEEK_CUR);
376 
377 	*tarcs = 0;
378 	*ncallees = 0;
379 	for (mi = &modules; mi; mi = mi->next) {
380 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
381 			if (nlp->nticks)
382 				dump_hits(fp, filename, nlp);
383 
384 			nlp->ncallers = 0;
385 			for (arcp = nlp->parents; arcp;
386 					    arcp = arcp->arc_parentlist) {
387 				(nlp->ncallers)++;
388 			}
389 
390 			if (nlp->ncallers) {
391 				(*tarcs) += nlp->ncallers;
392 				(*ncallees)++;
393 			}
394 		}
395 	}
396 }
397 
398 static void
399 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
400 {
401 	char		*pbuf, *p;
402 	size_t		namelen;
403 	Index		off_nxt, off_path;
404 	mod_info_t	*mi;
405 
406 	ProfModuleList	prof_modlist;
407 	ProfModule	prof_mod;
408 
409 	/* Allocate for path strings buffer */
410 	pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
411 	if ((p = pbuf = (char *) calloc(pbuf_sz, sizeof (char))) == NULL) {
412 		fprintf(stderr, "%s: no room for %ld bytes\n",
413 					    whoami, pbuf_sz * sizeof (char));
414 		exit(EX_OSERR);
415 	}
416 
417 	/* Dump out PROF_MODULE_T info for all non-aout modules */
418 	prof_modlist.type = PROF_MODULES_T;
419 	prof_modlist.version = PROF_MODULES_VER;
420 	prof_modlist.modules = PROFMODLIST_SZ;
421 	prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
422 								    pbuf_sz;
423 	if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
424 		perror(filename);
425 		exit(EX_IOERR);
426 	}
427 	if (MODLIST_FILLER)
428 		fseek(fp, MODLIST_FILLER, SEEK_CUR);
429 
430 	/*
431 	 * Initialize offsets for ProfModule elements.
432 	 */
433 	off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
434 	off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
435 
436 	for (mi = modules.next; mi; mi = mi->next) {
437 		if (mi->next)
438 			prof_mod.next = off_nxt;
439 		else
440 			prof_mod.next = 0;
441 		prof_mod.path = off_path;
442 		prof_mod.startaddr = mi->load_base;
443 		prof_mod.endaddr = mi->load_end;
444 
445 		if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
446 			perror(filename);
447 			exit(EX_IOERR);
448 		}
449 
450 		if (MOD_FILLER)
451 			fseek(fp, MOD_FILLER, SEEK_CUR);
452 
453 		strcpy(p, mi->name);
454 		namelen = strlen(mi->name);
455 		p += namelen + 1;
456 
457 		/* Note that offset to every path str need not be aligned */
458 		off_nxt += PROFMOD_SZ;
459 		off_path += namelen + 1;
460 	}
461 
462 	/* Write out the module path strings */
463 	if (pbuf_sz) {
464 		if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
465 			perror(filename);
466 			exit(EX_IOERR);
467 		}
468 
469 		free(pbuf);
470 	}
471 }
472 
473 /*
474  * If we have inactive modules, their current load addresses may overlap with
475  * active ones, and so we've to assign fictitious, non-overlapping addresses
476  * to all modules before we dump them.
477  */
478 static void
479 fixup_maps(size_t *pathsz)
480 {
481 	unsigned int	n_inactive = 0;
482 	Address		lbase, lend;
483 	mod_info_t	*mi;
484 
485 	/* Pick the lowest load address among modules */
486 	*pathsz = 0;
487 	for (mi = &modules; mi; mi = mi->next) {
488 
489 		if (mi->active == FALSE)
490 			n_inactive++;
491 
492 		if (mi == &modules || mi->load_base < lbase)
493 			lbase = mi->load_base;
494 
495 		/*
496 		 * Return total path size of non-aout modules only
497 		 */
498 		if (mi != &modules)
499 			*pathsz = (*pathsz) + strlen(mi->name) + 1;
500 	}
501 
502 	/*
503 	 * All module info is in fine shape already if there are no
504 	 * inactive modules
505 	 */
506 	if (n_inactive == 0)
507 		return;
508 
509 	/*
510 	 * Assign fictitious load addresses to all (non-aout) modules so
511 	 * that sum info can be dumped out.
512 	 */
513 	for (mi = modules.next; mi; mi = mi->next) {
514 		lend = lbase + (mi->data_end - mi->txt_origin);
515 		if ((lbase < modules.load_base && lend < modules.load_base) ||
516 		    (lbase > modules.load_end && lend > modules.load_end)) {
517 
518 			mi->load_base = lbase;
519 			mi->load_end = lend;
520 
521 			/* just to give an appearance of reality */
522 			lbase = CEIL(lend + PGSZ, PGSZ);
523 		} else {
524 			/*
525 			 * can't use this lbase & lend pair, as it
526 			 * overlaps with aout's addresses
527 			 */
528 			mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
529 			mi->load_end = mi->load_base + (lend - lbase);
530 
531 			lbase = CEIL(mi->load_end + PGSZ, PGSZ);
532 		}
533 	}
534 }
535 
536 static void
537 dump_gprofhdr(FILE *fp, char *filename)
538 {
539 	ProfHeader	prof_hdr;
540 
541 	prof_hdr.h_magic = PROF_MAGIC;
542 	prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
543 	prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
544 	prof_hdr.size = PROFHDR_SZ;
545 	if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
546 		perror(filename);
547 		exit(EX_IOERR);
548 	}
549 
550 	if (HDR_FILLER)
551 		fseek(fp, HDR_FILLER, SEEK_CUR);
552 }
553 
554 static void
555 dumpsum_ostyle(char *sumfile)
556 {
557 	register nltype *nlp;
558 	register arctype *arcp;
559 	struct rawarc arc;
560 	struct rawarc32 arc32;
561 	FILE *sfile;
562 
563 	if ((sfile = fopen(sumfile, "w")) == NULL) {
564 		perror(sumfile);
565 		exit(EX_IOERR);
566 	}
567 	/*
568 	 * dump the header; use the last header read in
569 	 */
570 	if (Bflag) {
571 	    if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
572 		perror(sumfile);
573 		exit(EX_IOERR);
574 	    }
575 	} else {
576 	    struct hdr32 hdr;
577 	    hdr.lowpc  = (pctype32)h.lowpc;
578 	    hdr.highpc = (pctype32)h.highpc;
579 	    hdr.ncnt   = (pctype32)h.ncnt;
580 	    if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
581 		perror(sumfile);
582 		exit(EX_IOERR);
583 	    }
584 	}
585 	/*
586 	 * dump the samples
587 	 */
588 	if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
589 	    nsamples) {
590 		perror(sumfile);
591 		exit(EX_IOERR);
592 	}
593 	/*
594 	 * dump the normalized raw arc information. For old-style dumping,
595 	 * the only namelist is in modules.nl
596 	 */
597 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
598 		for (arcp = nlp->children; arcp;
599 		    arcp = arcp->arc_childlist) {
600 			if (Bflag) {
601 			    arc.raw_frompc = arcp->arc_parentp->value;
602 			    arc.raw_selfpc = arcp->arc_childp->value;
603 			    arc.raw_count = arcp->arc_count;
604 			    if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
605 				    perror(sumfile);
606 				    exit(EX_IOERR);
607 			    }
608 			} else {
609 			    arc32.raw_frompc =
610 				(pctype32)arcp->arc_parentp->value;
611 			    arc32.raw_selfpc =
612 				(pctype32)arcp->arc_childp->value;
613 			    arc32.raw_count = (actype32)arcp->arc_count;
614 			    if (fwrite(&arc32, sizeof (arc32), 1, sfile) != 1) {
615 				    perror(sumfile);
616 				    exit(EX_IOERR);
617 			    }
618 			}
619 #ifdef DEBUG
620 			if (debug & SAMPLEDEBUG) {
621 				printf("[dumpsum_ostyle] frompc 0x%llx selfpc "
622 				    "0x%llx count %lld\n", arc.raw_frompc,
623 				    arc.raw_selfpc, arc.raw_count);
624 			}
625 #endif DEBUG
626 		}
627 	}
628 	fclose(sfile);
629 }
630 
631 /*
632  * dump out the gmon.sum file
633  */
634 static void
635 dumpsum(char *sumfile)
636 {
637 	FILE		*sfile;
638 	size_t		pathbuf_sz;
639 	unsigned long	total_arcs;	/* total number of arcs in all */
640 	unsigned long	ncallees;	/* no. of callees with parents */
641 
642 	if (old_style) {
643 		dumpsum_ostyle(sumfile);
644 		return;
645 	}
646 
647 	if ((sfile = fopen(sumfile, "w")) == NULL) {
648 		perror(sumfile);
649 		exit(EX_IOERR);
650 	}
651 
652 	/*
653 	 * Dump the new-style gprof header. Even if one of the original
654 	 * profiled-files was of a older version, the summed file is of
655 	 * current version only.
656 	 */
657 	dump_gprofhdr(sfile, sumfile);
658 
659 	/*
660 	 * Fix up load-maps and dump out modules info
661 	 *
662 	 * Fix up module load maps so inactive modules get *some* address
663 	 * (and btw, could you get the total size of non-aout module path
664 	 * strings please ?)
665 	 */
666 	fixup_maps(&pathbuf_sz);
667 	dump_modules(sfile, sumfile, pathbuf_sz);
668 
669 
670 	/*
671 	 * Dump out the summ'd pcsamples
672 	 *
673 	 * For dumping call graph information later, we need certain
674 	 * statistics (like total arcs, number of callers for each node);
675 	 * collect these also while we are at it.
676 	 */
677 	dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
678 
679 	/*
680 	 * Dump out the summ'd call graph information
681 	 */
682 	dump_callgraph(sfile, sumfile, total_arcs, ncallees);
683 
684 
685 	fclose(sfile);
686 }
687 
688 static void
689 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
690 {
691 	nltype		*parentp;
692 	nltype		*childp;
693 
694 	/*
695 	 * if count == 0 this is a null arc and
696 	 * we don't need to tally it.
697 	 */
698 	if (rawp->raw_count == 0)
699 		return;
700 
701 	/*
702 	 * Lookup the caller and callee pcs in namelists of
703 	 * appropriate modules
704 	 */
705 	parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
706 	childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
707 	if (childp && parentp) {
708 		if (!Dflag)
709 			childp->ncall += rawp->raw_count;
710 		else {
711 			if (first_file)
712 				childp->ncall += rawp->raw_count;
713 			else {
714 				childp->ncall -= rawp->raw_count;
715 				if (childp->ncall < 0)
716 					childp->ncall = 0;
717 			}
718 		}
719 
720 #ifdef DEBUG
721 		if (debug & TALLYDEBUG) {
722 			printf("[tally] arc from %s to %s traversed "
723 			    "%lld times\n", parentp->name,
724 			    childp->name, rawp->raw_count);
725 		}
726 #endif DEBUG
727 		addarc(parentp, childp, rawp->raw_count);
728 	}
729 }
730 
731 /*
732  * Look up a module's base address in a sorted list of pc-hits. Unlike
733  * nllookup(), this deals with misses by mapping them to the next *higher*
734  * pc-hit. This is so that we get into the module's first pc-hit rightaway,
735  * even if the module's entry-point (load_base) itself is not a hit.
736  */
737 static Address *
738 locate(Address	*pclist, size_t nelem, Address keypc)
739 {
740 	size_t	low = 0, middle, high = nelem - 1;
741 
742 	if (keypc <= pclist[low])
743 		return (pclist);
744 
745 	if (keypc > pclist[high])
746 		return (NULL);
747 
748 	while (low != high) {
749 		middle = (high + low) >> 1;
750 
751 		if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
752 			return (&pclist[middle + 1]);
753 
754 		if (pclist[middle] >= keypc)
755 			high = middle;
756 		else
757 			low = middle + 1;
758 	}
759 
760 	/* must never reach here! */
761 	return (NULL);
762 }
763 
764 static void
765 assign_pcsamples(module, pcsmpl, n_samples)
766 mod_info_t	*module;
767 Address		*pcsmpl;
768 size_t		n_samples;
769 {
770 	Address		*pcptr, *pcse = pcsmpl + n_samples;
771 	pctype		nxt_func;
772 	nltype		*fnl;
773 	size_t		func_nticks;
774 #ifdef DEBUG
775 	size_t		n_hits_in_module = 0;
776 #endif DEBUG
777 
778 	/* Locate the first pc-hit for this module */
779 	if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
780 #ifdef DEBUG
781 		if (debug & PCSMPLDEBUG) {
782 			printf("[assign_pcsamples] no pc-hits in\n");
783 			printf("                   `%s'\n", module->name);
784 		}
785 #endif DEBUG
786 		return;			/* no pc-hits in this module */
787 	}
788 
789 	/* Assign all pc-hits in this module to appropriate functions */
790 	while ((pcptr < pcse) && (*pcptr < module->load_end)) {
791 
792 		/* Update the corresponding function's time */
793 		if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) {
794 			/*
795 			 * Collect all pc-hits in this function. Each
796 			 * pc-hit counts as 1 tick.
797 			 */
798 			func_nticks = 0;
799 			while ((pcptr < pcse) && (*pcptr < nxt_func)) {
800 				func_nticks++;
801 				pcptr++;
802 			}
803 
804 			if (func_nticks == 0)
805 				pcptr++;
806 			else {
807 				fnl->nticks += func_nticks;
808 				fnl->time += func_nticks;
809 				totime += func_nticks;
810 			}
811 
812 #ifdef DEBUG
813 			n_hits_in_module += func_nticks;
814 #endif DEBUG
815 		} else {
816 			/*
817 			 * pc sample could not be assigned to function;
818 			 * probably in a PLT
819 			 */
820 			pcptr++;
821 		}
822 	}
823 
824 #ifdef DEBUG
825 	if (debug & PCSMPLDEBUG) {
826 		printf("[assign_pcsamples] %ld hits in\n", n_hits_in_module);
827 		printf("                   `%s'\n", module->name);
828 	}
829 #endif DEBUG
830 }
831 
832 int
833 pc_cmp(Address *pc1, Address *pc2)
834 {
835 	if (*pc1 > *pc2)
836 		return (1);
837 
838 	if (*pc1 < *pc2)
839 		return (-1);
840 
841 	return (0);
842 }
843 
844 static void
845 process_pcsamples(bufp)
846 ProfBuffer	*bufp;
847 {
848 	Address		*pc_samples;
849 	mod_info_t	*mi;
850 	caddr_t		p;
851 	size_t		chunk_size, nelem_read, nelem_to_read;
852 
853 #ifdef DEBUG
854 	if (debug & PCSMPLDEBUG) {
855 		printf("[process_pcsamples] number of pcsamples = %lld\n",
856 							    bufp->bufsize);
857 	}
858 #endif DEBUG
859 
860 	/* buffer with no pc samples ? */
861 	if (bufp->bufsize == 0)
862 		return;
863 
864 	/*
865 	 * If we're processing pcsamples of a profile sum, we could have
866 	 * more than PROF_BUFFER_SIZE number of samples. In such a case,
867 	 * we must read the pcsamples in chunks.
868 	 */
869 	if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
870 		chunk_size = PROF_BUFFER_SIZE;
871 
872 	/* Allocate for the pcsample chunk */
873 	pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
874 	if (pc_samples == NULL) {
875 		fprintf(stderr, "%s: no room for %ld sample pc's\n",
876 							whoami, chunk_size);
877 		exit(EX_OSERR);
878 	}
879 
880 	/* Copy the current set of pcsamples */
881 	nelem_read = 0;
882 	nelem_to_read = bufp->bufsize;
883 	p = (char *) bufp + bufp->buffer;
884 
885 	while (nelem_read < nelem_to_read) {
886 		memcpy((void *) pc_samples, p, chunk_size * sizeof (Address));
887 
888 		/* Sort the pc samples */
889 		qsort(pc_samples, chunk_size, sizeof (Address),
890 				(int (*)(const void *, const void *)) pc_cmp);
891 
892 		/*
893 		 * Assign pcsamples to functions in the currently active
894 		 * module list
895 		 */
896 		for (mi = &modules; mi; mi = mi->next) {
897 			if (mi->active == FALSE)
898 				continue;
899 			assign_pcsamples(mi, pc_samples, chunk_size);
900 		}
901 
902 		p += (chunk_size * sizeof (Address));
903 		nelem_read += chunk_size;
904 
905 		if ((nelem_to_read - nelem_read) < chunk_size)
906 			chunk_size = nelem_to_read - nelem_read;
907 	}
908 
909 	free(pc_samples);
910 
911 	/* Update total number of pcsamples read so far */
912 	n_pcsamples += bufp->bufsize;
913 }
914 
915 static mod_info_t *
916 find_module(Address addr)
917 {
918 	mod_info_t	*mi;
919 
920 	for (mi = &modules; mi; mi = mi->next) {
921 		if (mi->active == FALSE)
922 			continue;
923 
924 		if (addr >= mi->load_base && addr < mi->load_end)
925 			return (mi);
926 	}
927 
928 	return (NULL);
929 }
930 
931 static void
932 process_cgraph(cgp)
933 ProfCallGraph	*cgp;
934 {
935 	struct rawarc	arc;
936 	mod_info_t	*callee_mi, *caller_mi;
937 	ProfFunction	*calleep, *callerp;
938 	Index		caller_off, callee_off;
939 
940 	/*
941 	 * Note that *callee_off* increment in the for loop below
942 	 * uses *calleep* and *calleep* doesn't get set until the for loop
943 	 * is entered. We don't expect the increment to be executed before
944 	 * the loop body is executed atleast once, so this should be ok.
945 	 */
946 	for (callee_off = cgp->functions; callee_off;
947 					    callee_off = calleep->next_to) {
948 
949 		calleep = (ProfFunction *) ((char *) cgp + callee_off);
950 
951 		/*
952 		 * We could choose either to sort the {caller, callee}
953 		 * list twice and assign callee/caller to modules or inspect
954 		 * each callee/caller in the active modules list. Since
955 		 * the modules list is usually very small, we'l choose the
956 		 * latter.
957 		 */
958 
959 		/*
960 		 * If we cannot identify a callee with a module, there's
961 		 * no use worrying about who called it.
962 		 */
963 		if ((callee_mi = find_module(calleep->topc)) == NULL) {
964 #ifdef DEBUG
965 			if (debug & CGRAPHDEBUG) {
966 				printf("[process_cgraph] callee %#llx missed\n",
967 							    calleep->topc);
968 			}
969 #endif DEBUG
970 			continue;
971 		} else
972 			arc.raw_selfpc = calleep->topc;
973 
974 		for (caller_off = callee_off; caller_off;
975 					caller_off = callerp->next_from)  {
976 
977 			callerp = (ProfFunction *) ((char *) cgp + caller_off);
978 			if ((caller_mi = find_module(callerp->frompc)) ==
979 									NULL) {
980 #ifdef DEBUG
981 				if (debug & CGRAPHDEBUG) {
982 					printf("[process_cgraph] caller %#llx "
983 						"missed\n", callerp->frompc);
984 				}
985 #endif DEBUG
986 				continue;
987 			}
988 
989 			arc.raw_frompc = callerp->frompc;
990 			arc.raw_count = callerp->count;
991 
992 #ifdef DEBUG
993 			if (debug & CGRAPHDEBUG) {
994 				printf("[process_cgraph] arc <%#llx, %#llx, "
995 						"%lld>\n", arc.raw_frompc,
996 						arc.raw_selfpc, arc.raw_count);
997 			}
998 #endif DEBUG
999 			tally(caller_mi, callee_mi, &arc);
1000 		}
1001 	}
1002 
1003 #ifdef DEBUG
1004 	puts("\n");
1005 #endif DEBUG
1006 }
1007 
1008 /*
1009  * Two modules overlap each other if they don't lie completely *outside*
1010  * each other.
1011  */
1012 static bool
1013 does_overlap(ProfModule *new, mod_info_t *old)
1014 {
1015 	/* case 1: new module lies completely *before* the old one */
1016 	if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1017 		return (FALSE);
1018 
1019 	/* case 2: new module lies completely *after* the old one */
1020 	if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1021 		return (FALSE);
1022 
1023 	/* probably a dlopen: the modules overlap each other */
1024 	return (TRUE);
1025 }
1026 
1027 static bool
1028 is_same_as_aout(char *modpath, struct stat *buf)
1029 {
1030 	if (stat(modpath, buf) == -1) {
1031 		fprintf(stderr, "%s: can't get info on `%s'\n",
1032 							whoami, modpath);
1033 		exit(EX_NOINPUT);
1034 	}
1035 
1036 	if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1037 		return (TRUE);
1038 	else
1039 		return (FALSE);
1040 }
1041 
1042 static void
1043 process_modules(modlp)
1044 ProfModuleList	*modlp;
1045 {
1046 	ProfModule	*newmodp;
1047 	mod_info_t	*mi, *last, *new_module;
1048 	char		*so_path, *name;
1049 	bool		more_modules = TRUE;
1050 	struct stat	so_statbuf;
1051 
1052 #ifdef DEBUG
1053 	if (debug & MODULEDEBUG) {
1054 		printf("[process_modules] module obj version %u\n",
1055 							    modlp->version);
1056 	}
1057 #endif DEBUG
1058 
1059 	/* Check version of module type object */
1060 	if (modlp->version > PROF_MODULES_VER) {
1061 		fprintf(stderr, "%s: version %d for module type objects"
1062 				"is not supported\n", whoami, modlp->version);
1063 		exit(EX_SOFTWARE);
1064 	}
1065 
1066 
1067 	/*
1068 	 * Scan the PROF_MODULES_T list and add modules to current list
1069 	 * of modules, if they're not present already
1070 	 */
1071 	newmodp = (ProfModule *) ((char *) modlp + modlp->modules);
1072 	do {
1073 		/*
1074 		 * Since the prog could've been renamed after its run, we
1075 		 * should see if this overlaps a.out. If it does, it is
1076 		 * probably the renamed aout. We should also skip any other
1077 		 * non-sharedobj's that we see (or should we report an error ?)
1078 		 */
1079 		so_path = (caddr_t) modlp + newmodp->path;
1080 		if (does_overlap(newmodp, &modules) ||
1081 				    is_same_as_aout(so_path, &so_statbuf) ||
1082 						(!is_shared_obj(so_path))) {
1083 
1084 			if (!newmodp->next)
1085 				more_modules = FALSE;
1086 
1087 			newmodp = (ProfModule *)
1088 					((caddr_t) modlp + newmodp->next);
1089 #ifdef DEBUG
1090 			if (debug & MODULEDEBUG) {
1091 				printf("[process_modules] `%s'\n", so_path);
1092 				printf("                  skipped\n");
1093 			}
1094 #endif DEBUG
1095 			continue;
1096 		}
1097 #ifdef DEBUG
1098 		if (debug & MODULEDEBUG)
1099 			printf("[process_modules] `%s'...\n", so_path);
1100 #endif DEBUG
1101 
1102 		/*
1103 		 * Check all modules (leave the first one, 'cos that
1104 		 * is the program executable info). If this module is already
1105 		 * there in the list, update the load addresses and proceed.
1106 		 */
1107 		last = &modules;
1108 		while (mi = last->next) {
1109 			/*
1110 			 * We expect the full pathname for all shared objects
1111 			 * needed by the program executable. In this case, we
1112 			 * simply need to compare the paths to see if they are
1113 			 * the same file.
1114 			 */
1115 			if (strcmp(mi->name, so_path) == 0)
1116 				break;
1117 
1118 			/*
1119 			 * Check if this new shared object will overlap
1120 			 * any existing module. If yes, remove the old one
1121 			 * from the linked list (but don't free it, 'cos
1122 			 * there may be symbols referring to this module
1123 			 * still)
1124 			 */
1125 			if (does_overlap(newmodp, mi)) {
1126 #ifdef DEBUG
1127 				if (debug & MODULEDEBUG) {
1128 					printf("[process_modules] `%s'\n",
1129 								    so_path);
1130 					printf("                  overlaps\n");
1131 					printf("                  `%s'\n",
1132 								    mi->name);
1133 				}
1134 #endif DEBUG
1135 				mi->active = FALSE;
1136 			}
1137 
1138 			last = mi;
1139 		}
1140 
1141 		/* Module already there, skip it */
1142 		if (mi != NULL) {
1143 			mi->load_base = newmodp->startaddr;
1144 			mi->load_end = newmodp->endaddr;
1145 			mi->active = TRUE;
1146 			if (!newmodp->next)
1147 				more_modules = FALSE;
1148 
1149 			newmodp = (ProfModule *)
1150 					((caddr_t) modlp + newmodp->next);
1151 
1152 #ifdef DEBUG
1153 			if (debug & MODULEDEBUG) {
1154 				printf("[process_modules] base=%#llx, "
1155 						"end=%#llx\n", mi->load_base,
1156 						mi->load_end);
1157 			}
1158 #endif DEBUG
1159 			continue;
1160 		}
1161 
1162 		/*
1163 		 * Check if gmon.out is outdated with respect to the new
1164 		 * module we want to add
1165 		 */
1166 		if (gmonout_info.mtime < so_statbuf.st_mtime) {
1167 			fprintf(stderr, "%s: shared obj outdates prof info\n",
1168 								    whoami);
1169 			fprintf(stderr, "\t(newer %s)\n", so_path);
1170 			exit(EX_NOINPUT);
1171 		}
1172 
1173 		/* Create a new module element */
1174 		new_module = (mod_info_t *) malloc(sizeof (mod_info_t));
1175 		if (new_module == NULL) {
1176 			fprintf(stderr, "%s: no room for %ld bytes\n",
1177 						whoami, sizeof (mod_info_t));
1178 			exit(EX_OSERR);
1179 		}
1180 
1181 		/* and fill in info... */
1182 		new_module->id = n_modules + 1;
1183 		new_module->load_base = newmodp->startaddr;
1184 		new_module->load_end = newmodp->endaddr;
1185 		new_module->name = (char *) malloc(strlen(so_path) + 1);
1186 		if (new_module->name == NULL) {
1187 			fprintf(stderr, "%s: no room for %ld bytes\n",
1188 						whoami, strlen(so_path) + 1);
1189 			exit(EX_OSERR);
1190 		}
1191 		strcpy(new_module->name, so_path);
1192 #ifdef DEBUG
1193 		if (debug & MODULEDEBUG) {
1194 			printf("[process_modules] base=%#llx, end=%#llx\n",
1195 				new_module->load_base, new_module->load_end);
1196 		}
1197 #endif DEBUG
1198 
1199 		/* Create this module's nameslist */
1200 		process_namelist(new_module);
1201 
1202 		/* Add it to the tail of active module list */
1203 		last->next = new_module;
1204 		n_modules++;
1205 
1206 #ifdef DEBUG
1207 		if (debug & MODULEDEBUG) {
1208 			printf("[process_modules] total shared objects = %ld\n",
1209 							    n_modules - 1);
1210 		}
1211 #endif DEBUG
1212 		/*
1213 		 * Move to the next module in the PROF_MODULES_T list
1214 		 * (if present)
1215 		 */
1216 		if (!newmodp->next)
1217 			more_modules = FALSE;
1218 
1219 		newmodp = (ProfModule *) ((caddr_t) modlp + newmodp->next);
1220 
1221 	} while (more_modules);
1222 }
1223 
1224 static void
1225 reset_active_modules()
1226 {
1227 	mod_info_t	*mi;
1228 
1229 	/* Except the executable, no other module should remain active */
1230 	for (mi = modules.next; mi; mi = mi->next)
1231 		mi->active = FALSE;
1232 }
1233 
1234 static void
1235 getpfiledata(memp, fsz)
1236 caddr_t	memp;
1237 size_t	fsz;
1238 {
1239 	ProfObject	*objp;
1240 	caddr_t		file_end;
1241 	bool		found_pcsamples = FALSE, found_cgraph = FALSE;
1242 
1243 	/*
1244 	 * Before processing a new gmon.out, all modules except the
1245 	 * program executable must be made inactive, so that symbols
1246 	 * are searched only in the program executable, if we don't
1247 	 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1248 	 * because we need the active module data after we're done with
1249 	 * the last gmon.out, if we're doing summing.
1250 	 */
1251 	reset_active_modules();
1252 
1253 	file_end = memp + fsz;
1254 	objp = (ProfObject *) (memp + ((ProfHeader *) memp)->size);
1255 	while ((caddr_t) objp < file_end) {
1256 #ifdef DEBUG
1257 		{
1258 			unsigned int	type = 0;
1259 
1260 			if (debug & MONOUTDEBUG) {
1261 				if (objp->type <= MAX_OBJTYPES)
1262 					type = objp->type;
1263 
1264 				printf("\n[getpfiledata] object %s [%#lx]\n",
1265 						objname[type], objp->type);
1266 			}
1267 		}
1268 #endif DEBUG
1269 		switch (objp->type) {
1270 			case PROF_MODULES_T :
1271 				process_modules((ProfModuleList *) objp);
1272 				break;
1273 
1274 			case PROF_CALLGRAPH_T :
1275 				process_cgraph((ProfCallGraph *) objp);
1276 				found_cgraph = TRUE;
1277 				break;
1278 
1279 			case PROF_BUFFER_T :
1280 				process_pcsamples((ProfBuffer *) objp);
1281 				found_pcsamples = TRUE;
1282 				break;
1283 
1284 			default :
1285 				fprintf(stderr,
1286 					"%s: unknown prof object type=%d\n",
1287 							whoami, objp->type);
1288 				exit(EX_SOFTWARE);
1289 		}
1290 		objp = (ProfObject *) ((caddr_t) objp + objp->size);
1291 	}
1292 
1293 	if (!found_cgraph || !found_pcsamples) {
1294 		fprintf(stderr,
1295 			"%s: missing callgraph/pcsamples object\n", whoami);
1296 		exit(EX_SOFTWARE);
1297 	}
1298 
1299 	if ((caddr_t) objp > file_end) {
1300 		fprintf(stderr, "%s: malformed profile file.\n", whoami);
1301 		exit(EX_SOFTWARE);
1302 	}
1303 
1304 	if (first_file)
1305 		first_file = FALSE;
1306 }
1307 
1308 static void
1309 readarcs(pfile)
1310 FILE	*pfile;
1311 {
1312 	/*
1313 	 *	the rest of the file consists of
1314 	 *	a bunch of <from,self,count> tuples.
1315 	 */
1316 	/* CONSTCOND */
1317 	while (1) {
1318 		struct rawarc	arc;
1319 
1320 		if (rflag) {
1321 			if (Bflag) {
1322 				L_cgarc64		rtld_arc64;
1323 
1324 				/*
1325 				 * If rflag is set then this is an profiled
1326 				 * image generated by rtld.  It needs to be
1327 				 * 'converted' to the standard data format.
1328 				 */
1329 				if (fread(&rtld_arc64,
1330 					    sizeof (L_cgarc64), 1, pfile) != 1)
1331 					break;
1332 
1333 				if (rtld_arc64.cg_from == PRF_OUTADDR64)
1334 					arc.raw_frompc = s_highpc + 0x10;
1335 				else
1336 					arc.raw_frompc =
1337 					    (pctype)rtld_arc64.cg_from;
1338 				arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1339 				arc.raw_count = (actype)rtld_arc64.cg_count;
1340 			} else {
1341 				L_cgarc		rtld_arc;
1342 
1343 				/*
1344 				 * If rflag is set then this is an profiled
1345 				 * image generated by rtld.  It needs to be
1346 				 * 'converted' to the standard data format.
1347 				 */
1348 				if (fread(&rtld_arc,
1349 					    sizeof (L_cgarc), 1, pfile) != 1)
1350 					break;
1351 
1352 				if (rtld_arc.cg_from == PRF_OUTADDR)
1353 					arc.raw_frompc = s_highpc + 0x10;
1354 				else
1355 					arc.raw_frompc = (pctype)
1356 					    (uintptr_t)rtld_arc.cg_from;
1357 				arc.raw_selfpc = (pctype)
1358 				    (uintptr_t)rtld_arc.cg_to;
1359 				arc.raw_count = (actype)rtld_arc.cg_count;
1360 			}
1361 		} else {
1362 			if (Bflag) {
1363 				if (fread(&arc, sizeof (struct rawarc), 1,
1364 				    pfile) != 1) {
1365 					break;
1366 				}
1367 			} else {
1368 				/*
1369 				 * If these aren't big %pc's, we need to read
1370 				 * into the 32-bit raw arc structure, and
1371 				 * assign the members into the actual arc.
1372 				 */
1373 				struct rawarc32 arc32;
1374 				if (fread(&arc32, sizeof (struct rawarc32),
1375 				    1, pfile) != 1)
1376 					break;
1377 				arc.raw_frompc = (pctype)arc32.raw_frompc;
1378 				arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1379 				arc.raw_count  = (actype)arc32.raw_count;
1380 			}
1381 		}
1382 
1383 #ifdef DEBUG
1384 		if (debug & SAMPLEDEBUG) {
1385 			printf("[getpfile] frompc 0x%llx selfpc "
1386 			    "0x%llx count %lld\n", arc.raw_frompc,
1387 			    arc.raw_selfpc, arc.raw_count);
1388 		}
1389 #endif DEBUG
1390 		/*
1391 		 *	add this arc
1392 		 */
1393 		tally(&modules, &modules, &arc);
1394 	}
1395 	if (first_file)
1396 		first_file = FALSE;
1397 }
1398 
1399 static void
1400 readsamples(FILE *pfile)
1401 {
1402 	sztype		i;
1403 	unsigned_UNIT	sample;
1404 
1405 	if (samples == 0) {
1406 		samples = (unsigned_UNIT *) calloc(nsamples,
1407 		    sizeof (unsigned_UNIT));
1408 		if (samples == 0) {
1409 			fprintf(stderr, "%s: No room for %ld sample pc's\n",
1410 			    whoami, sampbytes / sizeof (unsigned_UNIT));
1411 			exit(EX_OSERR);
1412 		}
1413 	}
1414 
1415 	for (i = 0; i < nsamples; i++) {
1416 		fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1417 		if (feof(pfile))
1418 			break;
1419 		samples[i] += sample;
1420 	}
1421 	if (i != nsamples) {
1422 		fprintf(stderr,
1423 		    "%s: unexpected EOF after reading %ld/%ld samples\n",
1424 		    whoami, --i, nsamples);
1425 		exit(EX_IOERR);
1426 	}
1427 }
1428 
1429 static void *
1430 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1431 {
1432 	int		fd;
1433 	bool		invalid_version;
1434 	caddr_t		fmem;
1435 	struct stat	buf;
1436 	ProfHeader	prof_hdr;
1437 
1438 	/*
1439 	 * Check versioning info. For now, let's say we provide
1440 	 * backward compatibility, so we accept all older versions.
1441 	 */
1442 	if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1443 		perror("fread()");
1444 		exit(EX_IOERR);
1445 	}
1446 
1447 	invalid_version = FALSE;
1448 	if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1449 		invalid_version = TRUE;
1450 	else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1451 		if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1452 			invalid_version = FALSE;
1453 	}
1454 
1455 	if (invalid_version) {
1456 		fprintf(stderr, "%s: version %d.%d not supported\n",
1457 			whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1458 		exit(EX_SOFTWARE);
1459 	}
1460 
1461 	/*
1462 	 * Map gmon.out onto memory.
1463 	 */
1464 	fclose(pfile);
1465 	if ((fd = open(filename, O_RDONLY)) == -1) {
1466 		perror(filename);
1467 		exit(EX_IOERR);
1468 	}
1469 
1470 	if ((*fsz = lseek(fd, 0, SEEK_END)) == -1) {
1471 		perror(filename);
1472 		exit(EX_IOERR);
1473 	}
1474 
1475 	fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1476 	if (fmem == MAP_FAILED) {
1477 	    fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1478 	    exit(EX_IOERR);
1479 	}
1480 
1481 	/*
1482 	 * Before we close this fd, save this gmon.out's info to later verify
1483 	 * if the shared objects it references have changed since the time
1484 	 * they were used to generate this gmon.out
1485 	 */
1486 	if (fstat(fd, &buf) == -1) {
1487 		fprintf(stderr, "%s: can't get info on `%s'\n",
1488 							whoami, filename);
1489 		exit(EX_NOINPUT);
1490 	}
1491 	gmonout_info.dev = buf.st_dev;
1492 	gmonout_info.ino = buf.st_ino;
1493 	gmonout_info.mtime = buf.st_mtime;
1494 	gmonout_info.size = buf.st_size;
1495 
1496 	close(fd);
1497 
1498 	return ((void *) fmem);
1499 }
1500 
1501 static void *
1502 openpfile(filename, fsz)
1503 char	*filename;
1504 size_t	*fsz;
1505 {
1506 	struct hdr	tmp;
1507 	FILE *		pfile;
1508 	unsigned long	magic_num;
1509 	size_t		hdrsize = sizeof (struct hdr);
1510 	static bool	first_time = TRUE;
1511 	extern bool	old_style;
1512 
1513 	if ((pfile = fopen(filename, "r")) == NULL) {
1514 		perror(filename);
1515 		exit(EX_IOERR);
1516 	}
1517 
1518 	/*
1519 	 * Read in the magic. Note that we changed the cast "unsigned long"
1520 	 * to "unsigned int" because that's how h_magic is defined in the
1521 	 * new format ProfHeader.
1522 	 */
1523 	if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1524 		perror("fread()");
1525 		exit(EX_IOERR);
1526 	}
1527 
1528 	rewind(pfile);
1529 
1530 	/*
1531 	 * First check if this is versioned or *old-style* gmon.out
1532 	 */
1533 	if (magic_num == (unsigned int)PROF_MAGIC) {
1534 		if ((!first_time) && (old_style == TRUE)) {
1535 			fprintf(stderr, "%s: can't mix old & new format "
1536 						"profiled files\n", whoami);
1537 			exit(EX_SOFTWARE);
1538 		}
1539 		first_time = FALSE;
1540 		old_style = FALSE;
1541 		return (handle_versioned(pfile, filename, fsz));
1542 	}
1543 
1544 	if ((!first_time) && (old_style == FALSE)) {
1545 		fprintf(stderr, "%s: can't mix old & new format "
1546 						"profiled files\n", whoami);
1547 		exit(EX_SOFTWARE);
1548 	}
1549 
1550 	first_time = FALSE;
1551 	old_style = TRUE;
1552 	fsz = 0;
1553 
1554 	/*
1555 	 * Now, we need to determine if this is a run-time linker
1556 	 * profiled file or if it is a standard gmon.out.
1557 	 *
1558 	 * We do this by checking if magic matches PRF_MAGIC. If it
1559 	 * does, then this is a run-time linker profiled file, if it
1560 	 * doesn't, it must be a gmon.out file.
1561 	 */
1562 	if (magic_num == (unsigned long)PRF_MAGIC)
1563 		rflag = TRUE;
1564 	else
1565 		rflag = FALSE;
1566 
1567 	if (rflag) {
1568 		if (Bflag) {
1569 			L_hdr64		l_hdr64;
1570 
1571 			/*
1572 			 * If the rflag is set then the input file is
1573 			 * rtld profiled data, we'll read it in and convert
1574 			 * it to the standard format (ie: make it look like
1575 			 * a gmon.out file).
1576 			 */
1577 			if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1578 				perror("fread()");
1579 				exit(EX_IOERR);
1580 			}
1581 			if (l_hdr64.hd_version != PRF_VERSION_64) {
1582 				fprintf(stderr, "%s: expected version %d, "
1583 				    "got version %d when processing 64-bit "
1584 				    "run-time linker profiled file.\n",
1585 				    whoami, PRF_VERSION_64, l_hdr64.hd_version);
1586 				exit(EX_SOFTWARE);
1587 			}
1588 			tmp.lowpc = 0;
1589 			tmp.highpc = (pctype)l_hdr64.hd_hpc;
1590 			tmp.ncnt = sizeof (M_hdr64) + l_hdr64.hd_psize;
1591 		} else {
1592 			L_hdr		l_hdr;
1593 
1594 			/*
1595 			 * If the rflag is set then the input file is
1596 			 * rtld profiled data, we'll read it in and convert
1597 			 * it to the standard format (ie: make it look like
1598 			 * a gmon.out file).
1599 			 */
1600 			if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1601 				perror("fread()");
1602 				exit(EX_IOERR);
1603 			}
1604 			if (l_hdr.hd_version != PRF_VERSION) {
1605 				fprintf(stderr, "%s: expected version %d, "
1606 				    "got version %d when processing "
1607 				    "run-time linker profiled file.\n",
1608 				    whoami, PRF_VERSION, l_hdr.hd_version);
1609 				exit(EX_SOFTWARE);
1610 			}
1611 			tmp.lowpc = 0;
1612 			tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1613 			tmp.ncnt = sizeof (M_hdr) + l_hdr.hd_psize;
1614 			hdrsize = sizeof (M_hdr);
1615 		}
1616 	} else {
1617 		if (Bflag) {
1618 			if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1619 				perror("fread()");
1620 				exit(EX_IOERR);
1621 			}
1622 		} else {
1623 			/*
1624 			 * If we're not reading big %pc's, we need to read
1625 			 * the 32-bit header, and assign the members to
1626 			 * the actual header.
1627 			 */
1628 			struct hdr32 hdr32;
1629 			if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1630 				perror("fread()");
1631 				exit(EX_IOERR);
1632 			}
1633 			tmp.lowpc = hdr32.lowpc;
1634 			tmp.highpc = hdr32.highpc;
1635 			tmp.ncnt = hdr32.ncnt;
1636 			hdrsize = sizeof (struct hdr32);
1637 		}
1638 	}
1639 
1640 	/*
1641 	 * perform sanity check on profiled file we've opened.
1642 	 */
1643 	if (tmp.lowpc >= tmp.highpc) {
1644 		if (rflag)
1645 			fprintf(stderr, "%s: badly formed profiled data.\n",
1646 			    filename);
1647 		else
1648 			fprintf(stderr, "%s: badly formed gmon.out file.\n",
1649 			    filename);
1650 		exit(EX_SOFTWARE);
1651 	}
1652 
1653 	if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1654 	    tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1655 		fprintf(stderr,
1656 		    "%s: incompatible with first gmon file\n",
1657 		    filename);
1658 		exit(EX_IOERR);
1659 	}
1660 	h = tmp;
1661 	s_lowpc = h.lowpc;
1662 	s_highpc = h.highpc;
1663 	lowpc = h.lowpc / sizeof (UNIT);
1664 	highpc = h.highpc / sizeof (UNIT);
1665 	sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1666 	nsamples = sampbytes / sizeof (unsigned_UNIT);
1667 
1668 #ifdef DEBUG
1669 	if (debug & SAMPLEDEBUG) {
1670 		printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1671 		    "0x%llx hdr.ncnt %lld\n",
1672 		    h.lowpc, h.highpc, h.ncnt);
1673 		printf("[openpfile]   s_lowpc 0x%llx   s_highpc 0x%llx\n",
1674 		    s_lowpc, s_highpc);
1675 		printf("[openpfile]     lowpc 0x%llx     highpc 0x%llx\n",
1676 		    lowpc, highpc);
1677 		printf("[openpfile] sampbytes %d nsamples %d\n",
1678 		    sampbytes, nsamples);
1679 	}
1680 #endif DEBUG
1681 
1682 	return ((void *) pfile);
1683 }
1684 
1685 /*
1686  * Information from a gmon.out file depends on whether it's versioned
1687  * or non-versioned, *old style* gmon.out. If old-style, it is in two
1688  * parts : an array of sampling hits within pc ranges, and the arcs. If
1689  * versioned, it contains a header, followed by any number of
1690  * modules/callgraph/pcsample_buffer objects.
1691  */
1692 static void
1693 getpfile(char *filename)
1694 {
1695 	void		*handle;
1696 	size_t		fsz;
1697 
1698 	handle = openpfile(filename, &fsz);
1699 
1700 	if (old_style) {
1701 		readsamples((FILE *) handle);
1702 		readarcs((FILE *) handle);
1703 		fclose((FILE *) handle);
1704 		return;
1705 	}
1706 
1707 	getpfiledata((caddr_t) handle, fsz);
1708 	munmap(handle, fsz);
1709 }
1710 
1711 main(int argc, char ** argv)
1712 {
1713 	char	**sp;
1714 	nltype	**timesortnlp;
1715 	int		c;
1716 	int		errflg;
1717 	extern char	*optarg;
1718 	extern int	optind;
1719 
1720 	prog_name = *argv;  /* preserve program name */
1721 	debug = 0;
1722 	nflag = FALSE;
1723 	bflag = TRUE;
1724 	lflag = FALSE;
1725 	Cflag = FALSE;
1726 	first_file = TRUE;
1727 	rflag = FALSE;
1728 	Bflag = FALSE;
1729 	errflg = FALSE;
1730 
1731 	while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1732 		switch (c) {
1733 		case 'a':
1734 			aflag = TRUE;
1735 			break;
1736 		case 'b':
1737 			bflag = FALSE;
1738 			break;
1739 		case 'c':
1740 			cflag = TRUE;
1741 			break;
1742 		case 'C':
1743 			Cflag = TRUE;
1744 			break;
1745 		case 'd':
1746 			dflag = TRUE;
1747 			debug |= atoi(optarg);
1748 			printf("[main] debug = 0x%x\n", debug);
1749 			break;
1750 		case 'D':
1751 			Dflag = TRUE;
1752 			break;
1753 		case 'E':
1754 			addlist(Elist, optarg);
1755 			Eflag = TRUE;
1756 			addlist(elist, optarg);
1757 			eflag = TRUE;
1758 			break;
1759 		case 'e':
1760 			addlist(elist, optarg);
1761 			eflag = TRUE;
1762 			break;
1763 		case 'F':
1764 			addlist(Flist, optarg);
1765 			Fflag = TRUE;
1766 			addlist(flist, optarg);
1767 			fflag = TRUE;
1768 			break;
1769 		case 'f':
1770 			addlist(flist, optarg);
1771 			fflag = TRUE;
1772 			break;
1773 		case 'l':
1774 			lflag = TRUE;
1775 			break;
1776 		case 'n':
1777 			nflag = TRUE;
1778 			number_funcs_toprint = atoi(optarg);
1779 			break;
1780 		case 's':
1781 			sflag = TRUE;
1782 			break;
1783 		case 'z':
1784 			zflag = TRUE;
1785 			break;
1786 		case '?':
1787 			errflg++;
1788 
1789 		}
1790 
1791 	if (errflg) {
1792 		(void) fprintf(stderr,
1793 		    "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1794 		    "[ -E function-name ]\n\t[ -f function-name ] "
1795 		    "[ -F function-name  ]\n\t[  image-file  "
1796 		    "[ profile-file ... ] ]\n");
1797 		exit(EX_USAGE);
1798 	}
1799 
1800 	if (optind < argc) {
1801 		a_outname  = argv[optind++];
1802 	} else {
1803 		a_outname  = A_OUTNAME;
1804 	}
1805 	if (optind < argc) {
1806 		gmonname = argv[optind++];
1807 	} else {
1808 		gmonname = GMONNAME;
1809 	}
1810 	/*
1811 	 *	turn off default functions
1812 	 */
1813 	for (sp = &defaultEs[0]; *sp; sp++) {
1814 		Eflag = TRUE;
1815 		addlist(Elist, *sp);
1816 		eflag = TRUE;
1817 		addlist(elist, *sp);
1818 	}
1819 	/*
1820 	 *	how many ticks per second?
1821 	 *	if we can't tell, report time in ticks.
1822 	 */
1823 	hz = sysconf(_SC_CLK_TCK);
1824 	if (hz == -1) {
1825 		hz = 1;
1826 		fprintf(stderr, "time is in ticks, not seconds\n");
1827 	}
1828 
1829 	getnfile(a_outname);
1830 
1831 	/*
1832 	 *	get information about mon.out file(s).
1833 	 */
1834 	do {
1835 		getpfile(gmonname);
1836 		if (optind < argc)
1837 			gmonname = argv[optind++];
1838 		else
1839 			optind++;
1840 	} while (optind <= argc);
1841 	/*
1842 	 *	dump out a gmon.sum file if requested
1843 	 */
1844 	if (sflag || Dflag)
1845 		dumpsum(GMONSUM);
1846 
1847 	if (old_style) {
1848 		/*
1849 		 *	assign samples to procedures
1850 		 */
1851 		asgnsamples();
1852 	}
1853 
1854 	/*
1855 	 *	assemble the dynamic profile
1856 	 */
1857 	timesortnlp = doarcs();
1858 
1859 	/*
1860 	 *	print the dynamic profile
1861 	 */
1862 #ifdef DEBUG
1863 	if (debug & ANYDEBUG) {
1864 		/* raw output of all symbols in all their glory */
1865 		int i;
1866 		printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1867 		    "#calls, selfcalls, index \n");
1868 		for (i = 0; i < modules.nname; i++) { 	/* Print each symbol */
1869 			if (timesortnlp[i]->name)
1870 				printf(" %s ", timesortnlp[i]->name);
1871 			else
1872 				printf(" <cycle> ");
1873 			printf(" %lld ", timesortnlp[i]->value);
1874 			printf(" %lld ", timesortnlp[i]->svalue);
1875 			printf(" %f ", timesortnlp[i]->time);
1876 			printf(" %lld ", timesortnlp[i]->ncall);
1877 			printf(" %lld ", timesortnlp[i]->selfcalls);
1878 			printf(" %d ", timesortnlp[i]->index);
1879 			printf(" \n");
1880 		}
1881 	}
1882 #endif DEBUG
1883 
1884 	printgprof(timesortnlp);
1885 	/*
1886 	 *	print the flat profile
1887 	 */
1888 	printprof();
1889 	/*
1890 	 *	print the index
1891 	 */
1892 	printindex();
1893 
1894 	/*
1895 	 * print the modules
1896 	 */
1897 	printmodules();
1898 
1899 	done();
1900 	/* NOTREACHED */
1901 	return (0);
1902 }
1903