xref: /titanic_41/usr/src/cmd/sgs/gprof/common/gprof.c (revision 9d25110c26dac1ee74833ce1708685385a732e52)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include	<sysexits.h>
29 #include	<stdlib.h>
30 #include	<stdio.h>
31 #include	<unistd.h>
32 #include	"gprof.h"
33 #include	"profile.h"
34 
35 char		*whoami = "gprof";
36 static pctype	lowpc, highpc;		/* range profiled, in UNIT's */
37 
/*
 *	Symbol names that are -E excluded by default.
 *	The list is terminated by a NULL entry.
 */
static char *defaultEs[] = {
	"mcount",
	"__mcleanup",
	NULL
};
46 
#ifdef DEBUG

/*
 * Names of the profile object types, compiled in for DEBUG builds only.
 * Entry 0 stands for an invalid object; the list is NULL-terminated.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
59 
/*
 * Terminate the program with the successful exit status EX_OK.
 */
void
done(void)
{
	exit(EX_OK);
}
66 
67 static pctype
68 max(pctype a, pctype b)
69 {
70 	if (a > b)
71 		return (a);
72 	return (b);
73 }
74 
75 static pctype
76 min(pctype a, pctype b)
77 {
78 	if (a < b)
79 		return (a);
80 	return (b);
81 }
82 
83 /*
84  *	calculate scaled entry point addresses (to save time in asgnsamples),
85  *	and possibly push the scaled entry points over the entry mask,
86  *	if it turns out that the entry point is in one bucket and the code
87  *	for a routine is in the next bucket.
88  *
89  */
90 static void
91 alignentries(void)
92 {
93 	struct nl *nlp;
94 #ifdef DEBUG
95 	pctype			bucket_of_entry;
96 	pctype			bucket_of_code;
97 #endif /* DEBUG */
98 
99 	/* for old-style gmon.out, nameslist is only in modules.nl */
100 
101 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
102 		nlp->svalue = nlp->value / sizeof (UNIT);
103 #ifdef DEBUG
104 		bucket_of_entry = (nlp->svalue - lowpc) / scale;
105 		bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
106 		if (bucket_of_entry < bucket_of_code) {
107 			if (debug & SAMPLEDEBUG) {
108 				(void) printf(
109 				    "[alignentries] pushing svalue 0x%llx "
110 				    "to 0x%llx\n", nlp->svalue,
111 				    nlp->svalue + UNITS_TO_CODE);
112 			}
113 		}
114 #endif /* DEBUG */
115 	}
116 }
117 
118 /*
119  *	old-style gmon.out
120  *	------------------
121  *
122  *	Assign samples to the procedures to which they belong.
123  *
124  *	There are three cases as to where pcl and pch can be
125  *	with respect to the routine entry addresses svalue0 and svalue1
126  *	as shown in the following diagram.  overlap computes the
127  *	distance between the arrows, the fraction of the sample
128  *	that is to be credited to the routine which starts at svalue0.
129  *
130  *	    svalue0                                         svalue1
131  *	       |                                               |
132  *	       v                                               v
133  *
134  *	       +-----------------------------------------------+
135  *	       |					       |
136  *	  |  ->|    |<-		->|         |<-		->|    |<-  |
137  *	  |         |		  |         |		  |         |
138  *	  +---------+		  +---------+		  +---------+
139  *
140  *	  ^         ^		  ^         ^		  ^         ^
141  *	  |         |		  |         |		  |         |
142  *	 pcl       pch		 pcl       pch		 pcl       pch
143  *
 *	For the vax we assert that samples will never fall in the first
 *	two bytes of any routine, since that is the entry mask,
 *	thus we call alignentries() to adjust the entry points if
 *	the entry mask falls in one bucket but the code for the routine
 *	doesn't start until the next bucket.  In conjunction with the
 *	alignment of routine addresses, this should allow us to have
 *	only one sample for every four bytes of text space and never
 *	have any overlap (the two end cases, above).
152  */
153 static void
154 asgnsamples(void)
155 {
156 	sztype		i, j;
157 	unsigned_UNIT	ccnt;
158 	double		time;
159 	pctype		pcl, pch;
160 	pctype		overlap;
161 	pctype		svalue0, svalue1;
162 
163 	extern mod_info_t	modules;
164 	nltype		*nl = modules.nl;
165 	sztype		nname = modules.nname;
166 
167 	/* read samples and assign to namelist symbols */
168 	scale = highpc - lowpc;
169 	scale /= nsamples;
170 	alignentries();
171 	for (i = 0, j = 1; i < nsamples; i++) {
172 		ccnt = samples[i];
173 		if (ccnt == 0)
174 			continue;
175 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
176 		pcl = lowpc + scale * i;
177 		/*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
178 		pch = lowpc + scale * (i + 1);
179 		time = ccnt;
180 #ifdef DEBUG
181 		if (debug & SAMPLEDEBUG) {
182 			(void) printf(
183 			    "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
184 			    pcl, pch, ccnt);
185 		}
186 #endif /* DEBUG */
187 		totime += time;
188 		for (j = (j ? j - 1 : 0); j < nname; j++) {
189 			svalue0 = nl[j].svalue;
190 			svalue1 = nl[j+1].svalue;
191 			/*
192 			 *	if high end of tick is below entry address,
193 			 *	go for next tick.
194 			 */
195 			if (pch < svalue0)
196 				break;
197 			/*
198 			 *	if low end of tick into next routine,
199 			 *	go for next routine.
200 			 */
201 			if (pcl >= svalue1)
202 				continue;
203 			overlap = min(pch, svalue1) - max(pcl, svalue0);
204 			if (overlap != 0) {
205 #ifdef DEBUG
206 				if (debug & SAMPLEDEBUG) {
207 					(void) printf("[asgnsamples] "
208 					    "(0x%llx->0x%llx-0x%llx) %s gets "
209 					    "%f ticks %lld overlap\n",
210 					    nl[j].value/sizeof (UNIT), svalue0,
211 					    svalue1, nl[j].name,
212 					    overlap * time / scale, overlap);
213 				}
214 #endif /* DEBUG */
215 				nl[j].time += overlap * time / scale;
216 			}
217 		}
218 	}
219 #ifdef DEBUG
220 	if (debug & SAMPLEDEBUG) {
221 		(void) printf("[asgnsamples] totime %f\n", totime);
222 	}
223 #endif /* DEBUG */
224 }
225 
226 
227 static void
228 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
229     unsigned long ncallees)
230 {
231 	ProfCallGraph		prof_cgraph;
232 	ProfFunction		prof_func;
233 	arctype	*arcp;
234 	mod_info_t		*mi;
235 	nltype			*nlp;
236 	size_t			cur_offset;
237 	unsigned long		caller_id = 0, callee_id = 0;
238 
239 	/*
240 	 * Write the callgraph header
241 	 */
242 	prof_cgraph.type = PROF_CALLGRAPH_T;
243 	prof_cgraph.version = PROF_CALLGRAPH_VER;
244 	prof_cgraph.functions = PROFCGRAPH_SZ;
245 	prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
246 	if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
247 		perror(filename);
248 		exit(EX_IOERR);
249 	}
250 	/* CONSTCOND */
251 	if (CGRAPH_FILLER)
252 		(void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);
253 
254 	/* Current offset inside the callgraph object */
255 	cur_offset = prof_cgraph.functions;
256 
257 	for (mi = &modules; mi; mi = mi->next) {
258 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
259 			if (nlp->ncallers == 0)
260 				continue;
261 
262 			/* If this is the last callee, set next_to to 0 */
263 			callee_id++;
264 			if (callee_id == ncallees)
265 				prof_func.next_to = 0;
266 			else {
267 				prof_func.next_to = cur_offset +
268 				    nlp->ncallers * PROFFUNC_SZ;
269 			}
270 
271 			/*
272 			 * Dump this callee's raw arc information with all
273 			 * its callers
274 			 */
275 			caller_id = 1;
276 			for (arcp = nlp->parents; arcp;
277 			    arcp = arcp->arc_parentlist) {
278 				/*
279 				 * If no more callers for this callee, set
280 				 * next_from to 0
281 				 */
282 				if (caller_id == nlp->ncallers)
283 					prof_func.next_from = 0;
284 				else {
285 					prof_func.next_from = cur_offset +
286 					    PROFFUNC_SZ;
287 				}
288 
289 				prof_func.frompc =
290 				    arcp->arc_parentp->module->load_base +
291 				    (arcp->arc_parentp->value -
292 				    arcp->arc_parentp->module->txt_origin);
293 				prof_func.topc = mi->load_base +
294 				    (nlp->value - mi->txt_origin);
295 				prof_func.count = arcp->arc_count;
296 
297 
298 				if (fwrite(&prof_func, sizeof (ProfFunction),
299 				    1, fp) != 1) {
300 					perror(filename);
301 					exit(EX_IOERR);
302 				}
303 				/* CONSTCOND */
304 				if (FUNC_FILLER)
305 					(void) fseek(fp, FUNC_FILLER, SEEK_CUR);
306 
307 				cur_offset += PROFFUNC_SZ;
308 				caller_id++;
309 			}
310 		} /* for nlp... */
311 	} /* for mi... */
312 }
313 
314 /*
315  * To save all pc-hits in all the gmon.out's is infeasible, as this
316  * may become quite huge even with a small number of files to sum.
317  * Instead, we'll dump *fictitious hits* to correct functions
318  * by scanning module namelists. Again, since this is summing
319  * pc-hits, we may have to dump the pcsamples out in chunks if the
320  * number of pc-hits is high.
321  */
322 static void
323 dump_hits(FILE *fp, char *filename, nltype *nlp)
324 {
325 	Address		*p, hitpc;
326 	size_t		i, nelem, ntowrite;
327 
328 	if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
329 		nelem = PROF_BUFFER_SIZE;
330 
331 	if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
332 		(void) fprintf(stderr, "%s: no room for %d pcsamples\n",
333 		    whoami, nelem);
334 		exit(EX_OSERR);
335 	}
336 
337 	/*
338 	 * Set up *fictitious* hits (to function entry) buffer
339 	 */
340 	hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
341 	for (i = 0; i < nelem; i++)
342 		p[i] = hitpc;
343 
344 	for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
345 		if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
346 			perror(filename);
347 			exit(EX_IOERR);
348 		}
349 	}
350 
351 	if (ntowrite) {
352 		if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
353 			perror(filename);
354 			exit(EX_IOERR);
355 		}
356 	}
357 
358 	free(p);
359 }
360 
361 static void
362 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
363     unsigned long *ncallees)
364 {
365 	ProfBuffer		prof_buffer;
366 	arctype	*arcp;
367 	mod_info_t		*mi;
368 	nltype			*nlp;
369 
370 	prof_buffer.type = PROF_BUFFER_T;
371 	prof_buffer.version = PROF_BUFFER_VER;
372 	prof_buffer.buffer = PROFBUF_SZ;
373 	prof_buffer.bufsize = n_pcsamples;
374 	prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
375 	if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
376 		perror(filename);
377 		exit(EX_IOERR);
378 	}
379 	/* CONSTCOND */
380 	if (BUF_FILLER)
381 		(void) fseek(fp, BUF_FILLER, SEEK_CUR);
382 
383 	*tarcs = 0;
384 	*ncallees = 0;
385 	for (mi = &modules; mi; mi = mi->next) {
386 		for (nlp = mi->nl; nlp < mi->npe; nlp++) {
387 			if (nlp->nticks)
388 				dump_hits(fp, filename, nlp);
389 
390 			nlp->ncallers = 0;
391 			for (arcp = nlp->parents; arcp;
392 			    arcp = arcp->arc_parentlist) {
393 				(nlp->ncallers)++;
394 			}
395 
396 			if (nlp->ncallers) {
397 				(*tarcs) += nlp->ncallers;
398 				(*ncallees)++;
399 			}
400 		}
401 	}
402 }
403 
404 static void
405 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
406 {
407 	char		*pbuf, *p;
408 	size_t		namelen;
409 	Index		off_nxt, off_path;
410 	mod_info_t	*mi;
411 
412 	ProfModuleList	prof_modlist;
413 	ProfModule	prof_mod;
414 
415 	/* Allocate for path strings buffer */
416 	pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
417 	if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
418 		(void) fprintf(stderr, "%s: no room for %d bytes\n",
419 		    whoami, pbuf_sz * sizeof (char));
420 		exit(EX_OSERR);
421 	}
422 
423 	/* Dump out PROF_MODULE_T info for all non-aout modules */
424 	prof_modlist.type = PROF_MODULES_T;
425 	prof_modlist.version = PROF_MODULES_VER;
426 	prof_modlist.modules = PROFMODLIST_SZ;
427 	prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
428 	    pbuf_sz;
429 	if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
430 		perror(filename);
431 		exit(EX_IOERR);
432 	}
433 	/* CONSTCOND */
434 	if (MODLIST_FILLER)
435 		(void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
436 
437 	/*
438 	 * Initialize offsets for ProfModule elements.
439 	 */
440 	off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
441 	off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
442 
443 	for (mi = modules.next; mi; mi = mi->next) {
444 		if (mi->next)
445 			prof_mod.next = off_nxt;
446 		else
447 			prof_mod.next = 0;
448 		prof_mod.path = off_path;
449 		prof_mod.startaddr = mi->load_base;
450 		prof_mod.endaddr = mi->load_end;
451 
452 		if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
453 			perror(filename);
454 			exit(EX_IOERR);
455 		}
456 
457 		/* CONSTCOND */
458 		if (MOD_FILLER)
459 			(void) fseek(fp, MOD_FILLER, SEEK_CUR);
460 
461 		(void) strcpy(p, mi->name);
462 		namelen = strlen(mi->name);
463 		p += namelen + 1;
464 
465 		/* Note that offset to every path str need not be aligned */
466 		off_nxt += PROFMOD_SZ;
467 		off_path += namelen + 1;
468 	}
469 
470 	/* Write out the module path strings */
471 	if (pbuf_sz) {
472 		if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
473 			perror(filename);
474 			exit(EX_IOERR);
475 		}
476 
477 		free(pbuf);
478 	}
479 }
480 
481 /*
482  * If we have inactive modules, their current load addresses may overlap with
483  * active ones, and so we've to assign fictitious, non-overlapping addresses
484  * to all modules before we dump them.
485  */
486 static void
487 fixup_maps(size_t *pathsz)
488 {
489 	unsigned int	n_inactive = 0;
490 	Address		lbase = 0, lend;
491 	mod_info_t	*mi;
492 
493 	/* Pick the lowest load address among modules */
494 	*pathsz = 0;
495 	for (mi = &modules; mi; mi = mi->next) {
496 
497 		if (mi->active == FALSE)
498 			n_inactive++;
499 
500 		if (mi == &modules || mi->load_base < lbase)
501 			lbase = mi->load_base;
502 
503 		/*
504 		 * Return total path size of non-aout modules only
505 		 */
506 		if (mi != &modules)
507 			*pathsz = (*pathsz) + strlen(mi->name) + 1;
508 	}
509 
510 	/*
511 	 * All module info is in fine shape already if there are no
512 	 * inactive modules
513 	 */
514 	if (n_inactive == 0)
515 		return;
516 
517 	/*
518 	 * Assign fictitious load addresses to all (non-aout) modules so
519 	 * that sum info can be dumped out.
520 	 */
521 	for (mi = modules.next; mi; mi = mi->next) {
522 		lend = lbase + (mi->data_end - mi->txt_origin);
523 		if ((lbase < modules.load_base && lend < modules.load_base) ||
524 		    (lbase > modules.load_end && lend > modules.load_end)) {
525 
526 			mi->load_base = lbase;
527 			mi->load_end = lend;
528 
529 			/* just to give an appearance of reality */
530 			lbase = CEIL(lend + PGSZ, PGSZ);
531 		} else {
532 			/*
533 			 * can't use this lbase & lend pair, as it
534 			 * overlaps with aout's addresses
535 			 */
536 			mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
537 			mi->load_end = mi->load_base + (lend - lbase);
538 
539 			lbase = CEIL(mi->load_end + PGSZ, PGSZ);
540 		}
541 	}
542 }
543 
544 static void
545 dump_gprofhdr(FILE *fp, char *filename)
546 {
547 	ProfHeader	prof_hdr;
548 
549 	prof_hdr.h_magic = PROF_MAGIC;
550 	prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
551 	prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
552 	prof_hdr.size = PROFHDR_SZ;
553 	if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
554 		perror(filename);
555 		exit(EX_IOERR);
556 	}
557 
558 	/* CONSTCOND */
559 	if (HDR_FILLER)
560 		(void) fseek(fp, HDR_FILLER, SEEK_CUR);
561 }
562 
563 static void
564 dumpsum_ostyle(char *sumfile)
565 {
566 	nltype *nlp;
567 	arctype *arcp;
568 	struct rawarc arc;
569 	struct rawarc32 arc32;
570 	FILE *sfile;
571 
572 	if ((sfile = fopen(sumfile, "w")) == NULL) {
573 		perror(sumfile);
574 		exit(EX_IOERR);
575 	}
576 	/*
577 	 * dump the header; use the last header read in
578 	 */
579 	if (Bflag) {
580 		if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
581 			perror(sumfile);
582 			exit(EX_IOERR);
583 		}
584 	} else {
585 		struct hdr32 hdr;
586 		hdr.lowpc  = (pctype32)h.lowpc;
587 		hdr.highpc = (pctype32)h.highpc;
588 		hdr.ncnt   = (pctype32)h.ncnt;
589 		if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
590 			perror(sumfile);
591 			exit(EX_IOERR);
592 		}
593 	}
594 	/*
595 	 * dump the samples
596 	 */
597 	if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
598 	    nsamples) {
599 		perror(sumfile);
600 		exit(EX_IOERR);
601 	}
602 	/*
603 	 * dump the normalized raw arc information. For old-style dumping,
604 	 * the only namelist is in modules.nl
605 	 */
606 	for (nlp = modules.nl; nlp < modules.npe; nlp++) {
607 		for (arcp = nlp->children; arcp;
608 		    arcp = arcp->arc_childlist) {
609 			if (Bflag) {
610 				arc.raw_frompc = arcp->arc_parentp->value;
611 				arc.raw_selfpc = arcp->arc_childp->value;
612 				arc.raw_count = arcp->arc_count;
613 				if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
614 					perror(sumfile);
615 					exit(EX_IOERR);
616 				}
617 			} else {
618 				arc32.raw_frompc =
619 				    (pctype32)arcp->arc_parentp->value;
620 				arc32.raw_selfpc =
621 				    (pctype32)arcp->arc_childp->value;
622 				arc32.raw_count = (actype32)arcp->arc_count;
623 				if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
624 				    1) {
625 					perror(sumfile);
626 					exit(EX_IOERR);
627 				}
628 			}
629 #ifdef DEBUG
630 			if (debug & SAMPLEDEBUG) {
631 				(void) printf(
632 				    "[dumpsum_ostyle] frompc 0x%llx selfpc "
633 				    "0x%llx count %lld\n", arc.raw_frompc,
634 				    arc.raw_selfpc, arc.raw_count);
635 			}
636 #endif /* DEBUG */
637 		}
638 	}
639 	(void) fclose(sfile);
640 }
641 
642 /*
643  * dump out the gmon.sum file
644  */
645 static void
646 dumpsum(char *sumfile)
647 {
648 	FILE		*sfile;
649 	size_t		pathbuf_sz;
650 	unsigned long	total_arcs;	/* total number of arcs in all */
651 	unsigned long	ncallees;	/* no. of callees with parents */
652 
653 	if (old_style) {
654 		dumpsum_ostyle(sumfile);
655 		return;
656 	}
657 
658 	if ((sfile = fopen(sumfile, "w")) == NULL) {
659 		perror(sumfile);
660 		exit(EX_IOERR);
661 	}
662 
663 	/*
664 	 * Dump the new-style gprof header. Even if one of the original
665 	 * profiled-files was of a older version, the summed file is of
666 	 * current version only.
667 	 */
668 	dump_gprofhdr(sfile, sumfile);
669 
670 	/*
671 	 * Fix up load-maps and dump out modules info
672 	 *
673 	 * Fix up module load maps so inactive modules get *some* address
674 	 * (and btw, could you get the total size of non-aout module path
675 	 * strings please ?)
676 	 */
677 	fixup_maps(&pathbuf_sz);
678 	dump_modules(sfile, sumfile, pathbuf_sz);
679 
680 
681 	/*
682 	 * Dump out the summ'd pcsamples
683 	 *
684 	 * For dumping call graph information later, we need certain
685 	 * statistics (like total arcs, number of callers for each node);
686 	 * collect these also while we are at it.
687 	 */
688 	dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
689 
690 	/*
691 	 * Dump out the summ'd call graph information
692 	 */
693 	dump_callgraph(sfile, sumfile, total_arcs, ncallees);
694 
695 
696 	(void) fclose(sfile);
697 }
698 
699 static void
700 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
701 {
702 	nltype		*parentp;
703 	nltype		*childp;
704 
705 	/*
706 	 * if count == 0 this is a null arc and
707 	 * we don't need to tally it.
708 	 */
709 	if (rawp->raw_count == 0)
710 		return;
711 
712 	/*
713 	 * Lookup the caller and callee pcs in namelists of
714 	 * appropriate modules
715 	 */
716 	parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
717 	childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
718 	if (childp && parentp) {
719 		if (!Dflag)
720 			childp->ncall += rawp->raw_count;
721 		else {
722 			if (first_file)
723 				childp->ncall += rawp->raw_count;
724 			else {
725 				childp->ncall -= rawp->raw_count;
726 				if (childp->ncall < 0)
727 					childp->ncall = 0;
728 			}
729 		}
730 
731 #ifdef DEBUG
732 		if (debug & TALLYDEBUG) {
733 			(void) printf("[tally] arc from %s to %s traversed "
734 			    "%lld times\n", parentp->name,
735 			    childp->name, rawp->raw_count);
736 		}
737 #endif /* DEBUG */
738 		addarc(parentp, childp, rawp->raw_count);
739 	}
740 }
741 
742 /*
743  * Look up a module's base address in a sorted list of pc-hits. Unlike
744  * nllookup(), this deals with misses by mapping them to the next *higher*
745  * pc-hit. This is so that we get into the module's first pc-hit rightaway,
746  * even if the module's entry-point (load_base) itself is not a hit.
747  */
748 static Address *
749 locate(Address	*pclist, size_t nelem, Address keypc)
750 {
751 	size_t	low = 0, middle, high = nelem - 1;
752 
753 	if (keypc <= pclist[low])
754 		return (pclist);
755 
756 	if (keypc > pclist[high])
757 		return (NULL);
758 
759 	while (low != high) {
760 		middle = (high + low) >> 1;
761 
762 		if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
763 			return (&pclist[middle + 1]);
764 
765 		if (pclist[middle] >= keypc)
766 			high = middle;
767 		else
768 			low = middle + 1;
769 	}
770 
771 	/* must never reach here! */
772 	return (NULL);
773 }
774 
775 static void
776 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
777 {
778 	Address		*pcptr, *pcse = pcsmpl + n_samples;
779 	pctype		nxt_func;
780 	nltype		*fnl;
781 	size_t		func_nticks;
782 #ifdef DEBUG
783 	size_t		n_hits_in_module = 0;
784 #endif /* DEBUG */
785 
786 	/* Locate the first pc-hit for this module */
787 	if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
788 #ifdef DEBUG
789 		if (debug & PCSMPLDEBUG) {
790 			(void) printf("[assign_pcsamples] no pc-hits in\n");
791 			(void) printf(
792 			    "                   `%s'\n", module->name);
793 		}
794 #endif /* DEBUG */
795 		return;			/* no pc-hits in this module */
796 	}
797 
798 	/* Assign all pc-hits in this module to appropriate functions */
799 	while ((pcptr < pcse) && (*pcptr < module->load_end)) {
800 
801 		/* Update the corresponding function's time */
802 		if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) {
803 			/*
804 			 * Collect all pc-hits in this function. Each
805 			 * pc-hit counts as 1 tick.
806 			 */
807 			func_nticks = 0;
808 			while ((pcptr < pcse) && (*pcptr < nxt_func)) {
809 				func_nticks++;
810 				pcptr++;
811 			}
812 
813 			if (func_nticks == 0)
814 				pcptr++;
815 			else {
816 				fnl->nticks += func_nticks;
817 				fnl->time += func_nticks;
818 				totime += func_nticks;
819 			}
820 
821 #ifdef DEBUG
822 			n_hits_in_module += func_nticks;
823 #endif /* DEBUG */
824 		} else {
825 			/*
826 			 * pc sample could not be assigned to function;
827 			 * probably in a PLT
828 			 */
829 			pcptr++;
830 		}
831 	}
832 
833 #ifdef DEBUG
834 	if (debug & PCSMPLDEBUG) {
835 		(void) printf(
836 		    "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
837 		(void) printf("                   `%s'\n", module->name);
838 	}
839 #endif /* DEBUG */
840 }
841 
842 int
843 pc_cmp(const void *arg1, const void *arg2)
844 {
845 	Address *pc1 = (Address *)arg1;
846 	Address *pc2 = (Address *)arg2;
847 
848 	if (*pc1 > *pc2)
849 		return (1);
850 
851 	if (*pc1 < *pc2)
852 		return (-1);
853 
854 	return (0);
855 }
856 
857 static void
858 process_pcsamples(ProfBuffer *bufp)
859 {
860 	Address		*pc_samples;
861 	mod_info_t	*mi;
862 	caddr_t		p;
863 	size_t		chunk_size, nelem_read, nelem_to_read;
864 
865 #ifdef DEBUG
866 	if (debug & PCSMPLDEBUG) {
867 		(void) printf(
868 		    "[process_pcsamples] number of pcsamples = %lld\n",
869 		    bufp->bufsize);
870 	}
871 #endif /* DEBUG */
872 
873 	/* buffer with no pc samples ? */
874 	if (bufp->bufsize == 0)
875 		return;
876 
877 	/*
878 	 * If we're processing pcsamples of a profile sum, we could have
879 	 * more than PROF_BUFFER_SIZE number of samples. In such a case,
880 	 * we must read the pcsamples in chunks.
881 	 */
882 	if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
883 		chunk_size = PROF_BUFFER_SIZE;
884 
885 	/* Allocate for the pcsample chunk */
886 	pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
887 	if (pc_samples == NULL) {
888 		(void) fprintf(stderr, "%s: no room for %d sample pc's\n",
889 		    whoami, chunk_size);
890 		exit(EX_OSERR);
891 	}
892 
893 	/* Copy the current set of pcsamples */
894 	nelem_read = 0;
895 	nelem_to_read = bufp->bufsize;
896 	p = (char *)bufp + bufp->buffer;
897 
898 	while (nelem_read < nelem_to_read) {
899 		(void) memcpy((void *) pc_samples, p,
900 		    chunk_size * sizeof (Address));
901 
902 		/* Sort the pc samples */
903 		qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
904 
905 		/*
906 		 * Assign pcsamples to functions in the currently active
907 		 * module list
908 		 */
909 		for (mi = &modules; mi; mi = mi->next) {
910 			if (mi->active == FALSE)
911 				continue;
912 			assign_pcsamples(mi, pc_samples, chunk_size);
913 		}
914 
915 		p += (chunk_size * sizeof (Address));
916 		nelem_read += chunk_size;
917 
918 		if ((nelem_to_read - nelem_read) < chunk_size)
919 			chunk_size = nelem_to_read - nelem_read;
920 	}
921 
922 	free(pc_samples);
923 
924 	/* Update total number of pcsamples read so far */
925 	n_pcsamples += bufp->bufsize;
926 }
927 
928 static mod_info_t *
929 find_module(Address addr)
930 {
931 	mod_info_t	*mi;
932 
933 	for (mi = &modules; mi; mi = mi->next) {
934 		if (mi->active == FALSE)
935 			continue;
936 
937 		if (addr >= mi->load_base && addr < mi->load_end)
938 			return (mi);
939 	}
940 
941 	return (NULL);
942 }
943 
944 static void
945 process_cgraph(ProfCallGraph *cgp)
946 {
947 	struct rawarc	arc;
948 	mod_info_t	*callee_mi, *caller_mi;
949 	ProfFunction	*calleep, *callerp;
950 	Index		caller_off, callee_off;
951 
952 	/*
953 	 * Note that *callee_off* increment in the for loop below
954 	 * uses *calleep* and *calleep* doesn't get set until the for loop
955 	 * is entered. We don't expect the increment to be executed before
956 	 * the loop body is executed atleast once, so this should be ok.
957 	 */
958 	for (callee_off = cgp->functions; callee_off;
959 	    callee_off = calleep->next_to) {
960 
961 		/* LINTED: pointer cast */
962 		calleep = (ProfFunction *)((char *)cgp + callee_off);
963 
964 		/*
965 		 * We could choose either to sort the {caller, callee}
966 		 * list twice and assign callee/caller to modules or inspect
967 		 * each callee/caller in the active modules list. Since
968 		 * the modules list is usually very small, we'l choose the
969 		 * latter.
970 		 */
971 
972 		/*
973 		 * If we cannot identify a callee with a module, there's
974 		 * no use worrying about who called it.
975 		 */
976 		if ((callee_mi = find_module(calleep->topc)) == NULL) {
977 #ifdef DEBUG
978 			if (debug & CGRAPHDEBUG) {
979 				(void) printf(
980 				    "[process_cgraph] callee %#llx missed\n",
981 				    calleep->topc);
982 			}
983 #endif /* DEBUG */
984 			continue;
985 		} else
986 			arc.raw_selfpc = calleep->topc;
987 
988 		for (caller_off = callee_off; caller_off;
989 		    caller_off = callerp->next_from)  {
990 
991 			/* LINTED: pointer cast */
992 			callerp = (ProfFunction *)((char *)cgp + caller_off);
993 			if ((caller_mi = find_module(callerp->frompc)) ==
994 			    NULL) {
995 #ifdef DEBUG
996 				if (debug & CGRAPHDEBUG) {
997 					(void) printf(
998 					    "[process_cgraph] caller %#llx "
999 					    "missed\n", callerp->frompc);
1000 				}
1001 #endif /* DEBUG */
1002 				continue;
1003 			}
1004 
1005 			arc.raw_frompc = callerp->frompc;
1006 			arc.raw_count = callerp->count;
1007 
1008 #ifdef DEBUG
1009 			if (debug & CGRAPHDEBUG) {
1010 				(void) printf(
1011 				    "[process_cgraph] arc <%#llx, %#llx, "
1012 				    "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
1013 				    arc.raw_count);
1014 			}
1015 #endif /* DEBUG */
1016 			tally(caller_mi, callee_mi, &arc);
1017 		}
1018 	}
1019 
1020 #ifdef DEBUG
1021 	puts("\n");
1022 #endif /* DEBUG */
1023 }
1024 
1025 /*
1026  * Two modules overlap each other if they don't lie completely *outside*
1027  * each other.
1028  */
1029 static bool
1030 does_overlap(ProfModule *new, mod_info_t *old)
1031 {
1032 	/* case 1: new module lies completely *before* the old one */
1033 	if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1034 		return (FALSE);
1035 
1036 	/* case 2: new module lies completely *after* the old one */
1037 	if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1038 		return (FALSE);
1039 
1040 	/* probably a dlopen: the modules overlap each other */
1041 	return (TRUE);
1042 }
1043 
1044 static bool
1045 is_same_as_aout(char *modpath, struct stat *buf)
1046 {
1047 	if (stat(modpath, buf) == -1) {
1048 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1049 		    whoami, modpath);
1050 		exit(EX_NOINPUT);
1051 	}
1052 
1053 	if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1054 		return (TRUE);
1055 	else
1056 		return (FALSE);
1057 }
1058 
1059 static void
1060 process_modules(ProfModuleList *modlp)
1061 {
1062 	ProfModule	*newmodp;
1063 	mod_info_t	*mi, *last, *new_module;
1064 	char		*so_path;
1065 	bool		more_modules = TRUE;
1066 	struct stat	so_statbuf;
1067 
1068 #ifdef DEBUG
1069 	if (debug & MODULEDEBUG) {
1070 		(void) printf("[process_modules] module obj version %u\n",
1071 		    modlp->version);
1072 	}
1073 #endif /* DEBUG */
1074 
1075 	/* Check version of module type object */
1076 	if (modlp->version > PROF_MODULES_VER) {
1077 		(void) fprintf(stderr, "%s: version %d for module type objects"
1078 		    "is not supported\n", whoami, modlp->version);
1079 		exit(EX_SOFTWARE);
1080 	}
1081 
1082 
1083 	/*
1084 	 * Scan the PROF_MODULES_T list and add modules to current list
1085 	 * of modules, if they're not present already
1086 	 */
1087 	/* LINTED: pointer cast */
1088 	newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1089 	do {
1090 		/*
1091 		 * Since the prog could've been renamed after its run, we
1092 		 * should see if this overlaps a.out. If it does, it is
1093 		 * probably the renamed aout. We should also skip any other
1094 		 * non-sharedobj's that we see (or should we report an error ?)
1095 		 */
1096 		so_path = (caddr_t)modlp + newmodp->path;
1097 		if (does_overlap(newmodp, &modules) ||
1098 		    is_same_as_aout(so_path, &so_statbuf) ||
1099 		    (!is_shared_obj(so_path))) {
1100 
1101 			if (!newmodp->next)
1102 				more_modules = FALSE;
1103 
1104 			/* LINTED: pointer cast */
1105 			newmodp = (ProfModule *)
1106 			    ((caddr_t)modlp + newmodp->next);
1107 #ifdef DEBUG
1108 			if (debug & MODULEDEBUG) {
1109 				(void) printf(
1110 				    "[process_modules] `%s'\n", so_path);
1111 				(void) printf("                  skipped\n");
1112 			}
1113 #endif /* DEBUG */
1114 			continue;
1115 		}
1116 #ifdef DEBUG
1117 		if (debug & MODULEDEBUG)
1118 			(void) printf("[process_modules] `%s'...\n", so_path);
1119 #endif /* DEBUG */
1120 
1121 		/*
1122 		 * Check all modules (leave the first one, 'cos that
1123 		 * is the program executable info). If this module is already
1124 		 * there in the list, update the load addresses and proceed.
1125 		 */
1126 		last = &modules;
1127 		while ((mi = last->next) != NULL) {
1128 			/*
1129 			 * We expect the full pathname for all shared objects
1130 			 * needed by the program executable. In this case, we
1131 			 * simply need to compare the paths to see if they are
1132 			 * the same file.
1133 			 */
1134 			if (strcmp(mi->name, so_path) == 0)
1135 				break;
1136 
1137 			/*
1138 			 * Check if this new shared object will overlap
1139 			 * any existing module. If yes, remove the old one
1140 			 * from the linked list (but don't free it, 'cos
1141 			 * there may be symbols referring to this module
1142 			 * still)
1143 			 */
1144 			if (does_overlap(newmodp, mi)) {
1145 #ifdef DEBUG
1146 				if (debug & MODULEDEBUG) {
1147 					(void) printf(
1148 					    "[process_modules] `%s'\n",
1149 					    so_path);
1150 					(void) printf(
1151 					    "                  overlaps\n");
1152 					(void) printf(
1153 					    "                  `%s'\n",
1154 					    mi->name);
1155 				}
1156 #endif /* DEBUG */
1157 				mi->active = FALSE;
1158 			}
1159 
1160 			last = mi;
1161 		}
1162 
1163 		/* Module already there, skip it */
1164 		if (mi != NULL) {
1165 			mi->load_base = newmodp->startaddr;
1166 			mi->load_end = newmodp->endaddr;
1167 			mi->active = TRUE;
1168 			if (!newmodp->next)
1169 				more_modules = FALSE;
1170 
1171 			/* LINTED: pointer cast */
1172 			newmodp = (ProfModule *)
1173 			    ((caddr_t)modlp + newmodp->next);
1174 
1175 #ifdef DEBUG
1176 			if (debug & MODULEDEBUG) {
1177 				(void) printf("[process_modules] base=%#llx, "
1178 				    "end=%#llx\n", mi->load_base, mi->load_end);
1179 			}
1180 #endif /* DEBUG */
1181 			continue;
1182 		}
1183 
1184 		/*
1185 		 * Check if gmon.out is outdated with respect to the new
1186 		 * module we want to add
1187 		 */
1188 		if (gmonout_info.mtime < so_statbuf.st_mtime) {
1189 			(void) fprintf(stderr,
1190 			    "%s: shared obj outdates prof info\n", whoami);
1191 			(void) fprintf(stderr, "\t(newer %s)\n", so_path);
1192 			exit(EX_NOINPUT);
1193 		}
1194 
1195 		/* Create a new module element */
1196 		new_module = malloc(sizeof (mod_info_t));
1197 		if (new_module == NULL) {
1198 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1199 			    whoami, sizeof (mod_info_t));
1200 			exit(EX_OSERR);
1201 		}
1202 
1203 		/* and fill in info... */
1204 		new_module->id = n_modules + 1;
1205 		new_module->load_base = newmodp->startaddr;
1206 		new_module->load_end = newmodp->endaddr;
1207 		new_module->name = malloc(strlen(so_path) + 1);
1208 		if (new_module->name == NULL) {
1209 			(void) fprintf(stderr, "%s: no room for %d bytes\n",
1210 			    whoami, strlen(so_path) + 1);
1211 			exit(EX_OSERR);
1212 		}
1213 		(void) strcpy(new_module->name, so_path);
1214 #ifdef DEBUG
1215 		if (debug & MODULEDEBUG) {
1216 			(void) printf(
1217 			    "[process_modules] base=%#llx, end=%#llx\n",
1218 			    new_module->load_base, new_module->load_end);
1219 		}
1220 #endif /* DEBUG */
1221 
1222 		/* Create this module's nameslist */
1223 		process_namelist(new_module);
1224 
1225 		/* Add it to the tail of active module list */
1226 		last->next = new_module;
1227 		n_modules++;
1228 
1229 #ifdef DEBUG
1230 		if (debug & MODULEDEBUG) {
1231 			(void) printf(
1232 			    "[process_modules] total shared objects = %ld\n",
1233 			    n_modules - 1);
1234 		}
1235 #endif /* DEBUG */
1236 		/*
1237 		 * Move to the next module in the PROF_MODULES_T list
1238 		 * (if present)
1239 		 */
1240 		if (!newmodp->next)
1241 			more_modules = FALSE;
1242 
1243 		/* LINTED: pointer cast */
1244 		newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1245 
1246 	} while (more_modules);
1247 }
1248 
1249 static void
1250 reset_active_modules(void)
1251 {
1252 	mod_info_t	*mi;
1253 
1254 	/* Except the executable, no other module should remain active */
1255 	for (mi = modules.next; mi; mi = mi->next)
1256 		mi->active = FALSE;
1257 }
1258 
1259 static void
1260 getpfiledata(caddr_t memp, size_t fsz)
1261 {
1262 	ProfObject	*objp;
1263 	caddr_t		file_end;
1264 	bool		found_pcsamples = FALSE, found_cgraph = FALSE;
1265 
1266 	/*
1267 	 * Before processing a new gmon.out, all modules except the
1268 	 * program executable must be made inactive, so that symbols
1269 	 * are searched only in the program executable, if we don't
1270 	 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1271 	 * because we need the active module data after we're done with
1272 	 * the last gmon.out, if we're doing summing.
1273 	 */
1274 	reset_active_modules();
1275 
1276 	file_end = memp + fsz;
1277 	/* LINTED: pointer cast */
1278 	objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1279 	while ((caddr_t)objp < file_end) {
1280 #ifdef DEBUG
1281 		{
1282 			unsigned int	type = 0;
1283 
1284 			if (debug & MONOUTDEBUG) {
1285 				if (objp->type <= MAX_OBJTYPES)
1286 					type = objp->type;
1287 
1288 				(void) printf(
1289 				    "\n[getpfiledata] object %s [%#lx]\n",
1290 				    objname[type], objp->type);
1291 			}
1292 		}
1293 #endif /* DEBUG */
1294 		switch (objp->type) {
1295 			case PROF_MODULES_T :
1296 				process_modules((ProfModuleList *) objp);
1297 				break;
1298 
1299 			case PROF_CALLGRAPH_T :
1300 				process_cgraph((ProfCallGraph *) objp);
1301 				found_cgraph = TRUE;
1302 				break;
1303 
1304 			case PROF_BUFFER_T :
1305 				process_pcsamples((ProfBuffer *) objp);
1306 				found_pcsamples = TRUE;
1307 				break;
1308 
1309 			default :
1310 				(void) fprintf(stderr,
1311 				    "%s: unknown prof object type=%d\n",
1312 				    whoami, objp->type);
1313 				exit(EX_SOFTWARE);
1314 		}
1315 		/* LINTED: pointer cast */
1316 		objp = (ProfObject *)((caddr_t)objp + objp->size);
1317 	}
1318 
1319 	if (!found_cgraph || !found_pcsamples) {
1320 		(void) fprintf(stderr,
1321 		    "%s: missing callgraph/pcsamples object\n", whoami);
1322 		exit(EX_SOFTWARE);
1323 	}
1324 
1325 	if ((caddr_t)objp > file_end) {
1326 		(void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1327 		exit(EX_SOFTWARE);
1328 	}
1329 
1330 	if (first_file)
1331 		first_file = FALSE;
1332 }
1333 
1334 static void
1335 readarcs(FILE *pfile)
1336 {
1337 	/*
1338 	 *	the rest of the file consists of
1339 	 *	a bunch of <from,self,count> tuples.
1340 	 */
1341 	/* CONSTCOND */
1342 	while (1) {
1343 		struct rawarc	arc;
1344 
1345 		if (rflag) {
1346 			if (Bflag) {
1347 				L_cgarc64		rtld_arc64;
1348 
1349 				/*
1350 				 * If rflag is set then this is an profiled
1351 				 * image generated by rtld.  It needs to be
1352 				 * 'converted' to the standard data format.
1353 				 */
1354 				if (fread(&rtld_arc64,
1355 				    sizeof (L_cgarc64), 1, pfile) != 1)
1356 					break;
1357 
1358 				if (rtld_arc64.cg_from == PRF_OUTADDR64)
1359 					arc.raw_frompc = s_highpc + 0x10;
1360 				else
1361 					arc.raw_frompc =
1362 					    (pctype)rtld_arc64.cg_from;
1363 				arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1364 				arc.raw_count = (actype)rtld_arc64.cg_count;
1365 			} else {
1366 				L_cgarc		rtld_arc;
1367 
1368 				/*
1369 				 * If rflag is set then this is an profiled
1370 				 * image generated by rtld.  It needs to be
1371 				 * 'converted' to the standard data format.
1372 				 */
1373 				if (fread(&rtld_arc,
1374 				    sizeof (L_cgarc), 1, pfile) != 1)
1375 					break;
1376 
1377 				if (rtld_arc.cg_from == PRF_OUTADDR)
1378 					arc.raw_frompc = s_highpc + 0x10;
1379 				else
1380 					arc.raw_frompc = (pctype)
1381 					    (uintptr_t)rtld_arc.cg_from;
1382 				arc.raw_selfpc = (pctype)
1383 				    (uintptr_t)rtld_arc.cg_to;
1384 				arc.raw_count = (actype)rtld_arc.cg_count;
1385 			}
1386 		} else {
1387 			if (Bflag) {
1388 				if (fread(&arc, sizeof (struct rawarc), 1,
1389 				    pfile) != 1) {
1390 					break;
1391 				}
1392 			} else {
1393 				/*
1394 				 * If these aren't big %pc's, we need to read
1395 				 * into the 32-bit raw arc structure, and
1396 				 * assign the members into the actual arc.
1397 				 */
1398 				struct rawarc32 arc32;
1399 				if (fread(&arc32, sizeof (struct rawarc32),
1400 				    1, pfile) != 1)
1401 					break;
1402 				arc.raw_frompc = (pctype)arc32.raw_frompc;
1403 				arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1404 				arc.raw_count  = (actype)arc32.raw_count;
1405 			}
1406 		}
1407 
1408 #ifdef DEBUG
1409 		if (debug & SAMPLEDEBUG) {
1410 			(void) printf("[getpfile] frompc 0x%llx selfpc "
1411 			    "0x%llx count %lld\n", arc.raw_frompc,
1412 			    arc.raw_selfpc, arc.raw_count);
1413 		}
1414 #endif /* DEBUG */
1415 		/*
1416 		 *	add this arc
1417 		 */
1418 		tally(&modules, &modules, &arc);
1419 	}
1420 	if (first_file)
1421 		first_file = FALSE;
1422 }
1423 
1424 static void
1425 readsamples(FILE *pfile)
1426 {
1427 	sztype		i;
1428 	unsigned_UNIT	sample;
1429 
1430 	if (samples == 0) {
1431 		samples = (unsigned_UNIT *) calloc(nsamples,
1432 		    sizeof (unsigned_UNIT));
1433 		if (samples == 0) {
1434 			(void) fprintf(stderr,
1435 			    "%s: No room for %d sample pc's\n",
1436 			    whoami, sampbytes / sizeof (unsigned_UNIT));
1437 			exit(EX_OSERR);
1438 		}
1439 	}
1440 
1441 	for (i = 0; i < nsamples; i++) {
1442 		(void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1443 		if (feof(pfile))
1444 			break;
1445 		samples[i] += sample;
1446 	}
1447 	if (i != nsamples) {
1448 		(void) fprintf(stderr,
1449 		    "%s: unexpected EOF after reading %d/%d samples\n",
1450 		    whoami, --i, nsamples);
1451 		exit(EX_IOERR);
1452 	}
1453 }
1454 
1455 static void *
1456 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1457 {
1458 	int		fd;
1459 	bool		invalid_version;
1460 	caddr_t		fmem;
1461 	struct stat	buf;
1462 	ProfHeader	prof_hdr;
1463 	off_t		lret;
1464 
1465 	/*
1466 	 * Check versioning info. For now, let's say we provide
1467 	 * backward compatibility, so we accept all older versions.
1468 	 */
1469 	if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1470 		perror("fread()");
1471 		exit(EX_IOERR);
1472 	}
1473 
1474 	invalid_version = FALSE;
1475 	if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1476 		invalid_version = TRUE;
1477 	else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1478 		if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1479 			invalid_version = FALSE;
1480 	}
1481 
1482 	if (invalid_version) {
1483 		(void) fprintf(stderr, "%s: version %d.%d not supported\n",
1484 		    whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1485 		exit(EX_SOFTWARE);
1486 	}
1487 
1488 	/*
1489 	 * Map gmon.out onto memory.
1490 	 */
1491 	(void) fclose(pfile);
1492 	if ((fd = open(filename, O_RDONLY)) == -1) {
1493 		perror(filename);
1494 		exit(EX_IOERR);
1495 	}
1496 
1497 	if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1498 		perror(filename);
1499 		exit(EX_IOERR);
1500 	}
1501 	*fsz = lret;
1502 
1503 	fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1504 	if (fmem == MAP_FAILED) {
1505 		(void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1506 		exit(EX_IOERR);
1507 	}
1508 
1509 	/*
1510 	 * Before we close this fd, save this gmon.out's info to later verify
1511 	 * if the shared objects it references have changed since the time
1512 	 * they were used to generate this gmon.out
1513 	 */
1514 	if (fstat(fd, &buf) == -1) {
1515 		(void) fprintf(stderr, "%s: can't get info on `%s'\n",
1516 		    whoami, filename);
1517 		exit(EX_NOINPUT);
1518 	}
1519 	gmonout_info.dev = buf.st_dev;
1520 	gmonout_info.ino = buf.st_ino;
1521 	gmonout_info.mtime = buf.st_mtime;
1522 	gmonout_info.size = buf.st_size;
1523 
1524 	(void) close(fd);
1525 
1526 	return ((void *) fmem);
1527 }
1528 
1529 static void *
1530 openpfile(char *filename, size_t *fsz)
1531 {
1532 	struct hdr	tmp;
1533 	FILE		*pfile;
1534 	unsigned long	magic_num;
1535 	size_t		hdrsize;
1536 	static bool	first_time = TRUE;
1537 	extern bool	old_style;
1538 
1539 	if ((pfile = fopen(filename, "r")) == NULL) {
1540 		perror(filename);
1541 		exit(EX_IOERR);
1542 	}
1543 
1544 	/*
1545 	 * Read in the magic. Note that we changed the cast "unsigned long"
1546 	 * to "unsigned int" because that's how h_magic is defined in the
1547 	 * new format ProfHeader.
1548 	 */
1549 	if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1550 		perror("fread()");
1551 		exit(EX_IOERR);
1552 	}
1553 
1554 	rewind(pfile);
1555 
1556 	/*
1557 	 * First check if this is versioned or *old-style* gmon.out
1558 	 */
1559 	if (magic_num == (unsigned int)PROF_MAGIC) {
1560 		if ((!first_time) && (old_style == TRUE)) {
1561 			(void) fprintf(stderr, "%s: can't mix old & new format "
1562 			    "profiled files\n", whoami);
1563 			exit(EX_SOFTWARE);
1564 		}
1565 		first_time = FALSE;
1566 		old_style = FALSE;
1567 		return (handle_versioned(pfile, filename, fsz));
1568 	}
1569 
1570 	if ((!first_time) && (old_style == FALSE)) {
1571 		(void) fprintf(stderr, "%s: can't mix old & new format "
1572 		    "profiled files\n", whoami);
1573 		exit(EX_SOFTWARE);
1574 	}
1575 
1576 	first_time = FALSE;
1577 	old_style = TRUE;
1578 	fsz = 0;
1579 
1580 	/*
1581 	 * Now, we need to determine if this is a run-time linker
1582 	 * profiled file or if it is a standard gmon.out.
1583 	 *
1584 	 * We do this by checking if magic matches PRF_MAGIC. If it
1585 	 * does, then this is a run-time linker profiled file, if it
1586 	 * doesn't, it must be a gmon.out file.
1587 	 */
1588 	if (magic_num == (unsigned long)PRF_MAGIC)
1589 		rflag = TRUE;
1590 	else
1591 		rflag = FALSE;
1592 
1593 	hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1594 
1595 	if (rflag) {
1596 		if (Bflag) {
1597 			L_hdr64		l_hdr64;
1598 
1599 			/*
1600 			 * If the rflag is set then the input file is
1601 			 * rtld profiled data, we'll read it in and convert
1602 			 * it to the standard format (ie: make it look like
1603 			 * a gmon.out file).
1604 			 */
1605 			if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1606 				perror("fread()");
1607 				exit(EX_IOERR);
1608 			}
1609 			if (l_hdr64.hd_version != PRF_VERSION_64) {
1610 				(void) fprintf(stderr,
1611 				    "%s: expected version %d, "
1612 				    "got version %d when processing 64-bit "
1613 				    "run-time linker profiled file.\n",
1614 				    whoami, PRF_VERSION_64, l_hdr64.hd_version);
1615 				exit(EX_SOFTWARE);
1616 			}
1617 			tmp.lowpc = 0;
1618 			tmp.highpc = (pctype)l_hdr64.hd_hpc;
1619 			tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1620 		} else {
1621 			L_hdr		l_hdr;
1622 
1623 			/*
1624 			 * If the rflag is set then the input file is
1625 			 * rtld profiled data, we'll read it in and convert
1626 			 * it to the standard format (ie: make it look like
1627 			 * a gmon.out file).
1628 			 */
1629 			if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1630 				perror("fread()");
1631 				exit(EX_IOERR);
1632 			}
1633 			if (l_hdr.hd_version != PRF_VERSION) {
1634 				(void) fprintf(stderr,
1635 				    "%s: expected version %d, "
1636 				    "got version %d when processing "
1637 				    "run-time linker profiled file.\n",
1638 				    whoami, PRF_VERSION, l_hdr.hd_version);
1639 				exit(EX_SOFTWARE);
1640 			}
1641 			tmp.lowpc = 0;
1642 			tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1643 			tmp.ncnt = hdrsize + l_hdr.hd_psize;
1644 		}
1645 	} else {
1646 		if (Bflag) {
1647 			if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1648 				perror("fread()");
1649 				exit(EX_IOERR);
1650 			}
1651 		} else {
1652 			/*
1653 			 * If we're not reading big %pc's, we need to read
1654 			 * the 32-bit header, and assign the members to
1655 			 * the actual header.
1656 			 */
1657 			struct hdr32 hdr32;
1658 			if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1659 				perror("fread()");
1660 				exit(EX_IOERR);
1661 			}
1662 			tmp.lowpc = hdr32.lowpc;
1663 			tmp.highpc = hdr32.highpc;
1664 			tmp.ncnt = hdr32.ncnt;
1665 		}
1666 	}
1667 
1668 	/*
1669 	 * perform sanity check on profiled file we've opened.
1670 	 */
1671 	if (tmp.lowpc >= tmp.highpc) {
1672 		if (rflag)
1673 			(void) fprintf(stderr,
1674 			    "%s: badly formed profiled data.\n",
1675 			    filename);
1676 		else
1677 			(void) fprintf(stderr,
1678 			    "%s: badly formed gmon.out file.\n",
1679 			    filename);
1680 		exit(EX_SOFTWARE);
1681 	}
1682 
1683 	if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1684 	    tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1685 		(void) fprintf(stderr,
1686 		    "%s: incompatible with first gmon file\n",
1687 		    filename);
1688 		exit(EX_IOERR);
1689 	}
1690 	h = tmp;
1691 	s_lowpc = h.lowpc;
1692 	s_highpc = h.highpc;
1693 	lowpc = h.lowpc / sizeof (UNIT);
1694 	highpc = h.highpc / sizeof (UNIT);
1695 	sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1696 	nsamples = sampbytes / sizeof (unsigned_UNIT);
1697 
1698 #ifdef DEBUG
1699 	if (debug & SAMPLEDEBUG) {
1700 		(void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1701 		    "0x%llx hdr.ncnt %lld\n",
1702 		    h.lowpc, h.highpc, h.ncnt);
1703 		(void) printf(
1704 		    "[openpfile]   s_lowpc 0x%llx   s_highpc 0x%llx\n",
1705 		    s_lowpc, s_highpc);
1706 		(void) printf(
1707 		    "[openpfile]     lowpc 0x%llx     highpc 0x%llx\n",
1708 		    lowpc, highpc);
1709 		(void) printf("[openpfile] sampbytes %d nsamples %d\n",
1710 		    sampbytes, nsamples);
1711 	}
1712 #endif /* DEBUG */
1713 
1714 	return ((void *) pfile);
1715 }
1716 
1717 /*
1718  * Information from a gmon.out file depends on whether it's versioned
1719  * or non-versioned, *old style* gmon.out. If old-style, it is in two
1720  * parts : an array of sampling hits within pc ranges, and the arcs. If
1721  * versioned, it contains a header, followed by any number of
1722  * modules/callgraph/pcsample_buffer objects.
1723  */
1724 static void
1725 getpfile(char *filename)
1726 {
1727 	void		*handle;
1728 	size_t		fsz;
1729 
1730 	handle = openpfile(filename, &fsz);
1731 
1732 	if (old_style) {
1733 		readsamples((FILE *)handle);
1734 		readarcs((FILE *)handle);
1735 		(void) fclose((FILE *)handle);
1736 		return;
1737 	}
1738 
1739 	getpfiledata((caddr_t)handle, fsz);
1740 	(void) munmap(handle, fsz);
1741 }
1742 
1743 int
1744 main(int argc, char **argv)
1745 {
1746 	char	**sp;
1747 	nltype	**timesortnlp;
1748 	int		c;
1749 	int		errflg;
1750 
1751 	prog_name = *argv;  /* preserve program name */
1752 	debug = 0;
1753 	nflag = FALSE;
1754 	bflag = TRUE;
1755 	lflag = FALSE;
1756 	Cflag = FALSE;
1757 	first_file = TRUE;
1758 	rflag = FALSE;
1759 	Bflag = FALSE;
1760 	errflg = FALSE;
1761 
1762 	while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1763 		switch (c) {
1764 		case 'a':
1765 			aflag = TRUE;
1766 			break;
1767 		case 'b':
1768 			bflag = FALSE;
1769 			break;
1770 		case 'c':
1771 			cflag = TRUE;
1772 			break;
1773 		case 'C':
1774 			Cflag = TRUE;
1775 			break;
1776 		case 'd':
1777 			dflag = TRUE;
1778 			debug |= atoi(optarg);
1779 			(void) printf("[main] debug = 0x%x\n", debug);
1780 			break;
1781 		case 'D':
1782 			Dflag = TRUE;
1783 			break;
1784 		case 'E':
1785 			addlist(Elist, optarg);
1786 			Eflag = TRUE;
1787 			addlist(elist, optarg);
1788 			eflag = TRUE;
1789 			break;
1790 		case 'e':
1791 			addlist(elist, optarg);
1792 			eflag = TRUE;
1793 			break;
1794 		case 'F':
1795 			addlist(Flist, optarg);
1796 			Fflag = TRUE;
1797 			addlist(flist, optarg);
1798 			fflag = TRUE;
1799 			break;
1800 		case 'f':
1801 			addlist(flist, optarg);
1802 			fflag = TRUE;
1803 			break;
1804 		case 'l':
1805 			lflag = TRUE;
1806 			break;
1807 		case 'n':
1808 			nflag = TRUE;
1809 			number_funcs_toprint = atoi(optarg);
1810 			break;
1811 		case 's':
1812 			sflag = TRUE;
1813 			break;
1814 		case 'z':
1815 			zflag = TRUE;
1816 			break;
1817 		case '?':
1818 			errflg++;
1819 
1820 		}
1821 
1822 	if (errflg) {
1823 		(void) fprintf(stderr,
1824 		    "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1825 		    "[ -E function-name ]\n\t[ -f function-name ] "
1826 		    "[ -F function-name  ]\n\t[  image-file  "
1827 		    "[ profile-file ... ] ]\n");
1828 		exit(EX_USAGE);
1829 	}
1830 
1831 	if (optind < argc) {
1832 		a_outname  = argv[optind++];
1833 	} else {
1834 		a_outname  = A_OUTNAME;
1835 	}
1836 	if (optind < argc) {
1837 		gmonname = argv[optind++];
1838 	} else {
1839 		gmonname = GMONNAME;
1840 	}
1841 	/*
1842 	 *	turn off default functions
1843 	 */
1844 	for (sp = &defaultEs[0]; *sp; sp++) {
1845 		Eflag = TRUE;
1846 		addlist(Elist, *sp);
1847 		eflag = TRUE;
1848 		addlist(elist, *sp);
1849 	}
1850 	/*
1851 	 *	how many ticks per second?
1852 	 *	if we can't tell, report time in ticks.
1853 	 */
1854 	hz = sysconf(_SC_CLK_TCK);
1855 	if (hz == -1) {
1856 		hz = 1;
1857 		(void) fprintf(stderr, "time is in ticks, not seconds\n");
1858 	}
1859 
1860 	getnfile(a_outname);
1861 
1862 	/*
1863 	 *	get information about mon.out file(s).
1864 	 */
1865 	do {
1866 		getpfile(gmonname);
1867 		if (optind < argc)
1868 			gmonname = argv[optind++];
1869 		else
1870 			optind++;
1871 	} while (optind <= argc);
1872 	/*
1873 	 *	dump out a gmon.sum file if requested
1874 	 */
1875 	if (sflag || Dflag)
1876 		dumpsum(GMONSUM);
1877 
1878 	if (old_style) {
1879 		/*
1880 		 *	assign samples to procedures
1881 		 */
1882 		asgnsamples();
1883 	}
1884 
1885 	/*
1886 	 *	assemble the dynamic profile
1887 	 */
1888 	timesortnlp = doarcs();
1889 
1890 	/*
1891 	 *	print the dynamic profile
1892 	 */
1893 #ifdef DEBUG
1894 	if (debug & ANYDEBUG) {
1895 		/* raw output of all symbols in all their glory */
1896 		int i;
1897 		(void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1898 		    "#calls, selfcalls, index \n");
1899 		for (i = 0; i < modules.nname; i++) { 	/* Print each symbol */
1900 			if (timesortnlp[i]->name)
1901 				(void) printf(" %s ", timesortnlp[i]->name);
1902 			else
1903 				(void) printf(" <cycle> ");
1904 			(void) printf(" %lld ", timesortnlp[i]->value);
1905 			(void) printf(" %lld ", timesortnlp[i]->svalue);
1906 			(void) printf(" %f ", timesortnlp[i]->time);
1907 			(void) printf(" %lld ", timesortnlp[i]->ncall);
1908 			(void) printf(" %lld ", timesortnlp[i]->selfcalls);
1909 			(void) printf(" %d ", timesortnlp[i]->index);
1910 			(void) printf(" \n");
1911 		}
1912 	}
1913 #endif /* DEBUG */
1914 
1915 	printgprof(timesortnlp);
1916 	/*
1917 	 *	print the flat profile
1918 	 */
1919 	printprof();
1920 	/*
1921 	 *	print the index
1922 	 */
1923 	printindex();
1924 
1925 	/*
1926 	 * print the modules
1927 	 */
1928 	printmodules();
1929 
1930 	done();
1931 	/* NOTREACHED */
1932 	return (0);
1933 }
1934