1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sysexits.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <unistd.h>
30 #include "gprof.h"
31 #include "profile.h"
32
/*
 * Command line option flags.  Only the ones whose effect is visible in this
 * file are annotated.
 */
bool aflag;
bool bflag;
/* dumpsum_ostyle(): write struct hdr/rawarc as-is instead of 32-bit forms */
bool Bflag;
bool cflag;
bool Cflag;
bool dflag;
/* tally(): call counts of files after the first are subtracted, not added */
bool Dflag;
bool eflag;
bool Eflag;
bool fflag;
bool Fflag;
bool lflag;
bool sflag;
bool zflag;
bool nflag;
bool rflag;
/* consulted by tally() together with Dflag */
bool first_file;
/* dumpsum(): when set, the summary is written by dumpsum_ostyle() */
bool old_style;
/* asgnsamples(): (highpc - lowpc) / nsamples */
double scale;
/* running total of ticks credited to functions */
double totime;
/* running total of pc samples seen by process_pcsamples() */
Size n_pcsamples;
/* head of the module list; the first entry describes the executable */
mod_info_t modules;
pctype s_lowpc;
pctype s_highpc;
/* number of entries on the modules list, the executable included */
sztype n_modules;
sztype sampbytes;
/* number of entries in samples[] */
sztype nsamples;
/* old-style histogram counters, indexed by bucket */
unsigned short *samples;
/* dev/ino of the executable; compared in is_same_as_aout() */
fl_info_t aout_info;
/* mtime of the gmon.out file; compared in process_modules() */
fl_info_t gmonout_info;
long hz;
/* last old-style header read; re-emitted by dumpsum_ostyle() */
struct hdr h;
unsigned char *textspace;
/* bit mask of *DEBUG flags selecting trace output in DEBUG builds */
int debug;
int number_funcs_toprint;
char *a_outname;
char *prog_name;
char *gmonname;
/* program name used as the prefix of diagnostics */
char *whoami = "gprof";
static pctype lowpc, highpc; /* range profiled, in UNIT's */
73
/*
 * Names of the symbols that get -E excluded by default.  The list is
 * terminated by a NULL entry.
 */
static char *defaultEs[] = {
	"mcount",
	"__mcleanup",
	NULL
};
82
#ifdef DEBUG

/*
 * Printable names of the profile object types, for DEBUG traces only.
 * The list is terminated by a NULL entry.
 * NOTE(review): presumably indexed by an object's type value, with index 0
 * reserved for invalid types and MAX_OBJTYPES the highest valid index --
 * no use is visible in this part of the file, so confirm against the reader.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
95
/*
 * Terminate the program, reporting success (EX_OK) to the caller's
 * environment.  Does not return.
 */
void
done(void)
{
	exit(EX_OK);
}
102
103 static pctype
max(pctype a,pctype b)104 max(pctype a, pctype b)
105 {
106 if (a > b)
107 return (a);
108 return (b);
109 }
110
111 static pctype
min(pctype a,pctype b)112 min(pctype a, pctype b)
113 {
114 if (a < b)
115 return (a);
116 return (b);
117 }
118
119 /*
120 * calculate scaled entry point addresses (to save time in asgnsamples),
121 * and possibly push the scaled entry points over the entry mask,
122 * if it turns out that the entry point is in one bucket and the code
123 * for a routine is in the next bucket.
124 *
125 */
126 static void
alignentries(void)127 alignentries(void)
128 {
129 struct nl *nlp;
130 #ifdef DEBUG
131 pctype bucket_of_entry;
132 pctype bucket_of_code;
133 #endif /* DEBUG */
134
135 /* for old-style gmon.out, nameslist is only in modules.nl */
136
137 for (nlp = modules.nl; nlp < modules.npe; nlp++) {
138 nlp->svalue = nlp->value / sizeof (UNIT);
139 #ifdef DEBUG
140 bucket_of_entry = (nlp->svalue - lowpc) / scale;
141 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
142 if (bucket_of_entry < bucket_of_code) {
143 if (debug & SAMPLEDEBUG) {
144 (void) printf(
145 "[alignentries] pushing svalue 0x%llx "
146 "to 0x%llx\n", nlp->svalue,
147 nlp->svalue + UNITS_TO_CODE);
148 }
149 }
150 #endif /* DEBUG */
151 }
152 }
153
154 /*
155 * old-style gmon.out
156 * ------------------
157 *
158 * Assign samples to the procedures to which they belong.
159 *
160 * There are three cases as to where pcl and pch can be
161 * with respect to the routine entry addresses svalue0 and svalue1
162 * as shown in the following diagram. overlap computes the
163 * distance between the arrows, the fraction of the sample
164 * that is to be credited to the routine which starts at svalue0.
165 *
166 * svalue0 svalue1
167 * | |
168 * v v
169 *
170 * +-----------------------------------------------+
171 * | |
172 * | ->| |<- ->| |<- ->| |<- |
173 * | | | | | |
174 * +---------+ +---------+ +---------+
175 *
176 * ^ ^ ^ ^ ^ ^
177 * | | | | | |
178 * pcl pch pcl pch pcl pch
179 *
180 * For the vax we assert that samples will never fall in the first
181 * two bytes of any routine, since that is the entry mask,
182 * thus we give call alignentries() to adjust the entry points if
183 * the entry mask falls in one bucket but the code for the routine
184 * doesn't start until the next bucket. In conjunction with the
185 * alignment of routine addresses, this should allow us to have
186 * only one sample for every four bytes of text space and never
187 * have any overlap (the two end cases, above).
188 */
189 static void
asgnsamples(void)190 asgnsamples(void)
191 {
192 sztype i, j;
193 unsigned_UNIT ccnt;
194 double time;
195 pctype pcl, pch;
196 pctype overlap;
197 pctype svalue0, svalue1;
198
199 extern mod_info_t modules;
200 nltype *nl = modules.nl;
201 sztype nname = modules.nname;
202
203 /* read samples and assign to namelist symbols */
204 scale = highpc - lowpc;
205 scale /= nsamples;
206 alignentries();
207 for (i = 0, j = 1; i < nsamples; i++) {
208 ccnt = samples[i];
209 if (ccnt == 0)
210 continue;
211 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
212 pcl = lowpc + scale * i;
213 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
214 pch = lowpc + scale * (i + 1);
215 time = ccnt;
216 #ifdef DEBUG
217 if (debug & SAMPLEDEBUG) {
218 (void) printf(
219 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
220 pcl, pch, ccnt);
221 }
222 #endif /* DEBUG */
223 totime += time;
224 for (j = (j ? j - 1 : 0); j < nname; j++) {
225 svalue0 = nl[j].svalue;
226 svalue1 = nl[j+1].svalue;
227 /*
228 * if high end of tick is below entry address,
229 * go for next tick.
230 */
231 if (pch < svalue0)
232 break;
233 /*
234 * if low end of tick into next routine,
235 * go for next routine.
236 */
237 if (pcl >= svalue1)
238 continue;
239 overlap = min(pch, svalue1) - max(pcl, svalue0);
240 if (overlap != 0) {
241 #ifdef DEBUG
242 if (debug & SAMPLEDEBUG) {
243 (void) printf("[asgnsamples] "
244 "(0x%llx->0x%llx-0x%llx) %s gets "
245 "%f ticks %lld overlap\n",
246 nl[j].value/sizeof (UNIT), svalue0,
247 svalue1, nl[j].name,
248 overlap * time / scale, overlap);
249 }
250 #endif /* DEBUG */
251 nl[j].time += overlap * time / scale;
252 }
253 }
254 }
255 #ifdef DEBUG
256 if (debug & SAMPLEDEBUG) {
257 (void) printf("[asgnsamples] totime %f\n", totime);
258 }
259 #endif /* DEBUG */
260 }
261
262
263 static void
dump_callgraph(FILE * fp,char * filename,unsigned long tarcs,unsigned long ncallees)264 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
265 unsigned long ncallees)
266 {
267 ProfCallGraph prof_cgraph;
268 ProfFunction prof_func;
269 arctype *arcp;
270 mod_info_t *mi;
271 nltype *nlp;
272 size_t cur_offset;
273 unsigned long caller_id = 0, callee_id = 0;
274
275 /*
276 * Write the callgraph header
277 */
278 prof_cgraph.type = PROF_CALLGRAPH_T;
279 prof_cgraph.version = PROF_CALLGRAPH_VER;
280 prof_cgraph.functions = PROFCGRAPH_SZ;
281 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
282 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
283 perror(filename);
284 exit(EX_IOERR);
285 }
286 /* CONSTCOND */
287 if (CGRAPH_FILLER)
288 (void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);
289
290 /* Current offset inside the callgraph object */
291 cur_offset = prof_cgraph.functions;
292
293 for (mi = &modules; mi; mi = mi->next) {
294 for (nlp = mi->nl; nlp < mi->npe; nlp++) {
295 if (nlp->ncallers == 0)
296 continue;
297
298 /* If this is the last callee, set next_to to 0 */
299 callee_id++;
300 if (callee_id == ncallees)
301 prof_func.next_to = 0;
302 else {
303 prof_func.next_to = cur_offset +
304 nlp->ncallers * PROFFUNC_SZ;
305 }
306
307 /*
308 * Dump this callee's raw arc information with all
309 * its callers
310 */
311 caller_id = 1;
312 for (arcp = nlp->parents; arcp;
313 arcp = arcp->arc_parentlist) {
314 /*
315 * If no more callers for this callee, set
316 * next_from to 0
317 */
318 if (caller_id == nlp->ncallers)
319 prof_func.next_from = 0;
320 else {
321 prof_func.next_from = cur_offset +
322 PROFFUNC_SZ;
323 }
324
325 prof_func.frompc =
326 arcp->arc_parentp->module->load_base +
327 (arcp->arc_parentp->value -
328 arcp->arc_parentp->module->txt_origin);
329 prof_func.topc = mi->load_base +
330 (nlp->value - mi->txt_origin);
331 prof_func.count = arcp->arc_count;
332
333
334 if (fwrite(&prof_func, sizeof (ProfFunction),
335 1, fp) != 1) {
336 perror(filename);
337 exit(EX_IOERR);
338 }
339 /* CONSTCOND */
340 if (FUNC_FILLER)
341 (void) fseek(fp, FUNC_FILLER, SEEK_CUR);
342
343 cur_offset += PROFFUNC_SZ;
344 caller_id++;
345 }
346 } /* for nlp... */
347 } /* for mi... */
348 }
349
350 /*
351 * To save all pc-hits in all the gmon.out's is infeasible, as this
352 * may become quite huge even with a small number of files to sum.
353 * Instead, we'll dump *fictitious hits* to correct functions
354 * by scanning module namelists. Again, since this is summing
355 * pc-hits, we may have to dump the pcsamples out in chunks if the
356 * number of pc-hits is high.
357 */
358 static void
dump_hits(FILE * fp,char * filename,nltype * nlp)359 dump_hits(FILE *fp, char *filename, nltype *nlp)
360 {
361 Address *p, hitpc;
362 size_t i, nelem, ntowrite;
363
364 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
365 nelem = PROF_BUFFER_SIZE;
366
367 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
368 (void) fprintf(stderr, "%s: no room for %d pcsamples\n",
369 whoami, nelem);
370 exit(EX_OSERR);
371 }
372
373 /*
374 * Set up *fictitious* hits (to function entry) buffer
375 */
376 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
377 for (i = 0; i < nelem; i++)
378 p[i] = hitpc;
379
380 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
381 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
382 perror(filename);
383 exit(EX_IOERR);
384 }
385 }
386
387 if (ntowrite) {
388 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
389 perror(filename);
390 exit(EX_IOERR);
391 }
392 }
393
394 free(p);
395 }
396
397 static void
dump_pcsamples(FILE * fp,char * filename,unsigned long * tarcs,unsigned long * ncallees)398 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
399 unsigned long *ncallees)
400 {
401 ProfBuffer prof_buffer;
402 arctype *arcp;
403 mod_info_t *mi;
404 nltype *nlp;
405
406 prof_buffer.type = PROF_BUFFER_T;
407 prof_buffer.version = PROF_BUFFER_VER;
408 prof_buffer.buffer = PROFBUF_SZ;
409 prof_buffer.bufsize = n_pcsamples;
410 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
411 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
412 perror(filename);
413 exit(EX_IOERR);
414 }
415 /* CONSTCOND */
416 if (BUF_FILLER)
417 (void) fseek(fp, BUF_FILLER, SEEK_CUR);
418
419 *tarcs = 0;
420 *ncallees = 0;
421 for (mi = &modules; mi; mi = mi->next) {
422 for (nlp = mi->nl; nlp < mi->npe; nlp++) {
423 if (nlp->nticks)
424 dump_hits(fp, filename, nlp);
425
426 nlp->ncallers = 0;
427 for (arcp = nlp->parents; arcp;
428 arcp = arcp->arc_parentlist) {
429 (nlp->ncallers)++;
430 }
431
432 if (nlp->ncallers) {
433 (*tarcs) += nlp->ncallers;
434 (*ncallees)++;
435 }
436 }
437 }
438 }
439
440 static void
dump_modules(FILE * fp,char * filename,size_t pbuf_sz)441 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
442 {
443 char *pbuf, *p;
444 size_t namelen;
445 Index off_nxt, off_path;
446 mod_info_t *mi;
447
448 ProfModuleList prof_modlist;
449 ProfModule prof_mod;
450
451 /* Allocate for path strings buffer */
452 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
453 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
454 (void) fprintf(stderr, "%s: no room for %d bytes\n",
455 whoami, pbuf_sz * sizeof (char));
456 exit(EX_OSERR);
457 }
458
459 /* Dump out PROF_MODULE_T info for all non-aout modules */
460 prof_modlist.type = PROF_MODULES_T;
461 prof_modlist.version = PROF_MODULES_VER;
462 prof_modlist.modules = PROFMODLIST_SZ;
463 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
464 pbuf_sz;
465 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
466 perror(filename);
467 exit(EX_IOERR);
468 }
469 /* CONSTCOND */
470 if (MODLIST_FILLER)
471 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
472
473 /*
474 * Initialize offsets for ProfModule elements.
475 */
476 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
477 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
478
479 for (mi = modules.next; mi; mi = mi->next) {
480 if (mi->next)
481 prof_mod.next = off_nxt;
482 else
483 prof_mod.next = 0;
484 prof_mod.path = off_path;
485 prof_mod.startaddr = mi->load_base;
486 prof_mod.endaddr = mi->load_end;
487
488 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
489 perror(filename);
490 exit(EX_IOERR);
491 }
492
493 /* CONSTCOND */
494 if (MOD_FILLER)
495 (void) fseek(fp, MOD_FILLER, SEEK_CUR);
496
497 (void) strcpy(p, mi->name);
498 namelen = strlen(mi->name);
499 p += namelen + 1;
500
501 /* Note that offset to every path str need not be aligned */
502 off_nxt += PROFMOD_SZ;
503 off_path += namelen + 1;
504 }
505
506 /* Write out the module path strings */
507 if (pbuf_sz) {
508 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
509 perror(filename);
510 exit(EX_IOERR);
511 }
512
513 free(pbuf);
514 }
515 }
516
517 /*
518 * If we have inactive modules, their current load addresses may overlap with
519 * active ones, and so we've to assign fictitious, non-overlapping addresses
520 * to all modules before we dump them.
521 */
522 static void
fixup_maps(size_t * pathsz)523 fixup_maps(size_t *pathsz)
524 {
525 unsigned int n_inactive = 0;
526 Address lbase = 0, lend;
527 mod_info_t *mi;
528
529 /* Pick the lowest load address among modules */
530 *pathsz = 0;
531 for (mi = &modules; mi; mi = mi->next) {
532
533 if (mi->active == FALSE)
534 n_inactive++;
535
536 if (mi == &modules || mi->load_base < lbase)
537 lbase = mi->load_base;
538
539 /*
540 * Return total path size of non-aout modules only
541 */
542 if (mi != &modules)
543 *pathsz = (*pathsz) + strlen(mi->name) + 1;
544 }
545
546 /*
547 * All module info is in fine shape already if there are no
548 * inactive modules
549 */
550 if (n_inactive == 0)
551 return;
552
553 /*
554 * Assign fictitious load addresses to all (non-aout) modules so
555 * that sum info can be dumped out.
556 */
557 for (mi = modules.next; mi; mi = mi->next) {
558 lend = lbase + (mi->data_end - mi->txt_origin);
559 if ((lbase < modules.load_base && lend < modules.load_base) ||
560 (lbase > modules.load_end && lend > modules.load_end)) {
561
562 mi->load_base = lbase;
563 mi->load_end = lend;
564
565 /* just to give an appearance of reality */
566 lbase = CEIL(lend + PGSZ, PGSZ);
567 } else {
568 /*
569 * can't use this lbase & lend pair, as it
570 * overlaps with aout's addresses
571 */
572 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
573 mi->load_end = mi->load_base + (lend - lbase);
574
575 lbase = CEIL(mi->load_end + PGSZ, PGSZ);
576 }
577 }
578 }
579
580 static void
dump_gprofhdr(FILE * fp,char * filename)581 dump_gprofhdr(FILE *fp, char *filename)
582 {
583 ProfHeader prof_hdr;
584
585 prof_hdr.h_magic = PROF_MAGIC;
586 prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
587 prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
588 prof_hdr.size = PROFHDR_SZ;
589 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
590 perror(filename);
591 exit(EX_IOERR);
592 }
593
594 /* CONSTCOND */
595 if (HDR_FILLER)
596 (void) fseek(fp, HDR_FILLER, SEEK_CUR);
597 }
598
599 static void
dumpsum_ostyle(char * sumfile)600 dumpsum_ostyle(char *sumfile)
601 {
602 nltype *nlp;
603 arctype *arcp;
604 struct rawarc arc;
605 struct rawarc32 arc32;
606 FILE *sfile;
607
608 if ((sfile = fopen(sumfile, "w")) == NULL) {
609 perror(sumfile);
610 exit(EX_IOERR);
611 }
612 /*
613 * dump the header; use the last header read in
614 */
615 if (Bflag) {
616 if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
617 perror(sumfile);
618 exit(EX_IOERR);
619 }
620 } else {
621 struct hdr32 hdr;
622 hdr.lowpc = (pctype32)h.lowpc;
623 hdr.highpc = (pctype32)h.highpc;
624 hdr.ncnt = (pctype32)h.ncnt;
625 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
626 perror(sumfile);
627 exit(EX_IOERR);
628 }
629 }
630 /*
631 * dump the samples
632 */
633 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
634 nsamples) {
635 perror(sumfile);
636 exit(EX_IOERR);
637 }
638 /*
639 * dump the normalized raw arc information. For old-style dumping,
640 * the only namelist is in modules.nl
641 */
642 for (nlp = modules.nl; nlp < modules.npe; nlp++) {
643 for (arcp = nlp->children; arcp;
644 arcp = arcp->arc_childlist) {
645 if (Bflag) {
646 arc.raw_frompc = arcp->arc_parentp->value;
647 arc.raw_selfpc = arcp->arc_childp->value;
648 arc.raw_count = arcp->arc_count;
649 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
650 perror(sumfile);
651 exit(EX_IOERR);
652 }
653 } else {
654 arc32.raw_frompc =
655 (pctype32)arcp->arc_parentp->value;
656 arc32.raw_selfpc =
657 (pctype32)arcp->arc_childp->value;
658 arc32.raw_count = (actype32)arcp->arc_count;
659 if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
660 1) {
661 perror(sumfile);
662 exit(EX_IOERR);
663 }
664 }
665 #ifdef DEBUG
666 if (debug & SAMPLEDEBUG) {
667 (void) printf(
668 "[dumpsum_ostyle] frompc 0x%llx selfpc "
669 "0x%llx count %lld\n", arc.raw_frompc,
670 arc.raw_selfpc, arc.raw_count);
671 }
672 #endif /* DEBUG */
673 }
674 }
675 (void) fclose(sfile);
676 }
677
678 /*
679 * dump out the gmon.sum file
680 */
681 static void
dumpsum(char * sumfile)682 dumpsum(char *sumfile)
683 {
684 FILE *sfile;
685 size_t pathbuf_sz;
686 unsigned long total_arcs; /* total number of arcs in all */
687 unsigned long ncallees; /* no. of callees with parents */
688
689 if (old_style) {
690 dumpsum_ostyle(sumfile);
691 return;
692 }
693
694 if ((sfile = fopen(sumfile, "w")) == NULL) {
695 perror(sumfile);
696 exit(EX_IOERR);
697 }
698
699 /*
700 * Dump the new-style gprof header. Even if one of the original
701 * profiled-files was of a older version, the summed file is of
702 * current version only.
703 */
704 dump_gprofhdr(sfile, sumfile);
705
706 /*
707 * Fix up load-maps and dump out modules info
708 *
709 * Fix up module load maps so inactive modules get *some* address
710 * (and btw, could you get the total size of non-aout module path
711 * strings please ?)
712 */
713 fixup_maps(&pathbuf_sz);
714 dump_modules(sfile, sumfile, pathbuf_sz);
715
716
717 /*
718 * Dump out the summ'd pcsamples
719 *
720 * For dumping call graph information later, we need certain
721 * statistics (like total arcs, number of callers for each node);
722 * collect these also while we are at it.
723 */
724 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
725
726 /*
727 * Dump out the summ'd call graph information
728 */
729 dump_callgraph(sfile, sumfile, total_arcs, ncallees);
730
731
732 (void) fclose(sfile);
733 }
734
735 static void
tally(mod_info_t * caller_mod,mod_info_t * callee_mod,struct rawarc * rawp)736 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
737 {
738 nltype *parentp;
739 nltype *childp;
740
741 /*
742 * if count == 0 this is a null arc and
743 * we don't need to tally it.
744 */
745 if (rawp->raw_count == 0)
746 return;
747
748 /*
749 * Lookup the caller and callee pcs in namelists of
750 * appropriate modules
751 */
752 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
753 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
754 if (childp && parentp) {
755 if (!Dflag)
756 childp->ncall += rawp->raw_count;
757 else {
758 if (first_file)
759 childp->ncall += rawp->raw_count;
760 else {
761 childp->ncall -= rawp->raw_count;
762 if (childp->ncall < 0)
763 childp->ncall = 0;
764 }
765 }
766
767 #ifdef DEBUG
768 if (debug & TALLYDEBUG) {
769 (void) printf("[tally] arc from %s to %s traversed "
770 "%lld times\n", parentp->name,
771 childp->name, rawp->raw_count);
772 }
773 #endif /* DEBUG */
774 addarc(parentp, childp, rawp->raw_count);
775 }
776 }
777
778 /*
779 * Look up a module's base address in a sorted list of pc-hits. Unlike
780 * nllookup(), this deals with misses by mapping them to the next *higher*
781 * pc-hit. This is so that we get into the module's first pc-hit rightaway,
782 * even if the module's entry-point (load_base) itself is not a hit.
783 */
784 static Address *
locate(Address * pclist,size_t nelem,Address keypc)785 locate(Address *pclist, size_t nelem, Address keypc)
786 {
787 size_t low = 0, middle, high = nelem - 1;
788
789 if (keypc <= pclist[low])
790 return (pclist);
791
792 if (keypc > pclist[high])
793 return (NULL);
794
795 while (low != high) {
796 middle = (high + low) >> 1;
797
798 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
799 return (&pclist[middle + 1]);
800
801 if (pclist[middle] >= keypc)
802 high = middle;
803 else
804 low = middle + 1;
805 }
806
807 /* must never reach here! */
808 return (NULL);
809 }
810
811 static void
assign_pcsamples(mod_info_t * module,Address * pcsmpl,size_t n_samples)812 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
813 {
814 Address *pcptr, *pcse = pcsmpl + n_samples;
815 pctype nxt_func;
816 nltype *fnl;
817 size_t func_nticks;
818 #ifdef DEBUG
819 size_t n_hits_in_module = 0;
820 #endif /* DEBUG */
821
822 /* Locate the first pc-hit for this module */
823 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
824 #ifdef DEBUG
825 if (debug & PCSMPLDEBUG) {
826 (void) printf("[assign_pcsamples] no pc-hits in\n");
827 (void) printf(
828 " `%s'\n", module->name);
829 }
830 #endif /* DEBUG */
831 return; /* no pc-hits in this module */
832 }
833
834 /* Assign all pc-hits in this module to appropriate functions */
835 while ((pcptr < pcse) && (*pcptr < module->load_end)) {
836
837 /* Update the corresponding function's time */
838 fnl = nllookup(module, (pctype) *pcptr, &nxt_func);
839 if (fnl != NULL) {
840 /*
841 * Collect all pc-hits in this function. Each
842 * pc-hit counts as 1 tick.
843 */
844 func_nticks = 0;
845 while ((pcptr < pcse) && (*pcptr < nxt_func)) {
846 func_nticks++;
847 pcptr++;
848 }
849
850 if (func_nticks == 0)
851 pcptr++;
852 else {
853 fnl->nticks += func_nticks;
854 fnl->time += func_nticks;
855 totime += func_nticks;
856 }
857
858 #ifdef DEBUG
859 n_hits_in_module += func_nticks;
860 #endif /* DEBUG */
861 } else {
862 /*
863 * pc sample could not be assigned to function;
864 * probably in a PLT
865 */
866 pcptr++;
867 }
868 }
869
870 #ifdef DEBUG
871 if (debug & PCSMPLDEBUG) {
872 (void) printf(
873 "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
874 (void) printf(" `%s'\n", module->name);
875 }
876 #endif /* DEBUG */
877 }
878
879 int
pc_cmp(const void * arg1,const void * arg2)880 pc_cmp(const void *arg1, const void *arg2)
881 {
882 Address *pc1 = (Address *)arg1;
883 Address *pc2 = (Address *)arg2;
884
885 if (*pc1 > *pc2)
886 return (1);
887
888 if (*pc1 < *pc2)
889 return (-1);
890
891 return (0);
892 }
893
894 static void
process_pcsamples(ProfBuffer * bufp)895 process_pcsamples(ProfBuffer *bufp)
896 {
897 Address *pc_samples;
898 mod_info_t *mi;
899 caddr_t p;
900 size_t chunk_size, nelem_read, nelem_to_read;
901
902 #ifdef DEBUG
903 if (debug & PCSMPLDEBUG) {
904 (void) printf(
905 "[process_pcsamples] number of pcsamples = %lld\n",
906 bufp->bufsize);
907 }
908 #endif /* DEBUG */
909
910 /* buffer with no pc samples ? */
911 if (bufp->bufsize == 0)
912 return;
913
914 /*
915 * If we're processing pcsamples of a profile sum, we could have
916 * more than PROF_BUFFER_SIZE number of samples. In such a case,
917 * we must read the pcsamples in chunks.
918 */
919 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
920 chunk_size = PROF_BUFFER_SIZE;
921
922 /* Allocate for the pcsample chunk */
923 pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
924 if (pc_samples == NULL) {
925 (void) fprintf(stderr, "%s: no room for %d sample pc's\n",
926 whoami, chunk_size);
927 exit(EX_OSERR);
928 }
929
930 /* Copy the current set of pcsamples */
931 nelem_read = 0;
932 nelem_to_read = bufp->bufsize;
933 p = (char *)bufp + bufp->buffer;
934
935 while (nelem_read < nelem_to_read) {
936 (void) memcpy((void *) pc_samples, p,
937 chunk_size * sizeof (Address));
938
939 /* Sort the pc samples */
940 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
941
942 /*
943 * Assign pcsamples to functions in the currently active
944 * module list
945 */
946 for (mi = &modules; mi; mi = mi->next) {
947 if (mi->active == FALSE)
948 continue;
949 assign_pcsamples(mi, pc_samples, chunk_size);
950 }
951
952 p += (chunk_size * sizeof (Address));
953 nelem_read += chunk_size;
954
955 if ((nelem_to_read - nelem_read) < chunk_size)
956 chunk_size = nelem_to_read - nelem_read;
957 }
958
959 free(pc_samples);
960
961 /* Update total number of pcsamples read so far */
962 n_pcsamples += bufp->bufsize;
963 }
964
965 static mod_info_t *
find_module(Address addr)966 find_module(Address addr)
967 {
968 mod_info_t *mi;
969
970 for (mi = &modules; mi; mi = mi->next) {
971 if (mi->active == FALSE)
972 continue;
973
974 if (addr >= mi->load_base && addr < mi->load_end)
975 return (mi);
976 }
977
978 return (NULL);
979 }
980
981 static void
process_cgraph(ProfCallGraph * cgp)982 process_cgraph(ProfCallGraph *cgp)
983 {
984 struct rawarc arc;
985 mod_info_t *callee_mi, *caller_mi;
986 ProfFunction *calleep, *callerp;
987 Index caller_off, callee_off;
988
989 /*
990 * Note that *callee_off* increment in the for loop below
991 * uses *calleep* and *calleep* doesn't get set until the for loop
992 * is entered. We don't expect the increment to be executed before
993 * the loop body is executed atleast once, so this should be ok.
994 */
995 for (callee_off = cgp->functions; callee_off;
996 callee_off = calleep->next_to) {
997
998 /* LINTED: pointer cast */
999 calleep = (ProfFunction *)((char *)cgp + callee_off);
1000
1001 /*
1002 * We could choose either to sort the {caller, callee}
1003 * list twice and assign callee/caller to modules or inspect
1004 * each callee/caller in the active modules list. Since
1005 * the modules list is usually very small, we'l choose the
1006 * latter.
1007 */
1008
1009 /*
1010 * If we cannot identify a callee with a module, there's
1011 * no use worrying about who called it.
1012 */
1013 if ((callee_mi = find_module(calleep->topc)) == NULL) {
1014 #ifdef DEBUG
1015 if (debug & CGRAPHDEBUG) {
1016 (void) printf(
1017 "[process_cgraph] callee %#llx missed\n",
1018 calleep->topc);
1019 }
1020 #endif /* DEBUG */
1021 continue;
1022 } else
1023 arc.raw_selfpc = calleep->topc;
1024
1025 for (caller_off = callee_off; caller_off;
1026 caller_off = callerp->next_from) {
1027
1028 /* LINTED: pointer cast */
1029 callerp = (ProfFunction *)((char *)cgp + caller_off);
1030 if ((caller_mi = find_module(callerp->frompc)) ==
1031 NULL) {
1032 #ifdef DEBUG
1033 if (debug & CGRAPHDEBUG) {
1034 (void) printf(
1035 "[process_cgraph] caller %#llx "
1036 "missed\n", callerp->frompc);
1037 }
1038 #endif /* DEBUG */
1039 continue;
1040 }
1041
1042 arc.raw_frompc = callerp->frompc;
1043 arc.raw_count = callerp->count;
1044
1045 #ifdef DEBUG
1046 if (debug & CGRAPHDEBUG) {
1047 (void) printf(
1048 "[process_cgraph] arc <%#llx, %#llx, "
1049 "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
1050 arc.raw_count);
1051 }
1052 #endif /* DEBUG */
1053 tally(caller_mi, callee_mi, &arc);
1054 }
1055 }
1056
1057 #ifdef DEBUG
1058 (void) puts("\n");
1059 #endif /* DEBUG */
1060 }
1061
1062 /*
1063 * Two modules overlap each other if they don't lie completely *outside*
1064 * each other.
1065 */
1066 static bool
does_overlap(ProfModule * new,mod_info_t * old)1067 does_overlap(ProfModule *new, mod_info_t *old)
1068 {
1069 /* case 1: new module lies completely *before* the old one */
1070 if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1071 return (FALSE);
1072
1073 /* case 2: new module lies completely *after* the old one */
1074 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1075 return (FALSE);
1076
1077 /* probably a dlopen: the modules overlap each other */
1078 return (TRUE);
1079 }
1080
1081 static bool
is_same_as_aout(char * modpath,struct stat * buf)1082 is_same_as_aout(char *modpath, struct stat *buf)
1083 {
1084 if (stat(modpath, buf) == -1) {
1085 (void) fprintf(stderr, "%s: can't get info on `%s'\n",
1086 whoami, modpath);
1087 exit(EX_NOINPUT);
1088 }
1089
1090 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1091 return (TRUE);
1092 else
1093 return (FALSE);
1094 }
1095
1096 static void
process_modules(ProfModuleList * modlp)1097 process_modules(ProfModuleList *modlp)
1098 {
1099 ProfModule *newmodp;
1100 mod_info_t *mi, *last, *new_module;
1101 char *so_path;
1102 bool more_modules = TRUE;
1103 struct stat so_statbuf;
1104
1105 #ifdef DEBUG
1106 if (debug & MODULEDEBUG) {
1107 (void) printf("[process_modules] module obj version %u\n",
1108 modlp->version);
1109 }
1110 #endif /* DEBUG */
1111
1112 /* Check version of module type object */
1113 if (modlp->version > PROF_MODULES_VER) {
1114 (void) fprintf(stderr, "%s: version %d for module type objects"
1115 "is not supported\n", whoami, modlp->version);
1116 exit(EX_SOFTWARE);
1117 }
1118
1119
1120 /*
1121 * Scan the PROF_MODULES_T list and add modules to current list
1122 * of modules, if they're not present already
1123 */
1124 /* LINTED: pointer cast */
1125 newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1126 do {
1127 /*
1128 * Since the prog could've been renamed after its run, we
1129 * should see if this overlaps a.out. If it does, it is
1130 * probably the renamed aout. We should also skip any other
1131 * non-sharedobj's that we see (or should we report an error ?)
1132 */
1133 so_path = (caddr_t)modlp + newmodp->path;
1134 if (does_overlap(newmodp, &modules) ||
1135 is_same_as_aout(so_path, &so_statbuf) ||
1136 (!is_shared_obj(so_path))) {
1137
1138 if (!newmodp->next)
1139 more_modules = FALSE;
1140
1141 /* LINTED: pointer cast */
1142 newmodp = (ProfModule *)
1143 ((caddr_t)modlp + newmodp->next);
1144 #ifdef DEBUG
1145 if (debug & MODULEDEBUG) {
1146 (void) printf(
1147 "[process_modules] `%s'\n", so_path);
1148 (void) printf(" skipped\n");
1149 }
1150 #endif /* DEBUG */
1151 continue;
1152 }
1153 #ifdef DEBUG
1154 if (debug & MODULEDEBUG)
1155 (void) printf("[process_modules] `%s'...\n", so_path);
1156 #endif /* DEBUG */
1157
1158 /*
1159 * Check all modules (leave the first one, 'cos that
1160 * is the program executable info). If this module is already
1161 * there in the list, update the load addresses and proceed.
1162 */
1163 last = &modules;
1164 while ((mi = last->next) != NULL) {
1165 /*
1166 * We expect the full pathname for all shared objects
1167 * needed by the program executable. In this case, we
1168 * simply need to compare the paths to see if they are
1169 * the same file.
1170 */
1171 if (strcmp(mi->name, so_path) == 0)
1172 break;
1173
1174 /*
1175 * Check if this new shared object will overlap
1176 * any existing module. If yes, remove the old one
1177 * from the linked list (but don't free it, 'cos
1178 * there may be symbols referring to this module
1179 * still)
1180 */
1181 if (does_overlap(newmodp, mi)) {
1182 #ifdef DEBUG
1183 if (debug & MODULEDEBUG) {
1184 (void) printf(
1185 "[process_modules] `%s'\n",
1186 so_path);
1187 (void) printf(
1188 " overlaps\n");
1189 (void) printf(
1190 " `%s'\n",
1191 mi->name);
1192 }
1193 #endif /* DEBUG */
1194 mi->active = FALSE;
1195 }
1196
1197 last = mi;
1198 }
1199
1200 /* Module already there, skip it */
1201 if (mi != NULL) {
1202 mi->load_base = newmodp->startaddr;
1203 mi->load_end = newmodp->endaddr;
1204 mi->active = TRUE;
1205 if (!newmodp->next)
1206 more_modules = FALSE;
1207
1208 /* LINTED: pointer cast */
1209 newmodp = (ProfModule *)
1210 ((caddr_t)modlp + newmodp->next);
1211
1212 #ifdef DEBUG
1213 if (debug & MODULEDEBUG) {
1214 (void) printf("[process_modules] base=%#llx, "
1215 "end=%#llx\n", mi->load_base, mi->load_end);
1216 }
1217 #endif /* DEBUG */
1218 continue;
1219 }
1220
1221 /*
1222 * Check if gmon.out is outdated with respect to the new
1223 * module we want to add
1224 */
1225 if (gmonout_info.mtime < so_statbuf.st_mtime) {
1226 (void) fprintf(stderr,
1227 "%s: shared obj outdates prof info\n", whoami);
1228 (void) fprintf(stderr, "\t(newer %s)\n", so_path);
1229 exit(EX_NOINPUT);
1230 }
1231
1232 /* Create a new module element */
1233 new_module = malloc(sizeof (mod_info_t));
1234 if (new_module == NULL) {
1235 (void) fprintf(stderr, "%s: no room for %d bytes\n",
1236 whoami, sizeof (mod_info_t));
1237 exit(EX_OSERR);
1238 }
1239
1240 /* and fill in info... */
1241 new_module->id = n_modules + 1;
1242 new_module->load_base = newmodp->startaddr;
1243 new_module->load_end = newmodp->endaddr;
1244 new_module->name = malloc(strlen(so_path) + 1);
1245 if (new_module->name == NULL) {
1246 (void) fprintf(stderr, "%s: no room for %d bytes\n",
1247 whoami, strlen(so_path) + 1);
1248 exit(EX_OSERR);
1249 }
1250 (void) strcpy(new_module->name, so_path);
1251 #ifdef DEBUG
1252 if (debug & MODULEDEBUG) {
1253 (void) printf(
1254 "[process_modules] base=%#llx, end=%#llx\n",
1255 new_module->load_base, new_module->load_end);
1256 }
1257 #endif /* DEBUG */
1258
1259 /* Create this module's nameslist */
1260 process_namelist(new_module);
1261
1262 /* Add it to the tail of active module list */
1263 last->next = new_module;
1264 n_modules++;
1265
1266 #ifdef DEBUG
1267 if (debug & MODULEDEBUG) {
1268 (void) printf(
1269 "[process_modules] total shared objects = %ld\n",
1270 n_modules - 1);
1271 }
1272 #endif /* DEBUG */
1273 /*
1274 * Move to the next module in the PROF_MODULES_T list
1275 * (if present)
1276 */
1277 if (!newmodp->next)
1278 more_modules = FALSE;
1279
1280 /* LINTED: pointer cast */
1281 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1282
1283 } while (more_modules);
1284 }
1285
1286 static void
reset_active_modules(void)1287 reset_active_modules(void)
1288 {
1289 mod_info_t *mi;
1290
1291 /* Except the executable, no other module should remain active */
1292 for (mi = modules.next; mi; mi = mi->next)
1293 mi->active = FALSE;
1294 }
1295
1296 static void
getpfiledata(caddr_t memp,size_t fsz)1297 getpfiledata(caddr_t memp, size_t fsz)
1298 {
1299 ProfObject *objp;
1300 caddr_t file_end;
1301 bool found_pcsamples = FALSE, found_cgraph = FALSE;
1302
1303 /*
1304 * Before processing a new gmon.out, all modules except the
1305 * program executable must be made inactive, so that symbols
1306 * are searched only in the program executable, if we don't
1307 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1308 * because we need the active module data after we're done with
1309 * the last gmon.out, if we're doing summing.
1310 */
1311 reset_active_modules();
1312
1313 file_end = memp + fsz;
1314 /* LINTED: pointer cast */
1315 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1316 while ((caddr_t)objp < file_end) {
1317 #ifdef DEBUG
1318 {
1319 unsigned int type = 0;
1320
1321 if (debug & MONOUTDEBUG) {
1322 if (objp->type <= MAX_OBJTYPES)
1323 type = objp->type;
1324
1325 (void) printf(
1326 "\n[getpfiledata] object %s [%#lx]\n",
1327 objname[type], objp->type);
1328 }
1329 }
1330 #endif /* DEBUG */
1331 switch (objp->type) {
1332 case PROF_MODULES_T :
1333 process_modules((ProfModuleList *) objp);
1334 break;
1335
1336 case PROF_CALLGRAPH_T :
1337 process_cgraph((ProfCallGraph *) objp);
1338 found_cgraph = TRUE;
1339 break;
1340
1341 case PROF_BUFFER_T :
1342 process_pcsamples((ProfBuffer *) objp);
1343 found_pcsamples = TRUE;
1344 break;
1345
1346 default :
1347 (void) fprintf(stderr,
1348 "%s: unknown prof object type=%d\n",
1349 whoami, objp->type);
1350 exit(EX_SOFTWARE);
1351 }
1352 /* LINTED: pointer cast */
1353 objp = (ProfObject *)((caddr_t)objp + objp->size);
1354 }
1355
1356 if (!found_cgraph || !found_pcsamples) {
1357 (void) fprintf(stderr,
1358 "%s: missing callgraph/pcsamples object\n", whoami);
1359 exit(EX_SOFTWARE);
1360 }
1361
1362 if ((caddr_t)objp > file_end) {
1363 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1364 exit(EX_SOFTWARE);
1365 }
1366
1367 if (first_file)
1368 first_file = FALSE;
1369 }
1370
1371 static void
readarcs(FILE * pfile)1372 readarcs(FILE *pfile)
1373 {
1374 /*
1375 * the rest of the file consists of
1376 * a bunch of <from,self,count> tuples.
1377 */
1378 /* CONSTCOND */
1379 while (1) {
1380 struct rawarc arc;
1381
1382 if (rflag) {
1383 if (Bflag) {
1384 L_cgarc64 rtld_arc64;
1385
1386 /*
1387 * If rflag is set then this is an profiled
1388 * image generated by rtld. It needs to be
1389 * 'converted' to the standard data format.
1390 */
1391 if (fread(&rtld_arc64,
1392 sizeof (L_cgarc64), 1, pfile) != 1)
1393 break;
1394
1395 if (rtld_arc64.cg_from == PRF_OUTADDR64)
1396 arc.raw_frompc = s_highpc + 0x10;
1397 else
1398 arc.raw_frompc =
1399 (pctype)rtld_arc64.cg_from;
1400 arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1401 arc.raw_count = (actype)rtld_arc64.cg_count;
1402 } else {
1403 L_cgarc rtld_arc;
1404
1405 /*
1406 * If rflag is set then this is an profiled
1407 * image generated by rtld. It needs to be
1408 * 'converted' to the standard data format.
1409 */
1410 if (fread(&rtld_arc,
1411 sizeof (L_cgarc), 1, pfile) != 1)
1412 break;
1413
1414 if (rtld_arc.cg_from == PRF_OUTADDR)
1415 arc.raw_frompc = s_highpc + 0x10;
1416 else
1417 arc.raw_frompc = (pctype)
1418 (uintptr_t)rtld_arc.cg_from;
1419 arc.raw_selfpc = (pctype)
1420 (uintptr_t)rtld_arc.cg_to;
1421 arc.raw_count = (actype)rtld_arc.cg_count;
1422 }
1423 } else {
1424 if (Bflag) {
1425 if (fread(&arc, sizeof (struct rawarc), 1,
1426 pfile) != 1) {
1427 break;
1428 }
1429 } else {
1430 /*
1431 * If these aren't big %pc's, we need to read
1432 * into the 32-bit raw arc structure, and
1433 * assign the members into the actual arc.
1434 */
1435 struct rawarc32 arc32;
1436 if (fread(&arc32, sizeof (struct rawarc32),
1437 1, pfile) != 1)
1438 break;
1439 arc.raw_frompc = (pctype)arc32.raw_frompc;
1440 arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1441 arc.raw_count = (actype)arc32.raw_count;
1442 }
1443 }
1444
1445 #ifdef DEBUG
1446 if (debug & SAMPLEDEBUG) {
1447 (void) printf("[getpfile] frompc 0x%llx selfpc "
1448 "0x%llx count %lld\n", arc.raw_frompc,
1449 arc.raw_selfpc, arc.raw_count);
1450 }
1451 #endif /* DEBUG */
1452 /*
1453 * add this arc
1454 */
1455 tally(&modules, &modules, &arc);
1456 }
1457 if (first_file)
1458 first_file = FALSE;
1459 }
1460
1461 static void
readsamples(FILE * pfile)1462 readsamples(FILE *pfile)
1463 {
1464 sztype i;
1465 unsigned_UNIT sample;
1466
1467 if (samples == 0) {
1468 samples = (unsigned_UNIT *) calloc(nsamples,
1469 sizeof (unsigned_UNIT));
1470 if (samples == 0) {
1471 (void) fprintf(stderr,
1472 "%s: No room for %d sample pc's\n",
1473 whoami, sampbytes / sizeof (unsigned_UNIT));
1474 exit(EX_OSERR);
1475 }
1476 }
1477
1478 for (i = 0; i < nsamples; i++) {
1479 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1480 if (feof(pfile))
1481 break;
1482 samples[i] += sample;
1483 }
1484 if (i != nsamples) {
1485 (void) fprintf(stderr,
1486 "%s: unexpected EOF after reading %d/%d samples\n",
1487 whoami, --i, nsamples);
1488 exit(EX_IOERR);
1489 }
1490 }
1491
1492 static void *
handle_versioned(FILE * pfile,char * filename,size_t * fsz)1493 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1494 {
1495 int fd;
1496 bool invalid_version;
1497 caddr_t fmem;
1498 struct stat buf;
1499 ProfHeader prof_hdr;
1500 off_t lret;
1501
1502 /*
1503 * Check versioning info. For now, let's say we provide
1504 * backward compatibility, so we accept all older versions.
1505 */
1506 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1507 perror("fread()");
1508 exit(EX_IOERR);
1509 }
1510
1511 invalid_version = FALSE;
1512 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1513 invalid_version = TRUE;
1514 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1515 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1516 invalid_version = FALSE;
1517 }
1518
1519 if (invalid_version) {
1520 (void) fprintf(stderr, "%s: version %d.%d not supported\n",
1521 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1522 exit(EX_SOFTWARE);
1523 }
1524
1525 /*
1526 * Map gmon.out onto memory.
1527 */
1528 (void) fclose(pfile);
1529 if ((fd = open(filename, O_RDONLY)) == -1) {
1530 perror(filename);
1531 exit(EX_IOERR);
1532 }
1533
1534 if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1535 perror(filename);
1536 exit(EX_IOERR);
1537 }
1538 *fsz = lret;
1539
1540 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1541 if (fmem == MAP_FAILED) {
1542 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1543 exit(EX_IOERR);
1544 }
1545
1546 /*
1547 * Before we close this fd, save this gmon.out's info to later verify
1548 * if the shared objects it references have changed since the time
1549 * they were used to generate this gmon.out
1550 */
1551 if (fstat(fd, &buf) == -1) {
1552 (void) fprintf(stderr, "%s: can't get info on `%s'\n",
1553 whoami, filename);
1554 exit(EX_NOINPUT);
1555 }
1556 gmonout_info.dev = buf.st_dev;
1557 gmonout_info.ino = buf.st_ino;
1558 gmonout_info.mtime = buf.st_mtime;
1559 gmonout_info.size = buf.st_size;
1560
1561 (void) close(fd);
1562
1563 return ((void *) fmem);
1564 }
1565
1566 static void *
openpfile(char * filename,size_t * fsz)1567 openpfile(char *filename, size_t *fsz)
1568 {
1569 struct hdr tmp;
1570 FILE *pfile;
1571 unsigned long magic_num;
1572 size_t hdrsize;
1573 static bool first_time = TRUE;
1574 extern bool old_style;
1575
1576 if ((pfile = fopen(filename, "r")) == NULL) {
1577 perror(filename);
1578 exit(EX_IOERR);
1579 }
1580
1581 /*
1582 * Read in the magic. Note that we changed the cast "unsigned long"
1583 * to "unsigned int" because that's how h_magic is defined in the
1584 * new format ProfHeader.
1585 */
1586 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1587 perror("fread()");
1588 exit(EX_IOERR);
1589 }
1590
1591 rewind(pfile);
1592
1593 /*
1594 * First check if this is versioned or *old-style* gmon.out
1595 */
1596 if (magic_num == (unsigned int)PROF_MAGIC) {
1597 if ((!first_time) && (old_style == TRUE)) {
1598 (void) fprintf(stderr, "%s: can't mix old & new format "
1599 "profiled files\n", whoami);
1600 exit(EX_SOFTWARE);
1601 }
1602 first_time = FALSE;
1603 old_style = FALSE;
1604 return (handle_versioned(pfile, filename, fsz));
1605 }
1606
1607 if ((!first_time) && (old_style == FALSE)) {
1608 (void) fprintf(stderr, "%s: can't mix old & new format "
1609 "profiled files\n", whoami);
1610 exit(EX_SOFTWARE);
1611 }
1612
1613 first_time = FALSE;
1614 old_style = TRUE;
1615 fsz = 0;
1616
1617 /*
1618 * Now, we need to determine if this is a run-time linker
1619 * profiled file or if it is a standard gmon.out.
1620 *
1621 * We do this by checking if magic matches PRF_MAGIC. If it
1622 * does, then this is a run-time linker profiled file, if it
1623 * doesn't, it must be a gmon.out file.
1624 */
1625 if (magic_num == (unsigned long)PRF_MAGIC)
1626 rflag = TRUE;
1627 else
1628 rflag = FALSE;
1629
1630 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1631
1632 if (rflag) {
1633 if (Bflag) {
1634 L_hdr64 l_hdr64;
1635
1636 /*
1637 * If the rflag is set then the input file is
1638 * rtld profiled data, we'll read it in and convert
1639 * it to the standard format (ie: make it look like
1640 * a gmon.out file).
1641 */
1642 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1643 perror("fread()");
1644 exit(EX_IOERR);
1645 }
1646 if (l_hdr64.hd_version != PRF_VERSION_64) {
1647 (void) fprintf(stderr,
1648 "%s: expected version %d, "
1649 "got version %d when processing 64-bit "
1650 "run-time linker profiled file.\n",
1651 whoami, PRF_VERSION_64, l_hdr64.hd_version);
1652 exit(EX_SOFTWARE);
1653 }
1654 tmp.lowpc = 0;
1655 tmp.highpc = (pctype)l_hdr64.hd_hpc;
1656 tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1657 } else {
1658 L_hdr l_hdr;
1659
1660 /*
1661 * If the rflag is set then the input file is
1662 * rtld profiled data, we'll read it in and convert
1663 * it to the standard format (ie: make it look like
1664 * a gmon.out file).
1665 */
1666 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1667 perror("fread()");
1668 exit(EX_IOERR);
1669 }
1670 if (l_hdr.hd_version != PRF_VERSION) {
1671 (void) fprintf(stderr,
1672 "%s: expected version %d, "
1673 "got version %d when processing "
1674 "run-time linker profiled file.\n",
1675 whoami, PRF_VERSION, l_hdr.hd_version);
1676 exit(EX_SOFTWARE);
1677 }
1678 tmp.lowpc = 0;
1679 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1680 tmp.ncnt = hdrsize + l_hdr.hd_psize;
1681 }
1682 } else {
1683 if (Bflag) {
1684 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1685 perror("fread()");
1686 exit(EX_IOERR);
1687 }
1688 } else {
1689 /*
1690 * If we're not reading big %pc's, we need to read
1691 * the 32-bit header, and assign the members to
1692 * the actual header.
1693 */
1694 struct hdr32 hdr32;
1695 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1696 perror("fread()");
1697 exit(EX_IOERR);
1698 }
1699 tmp.lowpc = hdr32.lowpc;
1700 tmp.highpc = hdr32.highpc;
1701 tmp.ncnt = hdr32.ncnt;
1702 }
1703 }
1704
1705 /*
1706 * perform sanity check on profiled file we've opened.
1707 */
1708 if (tmp.lowpc >= tmp.highpc) {
1709 if (rflag)
1710 (void) fprintf(stderr,
1711 "%s: badly formed profiled data.\n",
1712 filename);
1713 else
1714 (void) fprintf(stderr,
1715 "%s: badly formed gmon.out file.\n",
1716 filename);
1717 exit(EX_SOFTWARE);
1718 }
1719
1720 if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1721 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1722 (void) fprintf(stderr,
1723 "%s: incompatible with first gmon file\n",
1724 filename);
1725 exit(EX_IOERR);
1726 }
1727 h = tmp;
1728 s_lowpc = h.lowpc;
1729 s_highpc = h.highpc;
1730 lowpc = h.lowpc / sizeof (UNIT);
1731 highpc = h.highpc / sizeof (UNIT);
1732 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1733 nsamples = sampbytes / sizeof (unsigned_UNIT);
1734
1735 #ifdef DEBUG
1736 if (debug & SAMPLEDEBUG) {
1737 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1738 "0x%llx hdr.ncnt %lld\n",
1739 h.lowpc, h.highpc, h.ncnt);
1740 (void) printf(
1741 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n",
1742 s_lowpc, s_highpc);
1743 (void) printf(
1744 "[openpfile] lowpc 0x%llx highpc 0x%llx\n",
1745 lowpc, highpc);
1746 (void) printf("[openpfile] sampbytes %d nsamples %d\n",
1747 sampbytes, nsamples);
1748 }
1749 #endif /* DEBUG */
1750
1751 return ((void *) pfile);
1752 }
1753
1754 /*
1755 * Information from a gmon.out file depends on whether it's versioned
1756 * or non-versioned, *old style* gmon.out. If old-style, it is in two
1757 * parts : an array of sampling hits within pc ranges, and the arcs. If
1758 * versioned, it contains a header, followed by any number of
1759 * modules/callgraph/pcsample_buffer objects.
1760 */
1761 static void
getpfile(char * filename)1762 getpfile(char *filename)
1763 {
1764 void *handle;
1765 size_t fsz;
1766
1767 handle = openpfile(filename, &fsz);
1768
1769 if (old_style) {
1770 readsamples((FILE *)handle);
1771 readarcs((FILE *)handle);
1772 (void) fclose((FILE *)handle);
1773 return;
1774 }
1775
1776 getpfiledata((caddr_t)handle, fsz);
1777 (void) munmap(handle, fsz);
1778 }
1779
1780 int
main(int argc,char ** argv)1781 main(int argc, char **argv)
1782 {
1783 char **sp;
1784 nltype **timesortnlp;
1785 int c;
1786 int errflg;
1787
1788 prog_name = *argv; /* preserve program name */
1789 debug = 0;
1790 nflag = FALSE;
1791 bflag = TRUE;
1792 lflag = FALSE;
1793 Cflag = FALSE;
1794 first_file = TRUE;
1795 rflag = FALSE;
1796 Bflag = FALSE;
1797 errflg = FALSE;
1798
1799 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1800 switch (c) {
1801 case 'a':
1802 aflag = TRUE;
1803 break;
1804 case 'b':
1805 bflag = FALSE;
1806 break;
1807 case 'c':
1808 cflag = TRUE;
1809 break;
1810 case 'C':
1811 Cflag = TRUE;
1812 break;
1813 case 'd':
1814 dflag = TRUE;
1815 debug |= atoi(optarg);
1816 (void) printf("[main] debug = 0x%x\n", debug);
1817 break;
1818 case 'D':
1819 Dflag = TRUE;
1820 break;
1821 case 'E':
1822 addlist(Elist, optarg);
1823 Eflag = TRUE;
1824 addlist(elist, optarg);
1825 eflag = TRUE;
1826 break;
1827 case 'e':
1828 addlist(elist, optarg);
1829 eflag = TRUE;
1830 break;
1831 case 'F':
1832 addlist(Flist, optarg);
1833 Fflag = TRUE;
1834 addlist(flist, optarg);
1835 fflag = TRUE;
1836 break;
1837 case 'f':
1838 addlist(flist, optarg);
1839 fflag = TRUE;
1840 break;
1841 case 'l':
1842 lflag = TRUE;
1843 break;
1844 case 'n':
1845 nflag = TRUE;
1846 number_funcs_toprint = atoi(optarg);
1847 break;
1848 case 's':
1849 sflag = TRUE;
1850 break;
1851 case 'z':
1852 zflag = TRUE;
1853 break;
1854 case '?':
1855 errflg++;
1856
1857 }
1858
1859 if (errflg) {
1860 (void) fprintf(stderr,
1861 "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1862 "[ -E function-name ]\n\t[ -f function-name ] "
1863 "[ -F function-name ]\n\t[ image-file "
1864 "[ profile-file ... ] ]\n");
1865 exit(EX_USAGE);
1866 }
1867
1868 if (optind < argc) {
1869 a_outname = argv[optind++];
1870 } else {
1871 a_outname = A_OUTNAME;
1872 }
1873 if (optind < argc) {
1874 gmonname = argv[optind++];
1875 } else {
1876 gmonname = GMONNAME;
1877 }
1878 /*
1879 * turn off default functions
1880 */
1881 for (sp = &defaultEs[0]; *sp; sp++) {
1882 Eflag = TRUE;
1883 addlist(Elist, *sp);
1884 eflag = TRUE;
1885 addlist(elist, *sp);
1886 }
1887 /*
1888 * how many ticks per second?
1889 * if we can't tell, report time in ticks.
1890 */
1891 hz = sysconf(_SC_CLK_TCK);
1892 if (hz == -1) {
1893 hz = 1;
1894 (void) fprintf(stderr, "time is in ticks, not seconds\n");
1895 }
1896
1897 getnfile(a_outname);
1898
1899 /*
1900 * get information about mon.out file(s).
1901 */
1902 do {
1903 getpfile(gmonname);
1904 if (optind < argc)
1905 gmonname = argv[optind++];
1906 else
1907 optind++;
1908 } while (optind <= argc);
1909 /*
1910 * dump out a gmon.sum file if requested
1911 */
1912 if (sflag || Dflag)
1913 dumpsum(GMONSUM);
1914
1915 if (old_style) {
1916 /*
1917 * assign samples to procedures
1918 */
1919 asgnsamples();
1920 }
1921
1922 /*
1923 * assemble the dynamic profile
1924 */
1925 timesortnlp = doarcs();
1926
1927 /*
1928 * print the dynamic profile
1929 */
1930 #ifdef DEBUG
1931 if (debug & ANYDEBUG) {
1932 /* raw output of all symbols in all their glory */
1933 int i;
1934 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1935 "#calls, selfcalls, index \n");
1936 for (i = 0; i < modules.nname; i++) { /* Print each symbol */
1937 if (timesortnlp[i]->name)
1938 (void) printf(" %s ", timesortnlp[i]->name);
1939 else
1940 (void) printf(" <cycle> ");
1941 (void) printf(" %lld ", timesortnlp[i]->value);
1942 (void) printf(" %lld ", timesortnlp[i]->svalue);
1943 (void) printf(" %f ", timesortnlp[i]->time);
1944 (void) printf(" %lld ", timesortnlp[i]->ncall);
1945 (void) printf(" %lld ", timesortnlp[i]->selfcalls);
1946 (void) printf(" %d ", timesortnlp[i]->index);
1947 (void) printf(" \n");
1948 }
1949 }
1950 #endif /* DEBUG */
1951
1952 printgprof(timesortnlp);
1953 /*
1954 * print the flat profile
1955 */
1956 printprof();
1957 /*
1958 * print the index
1959 */
1960 printindex();
1961
1962 /*
1963 * print the modules
1964 */
1965 printmodules();
1966
1967 done();
1968 /* NOTREACHED */
1969 return (0);
1970 }
1971