1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 #include <sysexits.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <unistd.h>
32 #include "gprof.h"
33 #include "profile.h"
34
/* Program name; used as the prefix of diagnostics written to stderr. */
char *whoami = "gprof";
36 static pctype lowpc, highpc; /* range profiled, in UNIT's */
37
/*
 * Symbols that get -E excluded by default.  The list is NULL-terminated.
 */
static char *defaultEs[] = { "mcount", "__mcleanup", NULL };
46
#ifdef DEBUG

/*
 * Printable names of the profile object types, for debug output only.
 * Slot 0 stands for an invalid object; the list is NULL-terminated.
 */
static char *objname[] = {
	"<invalid object>",
	"PROF_BUFFER_T",
	"PROF_CALLGRAPH_T",
	"PROF_MODULES_T",
	NULL
};

/* Number of valid (named) object types listed in objname[] */
#define	MAX_OBJTYPES	3

#endif /* DEBUG */
59
/*
 * Terminate the program, reporting success (EX_OK) to the parent.
 * Never returns.
 */
void
done(void)
{
	exit(EX_OK);
}
66
67 static pctype
max(pctype a,pctype b)68 max(pctype a, pctype b)
69 {
70 if (a > b)
71 return (a);
72 return (b);
73 }
74
75 static pctype
min(pctype a,pctype b)76 min(pctype a, pctype b)
77 {
78 if (a < b)
79 return (a);
80 return (b);
81 }
82
83 /*
84 * calculate scaled entry point addresses (to save time in asgnsamples),
85 * and possibly push the scaled entry points over the entry mask,
86 * if it turns out that the entry point is in one bucket and the code
87 * for a routine is in the next bucket.
88 *
89 */
90 static void
alignentries(void)91 alignentries(void)
92 {
93 struct nl *nlp;
94 #ifdef DEBUG
95 pctype bucket_of_entry;
96 pctype bucket_of_code;
97 #endif /* DEBUG */
98
99 /* for old-style gmon.out, nameslist is only in modules.nl */
100
101 for (nlp = modules.nl; nlp < modules.npe; nlp++) {
102 nlp->svalue = nlp->value / sizeof (UNIT);
103 #ifdef DEBUG
104 bucket_of_entry = (nlp->svalue - lowpc) / scale;
105 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
106 if (bucket_of_entry < bucket_of_code) {
107 if (debug & SAMPLEDEBUG) {
108 (void) printf(
109 "[alignentries] pushing svalue 0x%llx "
110 "to 0x%llx\n", nlp->svalue,
111 nlp->svalue + UNITS_TO_CODE);
112 }
113 }
114 #endif /* DEBUG */
115 }
116 }
117
118 /*
119 * old-style gmon.out
120 * ------------------
121 *
122 * Assign samples to the procedures to which they belong.
123 *
124 * There are three cases as to where pcl and pch can be
125 * with respect to the routine entry addresses svalue0 and svalue1
126 * as shown in the following diagram. overlap computes the
127 * distance between the arrows, the fraction of the sample
128 * that is to be credited to the routine which starts at svalue0.
129 *
130 * svalue0 svalue1
131 * | |
132 * v v
133 *
134 * +-----------------------------------------------+
135 * | |
136 * | ->| |<- ->| |<- ->| |<- |
137 * | | | | | |
138 * +---------+ +---------+ +---------+
139 *
140 * ^ ^ ^ ^ ^ ^
141 * | | | | | |
142 * pcl pch pcl pch pcl pch
143 *
144 * For the vax we assert that samples will never fall in the first
145 * two bytes of any routine, since that is the entry mask,
146 * thus we give call alignentries() to adjust the entry points if
147 * the entry mask falls in one bucket but the code for the routine
148 * doesn't start until the next bucket. In conjunction with the
149 * alignment of routine addresses, this should allow us to have
150 * only one sample for every four bytes of text space and never
151 * have any overlap (the two end cases, above).
152 */
153 static void
asgnsamples(void)154 asgnsamples(void)
155 {
156 sztype i, j;
157 unsigned_UNIT ccnt;
158 double time;
159 pctype pcl, pch;
160 pctype overlap;
161 pctype svalue0, svalue1;
162
163 extern mod_info_t modules;
164 nltype *nl = modules.nl;
165 sztype nname = modules.nname;
166
167 /* read samples and assign to namelist symbols */
168 scale = highpc - lowpc;
169 scale /= nsamples;
170 alignentries();
171 for (i = 0, j = 1; i < nsamples; i++) {
172 ccnt = samples[i];
173 if (ccnt == 0)
174 continue;
175 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
176 pcl = lowpc + scale * i;
177 /*LINTED: E_ASSIGMENT_CAUSE_LOSS_PREC*/
178 pch = lowpc + scale * (i + 1);
179 time = ccnt;
180 #ifdef DEBUG
181 if (debug & SAMPLEDEBUG) {
182 (void) printf(
183 "[asgnsamples] pcl 0x%llx pch 0x%llx ccnt %d\n",
184 pcl, pch, ccnt);
185 }
186 #endif /* DEBUG */
187 totime += time;
188 for (j = (j ? j - 1 : 0); j < nname; j++) {
189 svalue0 = nl[j].svalue;
190 svalue1 = nl[j+1].svalue;
191 /*
192 * if high end of tick is below entry address,
193 * go for next tick.
194 */
195 if (pch < svalue0)
196 break;
197 /*
198 * if low end of tick into next routine,
199 * go for next routine.
200 */
201 if (pcl >= svalue1)
202 continue;
203 overlap = min(pch, svalue1) - max(pcl, svalue0);
204 if (overlap != 0) {
205 #ifdef DEBUG
206 if (debug & SAMPLEDEBUG) {
207 (void) printf("[asgnsamples] "
208 "(0x%llx->0x%llx-0x%llx) %s gets "
209 "%f ticks %lld overlap\n",
210 nl[j].value/sizeof (UNIT), svalue0,
211 svalue1, nl[j].name,
212 overlap * time / scale, overlap);
213 }
214 #endif /* DEBUG */
215 nl[j].time += overlap * time / scale;
216 }
217 }
218 }
219 #ifdef DEBUG
220 if (debug & SAMPLEDEBUG) {
221 (void) printf("[asgnsamples] totime %f\n", totime);
222 }
223 #endif /* DEBUG */
224 }
225
226
227 static void
dump_callgraph(FILE * fp,char * filename,unsigned long tarcs,unsigned long ncallees)228 dump_callgraph(FILE *fp, char *filename, unsigned long tarcs,
229 unsigned long ncallees)
230 {
231 ProfCallGraph prof_cgraph;
232 ProfFunction prof_func;
233 arctype *arcp;
234 mod_info_t *mi;
235 nltype *nlp;
236 size_t cur_offset;
237 unsigned long caller_id = 0, callee_id = 0;
238
239 /*
240 * Write the callgraph header
241 */
242 prof_cgraph.type = PROF_CALLGRAPH_T;
243 prof_cgraph.version = PROF_CALLGRAPH_VER;
244 prof_cgraph.functions = PROFCGRAPH_SZ;
245 prof_cgraph.size = PROFCGRAPH_SZ + tarcs * PROFFUNC_SZ;
246 if (fwrite(&prof_cgraph, sizeof (ProfCallGraph), 1, fp) != 1) {
247 perror(filename);
248 exit(EX_IOERR);
249 }
250 /* CONSTCOND */
251 if (CGRAPH_FILLER)
252 (void) fseek(fp, CGRAPH_FILLER, SEEK_CUR);
253
254 /* Current offset inside the callgraph object */
255 cur_offset = prof_cgraph.functions;
256
257 for (mi = &modules; mi; mi = mi->next) {
258 for (nlp = mi->nl; nlp < mi->npe; nlp++) {
259 if (nlp->ncallers == 0)
260 continue;
261
262 /* If this is the last callee, set next_to to 0 */
263 callee_id++;
264 if (callee_id == ncallees)
265 prof_func.next_to = 0;
266 else {
267 prof_func.next_to = cur_offset +
268 nlp->ncallers * PROFFUNC_SZ;
269 }
270
271 /*
272 * Dump this callee's raw arc information with all
273 * its callers
274 */
275 caller_id = 1;
276 for (arcp = nlp->parents; arcp;
277 arcp = arcp->arc_parentlist) {
278 /*
279 * If no more callers for this callee, set
280 * next_from to 0
281 */
282 if (caller_id == nlp->ncallers)
283 prof_func.next_from = 0;
284 else {
285 prof_func.next_from = cur_offset +
286 PROFFUNC_SZ;
287 }
288
289 prof_func.frompc =
290 arcp->arc_parentp->module->load_base +
291 (arcp->arc_parentp->value -
292 arcp->arc_parentp->module->txt_origin);
293 prof_func.topc = mi->load_base +
294 (nlp->value - mi->txt_origin);
295 prof_func.count = arcp->arc_count;
296
297
298 if (fwrite(&prof_func, sizeof (ProfFunction),
299 1, fp) != 1) {
300 perror(filename);
301 exit(EX_IOERR);
302 }
303 /* CONSTCOND */
304 if (FUNC_FILLER)
305 (void) fseek(fp, FUNC_FILLER, SEEK_CUR);
306
307 cur_offset += PROFFUNC_SZ;
308 caller_id++;
309 }
310 } /* for nlp... */
311 } /* for mi... */
312 }
313
314 /*
315 * To save all pc-hits in all the gmon.out's is infeasible, as this
316 * may become quite huge even with a small number of files to sum.
317 * Instead, we'll dump *fictitious hits* to correct functions
318 * by scanning module namelists. Again, since this is summing
319 * pc-hits, we may have to dump the pcsamples out in chunks if the
320 * number of pc-hits is high.
321 */
322 static void
dump_hits(FILE * fp,char * filename,nltype * nlp)323 dump_hits(FILE *fp, char *filename, nltype *nlp)
324 {
325 Address *p, hitpc;
326 size_t i, nelem, ntowrite;
327
328 if ((nelem = nlp->nticks) > PROF_BUFFER_SIZE)
329 nelem = PROF_BUFFER_SIZE;
330
331 if ((p = (Address *) calloc(nelem, sizeof (Address))) == NULL) {
332 (void) fprintf(stderr, "%s: no room for %d pcsamples\n",
333 whoami, nelem);
334 exit(EX_OSERR);
335 }
336
337 /*
338 * Set up *fictitious* hits (to function entry) buffer
339 */
340 hitpc = nlp->module->load_base + (nlp->value - nlp->module->txt_origin);
341 for (i = 0; i < nelem; i++)
342 p[i] = hitpc;
343
344 for (ntowrite = nlp->nticks; ntowrite >= nelem; ntowrite -= nelem) {
345 if (fwrite(p, nelem * sizeof (Address), 1, fp) != 1) {
346 perror(filename);
347 exit(EX_IOERR);
348 }
349 }
350
351 if (ntowrite) {
352 if (fwrite(p, ntowrite * sizeof (Address), 1, fp) != 1) {
353 perror(filename);
354 exit(EX_IOERR);
355 }
356 }
357
358 free(p);
359 }
360
361 static void
dump_pcsamples(FILE * fp,char * filename,unsigned long * tarcs,unsigned long * ncallees)362 dump_pcsamples(FILE *fp, char *filename, unsigned long *tarcs,
363 unsigned long *ncallees)
364 {
365 ProfBuffer prof_buffer;
366 arctype *arcp;
367 mod_info_t *mi;
368 nltype *nlp;
369
370 prof_buffer.type = PROF_BUFFER_T;
371 prof_buffer.version = PROF_BUFFER_VER;
372 prof_buffer.buffer = PROFBUF_SZ;
373 prof_buffer.bufsize = n_pcsamples;
374 prof_buffer.size = PROFBUF_SZ + n_pcsamples * sizeof (Address);
375 if (fwrite(&prof_buffer, sizeof (ProfBuffer), 1, fp) != 1) {
376 perror(filename);
377 exit(EX_IOERR);
378 }
379 /* CONSTCOND */
380 if (BUF_FILLER)
381 (void) fseek(fp, BUF_FILLER, SEEK_CUR);
382
383 *tarcs = 0;
384 *ncallees = 0;
385 for (mi = &modules; mi; mi = mi->next) {
386 for (nlp = mi->nl; nlp < mi->npe; nlp++) {
387 if (nlp->nticks)
388 dump_hits(fp, filename, nlp);
389
390 nlp->ncallers = 0;
391 for (arcp = nlp->parents; arcp;
392 arcp = arcp->arc_parentlist) {
393 (nlp->ncallers)++;
394 }
395
396 if (nlp->ncallers) {
397 (*tarcs) += nlp->ncallers;
398 (*ncallees)++;
399 }
400 }
401 }
402 }
403
404 static void
dump_modules(FILE * fp,char * filename,size_t pbuf_sz)405 dump_modules(FILE *fp, char *filename, size_t pbuf_sz)
406 {
407 char *pbuf, *p;
408 size_t namelen;
409 Index off_nxt, off_path;
410 mod_info_t *mi;
411
412 ProfModuleList prof_modlist;
413 ProfModule prof_mod;
414
415 /* Allocate for path strings buffer */
416 pbuf_sz = CEIL(pbuf_sz, STRUCT_ALIGN);
417 if ((p = pbuf = calloc(pbuf_sz, sizeof (char))) == NULL) {
418 (void) fprintf(stderr, "%s: no room for %d bytes\n",
419 whoami, pbuf_sz * sizeof (char));
420 exit(EX_OSERR);
421 }
422
423 /* Dump out PROF_MODULE_T info for all non-aout modules */
424 prof_modlist.type = PROF_MODULES_T;
425 prof_modlist.version = PROF_MODULES_VER;
426 prof_modlist.modules = PROFMODLIST_SZ;
427 prof_modlist.size = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ +
428 pbuf_sz;
429 if (fwrite(&prof_modlist, sizeof (ProfModuleList), 1, fp) != 1) {
430 perror(filename);
431 exit(EX_IOERR);
432 }
433 /* CONSTCOND */
434 if (MODLIST_FILLER)
435 (void) fseek(fp, MODLIST_FILLER, SEEK_CUR);
436
437 /*
438 * Initialize offsets for ProfModule elements.
439 */
440 off_nxt = PROFMODLIST_SZ + PROFMOD_SZ;
441 off_path = PROFMODLIST_SZ + (n_modules - 1) * PROFMOD_SZ;
442
443 for (mi = modules.next; mi; mi = mi->next) {
444 if (mi->next)
445 prof_mod.next = off_nxt;
446 else
447 prof_mod.next = 0;
448 prof_mod.path = off_path;
449 prof_mod.startaddr = mi->load_base;
450 prof_mod.endaddr = mi->load_end;
451
452 if (fwrite(&prof_mod, sizeof (ProfModule), 1, fp) != 1) {
453 perror(filename);
454 exit(EX_IOERR);
455 }
456
457 /* CONSTCOND */
458 if (MOD_FILLER)
459 (void) fseek(fp, MOD_FILLER, SEEK_CUR);
460
461 (void) strcpy(p, mi->name);
462 namelen = strlen(mi->name);
463 p += namelen + 1;
464
465 /* Note that offset to every path str need not be aligned */
466 off_nxt += PROFMOD_SZ;
467 off_path += namelen + 1;
468 }
469
470 /* Write out the module path strings */
471 if (pbuf_sz) {
472 if (fwrite(pbuf, pbuf_sz, 1, fp) != 1) {
473 perror(filename);
474 exit(EX_IOERR);
475 }
476
477 free(pbuf);
478 }
479 }
480
481 /*
482 * If we have inactive modules, their current load addresses may overlap with
483 * active ones, and so we've to assign fictitious, non-overlapping addresses
484 * to all modules before we dump them.
485 */
486 static void
fixup_maps(size_t * pathsz)487 fixup_maps(size_t *pathsz)
488 {
489 unsigned int n_inactive = 0;
490 Address lbase = 0, lend;
491 mod_info_t *mi;
492
493 /* Pick the lowest load address among modules */
494 *pathsz = 0;
495 for (mi = &modules; mi; mi = mi->next) {
496
497 if (mi->active == FALSE)
498 n_inactive++;
499
500 if (mi == &modules || mi->load_base < lbase)
501 lbase = mi->load_base;
502
503 /*
504 * Return total path size of non-aout modules only
505 */
506 if (mi != &modules)
507 *pathsz = (*pathsz) + strlen(mi->name) + 1;
508 }
509
510 /*
511 * All module info is in fine shape already if there are no
512 * inactive modules
513 */
514 if (n_inactive == 0)
515 return;
516
517 /*
518 * Assign fictitious load addresses to all (non-aout) modules so
519 * that sum info can be dumped out.
520 */
521 for (mi = modules.next; mi; mi = mi->next) {
522 lend = lbase + (mi->data_end - mi->txt_origin);
523 if ((lbase < modules.load_base && lend < modules.load_base) ||
524 (lbase > modules.load_end && lend > modules.load_end)) {
525
526 mi->load_base = lbase;
527 mi->load_end = lend;
528
529 /* just to give an appearance of reality */
530 lbase = CEIL(lend + PGSZ, PGSZ);
531 } else {
532 /*
533 * can't use this lbase & lend pair, as it
534 * overlaps with aout's addresses
535 */
536 mi->load_base = CEIL(modules.load_end + PGSZ, PGSZ);
537 mi->load_end = mi->load_base + (lend - lbase);
538
539 lbase = CEIL(mi->load_end + PGSZ, PGSZ);
540 }
541 }
542 }
543
544 static void
dump_gprofhdr(FILE * fp,char * filename)545 dump_gprofhdr(FILE *fp, char *filename)
546 {
547 ProfHeader prof_hdr;
548
549 prof_hdr.h_magic = PROF_MAGIC;
550 prof_hdr.h_major_ver = PROF_MAJOR_VERSION;
551 prof_hdr.h_minor_ver = PROF_MINOR_VERSION;
552 prof_hdr.size = PROFHDR_SZ;
553 if (fwrite(&prof_hdr, sizeof (prof_hdr), 1, fp) != 1) {
554 perror(filename);
555 exit(EX_IOERR);
556 }
557
558 /* CONSTCOND */
559 if (HDR_FILLER)
560 (void) fseek(fp, HDR_FILLER, SEEK_CUR);
561 }
562
563 static void
dumpsum_ostyle(char * sumfile)564 dumpsum_ostyle(char *sumfile)
565 {
566 nltype *nlp;
567 arctype *arcp;
568 struct rawarc arc;
569 struct rawarc32 arc32;
570 FILE *sfile;
571
572 if ((sfile = fopen(sumfile, "w")) == NULL) {
573 perror(sumfile);
574 exit(EX_IOERR);
575 }
576 /*
577 * dump the header; use the last header read in
578 */
579 if (Bflag) {
580 if (fwrite(&h, sizeof (h), 1, sfile) != 1) {
581 perror(sumfile);
582 exit(EX_IOERR);
583 }
584 } else {
585 struct hdr32 hdr;
586 hdr.lowpc = (pctype32)h.lowpc;
587 hdr.highpc = (pctype32)h.highpc;
588 hdr.ncnt = (pctype32)h.ncnt;
589 if (fwrite(&hdr, sizeof (hdr), 1, sfile) != 1) {
590 perror(sumfile);
591 exit(EX_IOERR);
592 }
593 }
594 /*
595 * dump the samples
596 */
597 if (fwrite(samples, sizeof (unsigned_UNIT), nsamples, sfile) !=
598 nsamples) {
599 perror(sumfile);
600 exit(EX_IOERR);
601 }
602 /*
603 * dump the normalized raw arc information. For old-style dumping,
604 * the only namelist is in modules.nl
605 */
606 for (nlp = modules.nl; nlp < modules.npe; nlp++) {
607 for (arcp = nlp->children; arcp;
608 arcp = arcp->arc_childlist) {
609 if (Bflag) {
610 arc.raw_frompc = arcp->arc_parentp->value;
611 arc.raw_selfpc = arcp->arc_childp->value;
612 arc.raw_count = arcp->arc_count;
613 if (fwrite(&arc, sizeof (arc), 1, sfile) != 1) {
614 perror(sumfile);
615 exit(EX_IOERR);
616 }
617 } else {
618 arc32.raw_frompc =
619 (pctype32)arcp->arc_parentp->value;
620 arc32.raw_selfpc =
621 (pctype32)arcp->arc_childp->value;
622 arc32.raw_count = (actype32)arcp->arc_count;
623 if (fwrite(&arc32, sizeof (arc32), 1, sfile) !=
624 1) {
625 perror(sumfile);
626 exit(EX_IOERR);
627 }
628 }
629 #ifdef DEBUG
630 if (debug & SAMPLEDEBUG) {
631 (void) printf(
632 "[dumpsum_ostyle] frompc 0x%llx selfpc "
633 "0x%llx count %lld\n", arc.raw_frompc,
634 arc.raw_selfpc, arc.raw_count);
635 }
636 #endif /* DEBUG */
637 }
638 }
639 (void) fclose(sfile);
640 }
641
642 /*
643 * dump out the gmon.sum file
644 */
645 static void
dumpsum(char * sumfile)646 dumpsum(char *sumfile)
647 {
648 FILE *sfile;
649 size_t pathbuf_sz;
650 unsigned long total_arcs; /* total number of arcs in all */
651 unsigned long ncallees; /* no. of callees with parents */
652
653 if (old_style) {
654 dumpsum_ostyle(sumfile);
655 return;
656 }
657
658 if ((sfile = fopen(sumfile, "w")) == NULL) {
659 perror(sumfile);
660 exit(EX_IOERR);
661 }
662
663 /*
664 * Dump the new-style gprof header. Even if one of the original
665 * profiled-files was of a older version, the summed file is of
666 * current version only.
667 */
668 dump_gprofhdr(sfile, sumfile);
669
670 /*
671 * Fix up load-maps and dump out modules info
672 *
673 * Fix up module load maps so inactive modules get *some* address
674 * (and btw, could you get the total size of non-aout module path
675 * strings please ?)
676 */
677 fixup_maps(&pathbuf_sz);
678 dump_modules(sfile, sumfile, pathbuf_sz);
679
680
681 /*
682 * Dump out the summ'd pcsamples
683 *
684 * For dumping call graph information later, we need certain
685 * statistics (like total arcs, number of callers for each node);
686 * collect these also while we are at it.
687 */
688 dump_pcsamples(sfile, sumfile, &total_arcs, &ncallees);
689
690 /*
691 * Dump out the summ'd call graph information
692 */
693 dump_callgraph(sfile, sumfile, total_arcs, ncallees);
694
695
696 (void) fclose(sfile);
697 }
698
699 static void
tally(mod_info_t * caller_mod,mod_info_t * callee_mod,struct rawarc * rawp)700 tally(mod_info_t *caller_mod, mod_info_t *callee_mod, struct rawarc *rawp)
701 {
702 nltype *parentp;
703 nltype *childp;
704
705 /*
706 * if count == 0 this is a null arc and
707 * we don't need to tally it.
708 */
709 if (rawp->raw_count == 0)
710 return;
711
712 /*
713 * Lookup the caller and callee pcs in namelists of
714 * appropriate modules
715 */
716 parentp = nllookup(caller_mod, rawp->raw_frompc, NULL);
717 childp = nllookup(callee_mod, rawp->raw_selfpc, NULL);
718 if (childp && parentp) {
719 if (!Dflag)
720 childp->ncall += rawp->raw_count;
721 else {
722 if (first_file)
723 childp->ncall += rawp->raw_count;
724 else {
725 childp->ncall -= rawp->raw_count;
726 if (childp->ncall < 0)
727 childp->ncall = 0;
728 }
729 }
730
731 #ifdef DEBUG
732 if (debug & TALLYDEBUG) {
733 (void) printf("[tally] arc from %s to %s traversed "
734 "%lld times\n", parentp->name,
735 childp->name, rawp->raw_count);
736 }
737 #endif /* DEBUG */
738 addarc(parentp, childp, rawp->raw_count);
739 }
740 }
741
742 /*
743 * Look up a module's base address in a sorted list of pc-hits. Unlike
744 * nllookup(), this deals with misses by mapping them to the next *higher*
745 * pc-hit. This is so that we get into the module's first pc-hit rightaway,
746 * even if the module's entry-point (load_base) itself is not a hit.
747 */
748 static Address *
locate(Address * pclist,size_t nelem,Address keypc)749 locate(Address *pclist, size_t nelem, Address keypc)
750 {
751 size_t low = 0, middle, high = nelem - 1;
752
753 if (keypc <= pclist[low])
754 return (pclist);
755
756 if (keypc > pclist[high])
757 return (NULL);
758
759 while (low != high) {
760 middle = (high + low) >> 1;
761
762 if ((pclist[middle] < keypc) && (pclist[middle + 1] >= keypc))
763 return (&pclist[middle + 1]);
764
765 if (pclist[middle] >= keypc)
766 high = middle;
767 else
768 low = middle + 1;
769 }
770
771 /* must never reach here! */
772 return (NULL);
773 }
774
775 static void
assign_pcsamples(mod_info_t * module,Address * pcsmpl,size_t n_samples)776 assign_pcsamples(mod_info_t *module, Address *pcsmpl, size_t n_samples)
777 {
778 Address *pcptr, *pcse = pcsmpl + n_samples;
779 pctype nxt_func;
780 nltype *fnl;
781 size_t func_nticks;
782 #ifdef DEBUG
783 size_t n_hits_in_module = 0;
784 #endif /* DEBUG */
785
786 /* Locate the first pc-hit for this module */
787 if ((pcptr = locate(pcsmpl, n_samples, module->load_base)) == NULL) {
788 #ifdef DEBUG
789 if (debug & PCSMPLDEBUG) {
790 (void) printf("[assign_pcsamples] no pc-hits in\n");
791 (void) printf(
792 " `%s'\n", module->name);
793 }
794 #endif /* DEBUG */
795 return; /* no pc-hits in this module */
796 }
797
798 /* Assign all pc-hits in this module to appropriate functions */
799 while ((pcptr < pcse) && (*pcptr < module->load_end)) {
800
801 /* Update the corresponding function's time */
802 if (fnl = nllookup(module, (pctype) *pcptr, &nxt_func)) {
803 /*
804 * Collect all pc-hits in this function. Each
805 * pc-hit counts as 1 tick.
806 */
807 func_nticks = 0;
808 while ((pcptr < pcse) && (*pcptr < nxt_func)) {
809 func_nticks++;
810 pcptr++;
811 }
812
813 if (func_nticks == 0)
814 pcptr++;
815 else {
816 fnl->nticks += func_nticks;
817 fnl->time += func_nticks;
818 totime += func_nticks;
819 }
820
821 #ifdef DEBUG
822 n_hits_in_module += func_nticks;
823 #endif /* DEBUG */
824 } else {
825 /*
826 * pc sample could not be assigned to function;
827 * probably in a PLT
828 */
829 pcptr++;
830 }
831 }
832
833 #ifdef DEBUG
834 if (debug & PCSMPLDEBUG) {
835 (void) printf(
836 "[assign_pcsamples] %ld hits in\n", n_hits_in_module);
837 (void) printf(" `%s'\n", module->name);
838 }
839 #endif /* DEBUG */
840 }
841
842 int
pc_cmp(const void * arg1,const void * arg2)843 pc_cmp(const void *arg1, const void *arg2)
844 {
845 Address *pc1 = (Address *)arg1;
846 Address *pc2 = (Address *)arg2;
847
848 if (*pc1 > *pc2)
849 return (1);
850
851 if (*pc1 < *pc2)
852 return (-1);
853
854 return (0);
855 }
856
857 static void
process_pcsamples(ProfBuffer * bufp)858 process_pcsamples(ProfBuffer *bufp)
859 {
860 Address *pc_samples;
861 mod_info_t *mi;
862 caddr_t p;
863 size_t chunk_size, nelem_read, nelem_to_read;
864
865 #ifdef DEBUG
866 if (debug & PCSMPLDEBUG) {
867 (void) printf(
868 "[process_pcsamples] number of pcsamples = %lld\n",
869 bufp->bufsize);
870 }
871 #endif /* DEBUG */
872
873 /* buffer with no pc samples ? */
874 if (bufp->bufsize == 0)
875 return;
876
877 /*
878 * If we're processing pcsamples of a profile sum, we could have
879 * more than PROF_BUFFER_SIZE number of samples. In such a case,
880 * we must read the pcsamples in chunks.
881 */
882 if ((chunk_size = bufp->bufsize) > PROF_BUFFER_SIZE)
883 chunk_size = PROF_BUFFER_SIZE;
884
885 /* Allocate for the pcsample chunk */
886 pc_samples = (Address *) calloc(chunk_size, sizeof (Address));
887 if (pc_samples == NULL) {
888 (void) fprintf(stderr, "%s: no room for %d sample pc's\n",
889 whoami, chunk_size);
890 exit(EX_OSERR);
891 }
892
893 /* Copy the current set of pcsamples */
894 nelem_read = 0;
895 nelem_to_read = bufp->bufsize;
896 p = (char *)bufp + bufp->buffer;
897
898 while (nelem_read < nelem_to_read) {
899 (void) memcpy((void *) pc_samples, p,
900 chunk_size * sizeof (Address));
901
902 /* Sort the pc samples */
903 qsort(pc_samples, chunk_size, sizeof (Address), pc_cmp);
904
905 /*
906 * Assign pcsamples to functions in the currently active
907 * module list
908 */
909 for (mi = &modules; mi; mi = mi->next) {
910 if (mi->active == FALSE)
911 continue;
912 assign_pcsamples(mi, pc_samples, chunk_size);
913 }
914
915 p += (chunk_size * sizeof (Address));
916 nelem_read += chunk_size;
917
918 if ((nelem_to_read - nelem_read) < chunk_size)
919 chunk_size = nelem_to_read - nelem_read;
920 }
921
922 free(pc_samples);
923
924 /* Update total number of pcsamples read so far */
925 n_pcsamples += bufp->bufsize;
926 }
927
928 static mod_info_t *
find_module(Address addr)929 find_module(Address addr)
930 {
931 mod_info_t *mi;
932
933 for (mi = &modules; mi; mi = mi->next) {
934 if (mi->active == FALSE)
935 continue;
936
937 if (addr >= mi->load_base && addr < mi->load_end)
938 return (mi);
939 }
940
941 return (NULL);
942 }
943
944 static void
process_cgraph(ProfCallGraph * cgp)945 process_cgraph(ProfCallGraph *cgp)
946 {
947 struct rawarc arc;
948 mod_info_t *callee_mi, *caller_mi;
949 ProfFunction *calleep, *callerp;
950 Index caller_off, callee_off;
951
952 /*
953 * Note that *callee_off* increment in the for loop below
954 * uses *calleep* and *calleep* doesn't get set until the for loop
955 * is entered. We don't expect the increment to be executed before
956 * the loop body is executed atleast once, so this should be ok.
957 */
958 for (callee_off = cgp->functions; callee_off;
959 callee_off = calleep->next_to) {
960
961 /* LINTED: pointer cast */
962 calleep = (ProfFunction *)((char *)cgp + callee_off);
963
964 /*
965 * We could choose either to sort the {caller, callee}
966 * list twice and assign callee/caller to modules or inspect
967 * each callee/caller in the active modules list. Since
968 * the modules list is usually very small, we'l choose the
969 * latter.
970 */
971
972 /*
973 * If we cannot identify a callee with a module, there's
974 * no use worrying about who called it.
975 */
976 if ((callee_mi = find_module(calleep->topc)) == NULL) {
977 #ifdef DEBUG
978 if (debug & CGRAPHDEBUG) {
979 (void) printf(
980 "[process_cgraph] callee %#llx missed\n",
981 calleep->topc);
982 }
983 #endif /* DEBUG */
984 continue;
985 } else
986 arc.raw_selfpc = calleep->topc;
987
988 for (caller_off = callee_off; caller_off;
989 caller_off = callerp->next_from) {
990
991 /* LINTED: pointer cast */
992 callerp = (ProfFunction *)((char *)cgp + caller_off);
993 if ((caller_mi = find_module(callerp->frompc)) ==
994 NULL) {
995 #ifdef DEBUG
996 if (debug & CGRAPHDEBUG) {
997 (void) printf(
998 "[process_cgraph] caller %#llx "
999 "missed\n", callerp->frompc);
1000 }
1001 #endif /* DEBUG */
1002 continue;
1003 }
1004
1005 arc.raw_frompc = callerp->frompc;
1006 arc.raw_count = callerp->count;
1007
1008 #ifdef DEBUG
1009 if (debug & CGRAPHDEBUG) {
1010 (void) printf(
1011 "[process_cgraph] arc <%#llx, %#llx, "
1012 "%lld>\n", arc.raw_frompc, arc.raw_selfpc,
1013 arc.raw_count);
1014 }
1015 #endif /* DEBUG */
1016 tally(caller_mi, callee_mi, &arc);
1017 }
1018 }
1019
1020 #ifdef DEBUG
1021 puts("\n");
1022 #endif /* DEBUG */
1023 }
1024
1025 /*
1026 * Two modules overlap each other if they don't lie completely *outside*
1027 * each other.
1028 */
1029 static bool
does_overlap(ProfModule * new,mod_info_t * old)1030 does_overlap(ProfModule *new, mod_info_t *old)
1031 {
1032 /* case 1: new module lies completely *before* the old one */
1033 if (new->startaddr < old->load_base && new->endaddr <= old->load_base)
1034 return (FALSE);
1035
1036 /* case 2: new module lies completely *after* the old one */
1037 if (new->startaddr >= old->load_end && new->endaddr >= old->load_end)
1038 return (FALSE);
1039
1040 /* probably a dlopen: the modules overlap each other */
1041 return (TRUE);
1042 }
1043
1044 static bool
is_same_as_aout(char * modpath,struct stat * buf)1045 is_same_as_aout(char *modpath, struct stat *buf)
1046 {
1047 if (stat(modpath, buf) == -1) {
1048 (void) fprintf(stderr, "%s: can't get info on `%s'\n",
1049 whoami, modpath);
1050 exit(EX_NOINPUT);
1051 }
1052
1053 if ((buf->st_dev == aout_info.dev) && (buf->st_ino == aout_info.ino))
1054 return (TRUE);
1055 else
1056 return (FALSE);
1057 }
1058
1059 static void
process_modules(ProfModuleList * modlp)1060 process_modules(ProfModuleList *modlp)
1061 {
1062 ProfModule *newmodp;
1063 mod_info_t *mi, *last, *new_module;
1064 char *so_path;
1065 bool more_modules = TRUE;
1066 struct stat so_statbuf;
1067
1068 #ifdef DEBUG
1069 if (debug & MODULEDEBUG) {
1070 (void) printf("[process_modules] module obj version %u\n",
1071 modlp->version);
1072 }
1073 #endif /* DEBUG */
1074
1075 /* Check version of module type object */
1076 if (modlp->version > PROF_MODULES_VER) {
1077 (void) fprintf(stderr, "%s: version %d for module type objects"
1078 "is not supported\n", whoami, modlp->version);
1079 exit(EX_SOFTWARE);
1080 }
1081
1082
1083 /*
1084 * Scan the PROF_MODULES_T list and add modules to current list
1085 * of modules, if they're not present already
1086 */
1087 /* LINTED: pointer cast */
1088 newmodp = (ProfModule *)((char *)modlp + modlp->modules);
1089 do {
1090 /*
1091 * Since the prog could've been renamed after its run, we
1092 * should see if this overlaps a.out. If it does, it is
1093 * probably the renamed aout. We should also skip any other
1094 * non-sharedobj's that we see (or should we report an error ?)
1095 */
1096 so_path = (caddr_t)modlp + newmodp->path;
1097 if (does_overlap(newmodp, &modules) ||
1098 is_same_as_aout(so_path, &so_statbuf) ||
1099 (!is_shared_obj(so_path))) {
1100
1101 if (!newmodp->next)
1102 more_modules = FALSE;
1103
1104 /* LINTED: pointer cast */
1105 newmodp = (ProfModule *)
1106 ((caddr_t)modlp + newmodp->next);
1107 #ifdef DEBUG
1108 if (debug & MODULEDEBUG) {
1109 (void) printf(
1110 "[process_modules] `%s'\n", so_path);
1111 (void) printf(" skipped\n");
1112 }
1113 #endif /* DEBUG */
1114 continue;
1115 }
1116 #ifdef DEBUG
1117 if (debug & MODULEDEBUG)
1118 (void) printf("[process_modules] `%s'...\n", so_path);
1119 #endif /* DEBUG */
1120
1121 /*
1122 * Check all modules (leave the first one, 'cos that
1123 * is the program executable info). If this module is already
1124 * there in the list, update the load addresses and proceed.
1125 */
1126 last = &modules;
1127 while ((mi = last->next) != NULL) {
1128 /*
1129 * We expect the full pathname for all shared objects
1130 * needed by the program executable. In this case, we
1131 * simply need to compare the paths to see if they are
1132 * the same file.
1133 */
1134 if (strcmp(mi->name, so_path) == 0)
1135 break;
1136
1137 /*
1138 * Check if this new shared object will overlap
1139 * any existing module. If yes, remove the old one
1140 * from the linked list (but don't free it, 'cos
1141 * there may be symbols referring to this module
1142 * still)
1143 */
1144 if (does_overlap(newmodp, mi)) {
1145 #ifdef DEBUG
1146 if (debug & MODULEDEBUG) {
1147 (void) printf(
1148 "[process_modules] `%s'\n",
1149 so_path);
1150 (void) printf(
1151 " overlaps\n");
1152 (void) printf(
1153 " `%s'\n",
1154 mi->name);
1155 }
1156 #endif /* DEBUG */
1157 mi->active = FALSE;
1158 }
1159
1160 last = mi;
1161 }
1162
1163 /* Module already there, skip it */
1164 if (mi != NULL) {
1165 mi->load_base = newmodp->startaddr;
1166 mi->load_end = newmodp->endaddr;
1167 mi->active = TRUE;
1168 if (!newmodp->next)
1169 more_modules = FALSE;
1170
1171 /* LINTED: pointer cast */
1172 newmodp = (ProfModule *)
1173 ((caddr_t)modlp + newmodp->next);
1174
1175 #ifdef DEBUG
1176 if (debug & MODULEDEBUG) {
1177 (void) printf("[process_modules] base=%#llx, "
1178 "end=%#llx\n", mi->load_base, mi->load_end);
1179 }
1180 #endif /* DEBUG */
1181 continue;
1182 }
1183
1184 /*
1185 * Check if gmon.out is outdated with respect to the new
1186 * module we want to add
1187 */
1188 if (gmonout_info.mtime < so_statbuf.st_mtime) {
1189 (void) fprintf(stderr,
1190 "%s: shared obj outdates prof info\n", whoami);
1191 (void) fprintf(stderr, "\t(newer %s)\n", so_path);
1192 exit(EX_NOINPUT);
1193 }
1194
1195 /* Create a new module element */
1196 new_module = malloc(sizeof (mod_info_t));
1197 if (new_module == NULL) {
1198 (void) fprintf(stderr, "%s: no room for %d bytes\n",
1199 whoami, sizeof (mod_info_t));
1200 exit(EX_OSERR);
1201 }
1202
1203 /* and fill in info... */
1204 new_module->id = n_modules + 1;
1205 new_module->load_base = newmodp->startaddr;
1206 new_module->load_end = newmodp->endaddr;
1207 new_module->name = malloc(strlen(so_path) + 1);
1208 if (new_module->name == NULL) {
1209 (void) fprintf(stderr, "%s: no room for %d bytes\n",
1210 whoami, strlen(so_path) + 1);
1211 exit(EX_OSERR);
1212 }
1213 (void) strcpy(new_module->name, so_path);
1214 #ifdef DEBUG
1215 if (debug & MODULEDEBUG) {
1216 (void) printf(
1217 "[process_modules] base=%#llx, end=%#llx\n",
1218 new_module->load_base, new_module->load_end);
1219 }
1220 #endif /* DEBUG */
1221
1222 /* Create this module's nameslist */
1223 process_namelist(new_module);
1224
1225 /* Add it to the tail of active module list */
1226 last->next = new_module;
1227 n_modules++;
1228
1229 #ifdef DEBUG
1230 if (debug & MODULEDEBUG) {
1231 (void) printf(
1232 "[process_modules] total shared objects = %ld\n",
1233 n_modules - 1);
1234 }
1235 #endif /* DEBUG */
1236 /*
1237 * Move to the next module in the PROF_MODULES_T list
1238 * (if present)
1239 */
1240 if (!newmodp->next)
1241 more_modules = FALSE;
1242
1243 /* LINTED: pointer cast */
1244 newmodp = (ProfModule *)((caddr_t)modlp + newmodp->next);
1245
1246 } while (more_modules);
1247 }
1248
1249 static void
reset_active_modules(void)1250 reset_active_modules(void)
1251 {
1252 mod_info_t *mi;
1253
1254 /* Except the executable, no other module should remain active */
1255 for (mi = modules.next; mi; mi = mi->next)
1256 mi->active = FALSE;
1257 }
1258
1259 static void
getpfiledata(caddr_t memp,size_t fsz)1260 getpfiledata(caddr_t memp, size_t fsz)
1261 {
1262 ProfObject *objp;
1263 caddr_t file_end;
1264 bool found_pcsamples = FALSE, found_cgraph = FALSE;
1265
1266 /*
1267 * Before processing a new gmon.out, all modules except the
1268 * program executable must be made inactive, so that symbols
1269 * are searched only in the program executable, if we don't
1270 * find a MODULES_T object. Don't do it *after* we read a gmon.out,
1271 * because we need the active module data after we're done with
1272 * the last gmon.out, if we're doing summing.
1273 */
1274 reset_active_modules();
1275
1276 file_end = memp + fsz;
1277 /* LINTED: pointer cast */
1278 objp = (ProfObject *)(memp + ((ProfHeader *)memp)->size);
1279 while ((caddr_t)objp < file_end) {
1280 #ifdef DEBUG
1281 {
1282 unsigned int type = 0;
1283
1284 if (debug & MONOUTDEBUG) {
1285 if (objp->type <= MAX_OBJTYPES)
1286 type = objp->type;
1287
1288 (void) printf(
1289 "\n[getpfiledata] object %s [%#lx]\n",
1290 objname[type], objp->type);
1291 }
1292 }
1293 #endif /* DEBUG */
1294 switch (objp->type) {
1295 case PROF_MODULES_T :
1296 process_modules((ProfModuleList *) objp);
1297 break;
1298
1299 case PROF_CALLGRAPH_T :
1300 process_cgraph((ProfCallGraph *) objp);
1301 found_cgraph = TRUE;
1302 break;
1303
1304 case PROF_BUFFER_T :
1305 process_pcsamples((ProfBuffer *) objp);
1306 found_pcsamples = TRUE;
1307 break;
1308
1309 default :
1310 (void) fprintf(stderr,
1311 "%s: unknown prof object type=%d\n",
1312 whoami, objp->type);
1313 exit(EX_SOFTWARE);
1314 }
1315 /* LINTED: pointer cast */
1316 objp = (ProfObject *)((caddr_t)objp + objp->size);
1317 }
1318
1319 if (!found_cgraph || !found_pcsamples) {
1320 (void) fprintf(stderr,
1321 "%s: missing callgraph/pcsamples object\n", whoami);
1322 exit(EX_SOFTWARE);
1323 }
1324
1325 if ((caddr_t)objp > file_end) {
1326 (void) fprintf(stderr, "%s: malformed profile file.\n", whoami);
1327 exit(EX_SOFTWARE);
1328 }
1329
1330 if (first_file)
1331 first_file = FALSE;
1332 }
1333
1334 static void
readarcs(FILE * pfile)1335 readarcs(FILE *pfile)
1336 {
1337 /*
1338 * the rest of the file consists of
1339 * a bunch of <from,self,count> tuples.
1340 */
1341 /* CONSTCOND */
1342 while (1) {
1343 struct rawarc arc;
1344
1345 if (rflag) {
1346 if (Bflag) {
1347 L_cgarc64 rtld_arc64;
1348
1349 /*
1350 * If rflag is set then this is an profiled
1351 * image generated by rtld. It needs to be
1352 * 'converted' to the standard data format.
1353 */
1354 if (fread(&rtld_arc64,
1355 sizeof (L_cgarc64), 1, pfile) != 1)
1356 break;
1357
1358 if (rtld_arc64.cg_from == PRF_OUTADDR64)
1359 arc.raw_frompc = s_highpc + 0x10;
1360 else
1361 arc.raw_frompc =
1362 (pctype)rtld_arc64.cg_from;
1363 arc.raw_selfpc = (pctype)rtld_arc64.cg_to;
1364 arc.raw_count = (actype)rtld_arc64.cg_count;
1365 } else {
1366 L_cgarc rtld_arc;
1367
1368 /*
1369 * If rflag is set then this is an profiled
1370 * image generated by rtld. It needs to be
1371 * 'converted' to the standard data format.
1372 */
1373 if (fread(&rtld_arc,
1374 sizeof (L_cgarc), 1, pfile) != 1)
1375 break;
1376
1377 if (rtld_arc.cg_from == PRF_OUTADDR)
1378 arc.raw_frompc = s_highpc + 0x10;
1379 else
1380 arc.raw_frompc = (pctype)
1381 (uintptr_t)rtld_arc.cg_from;
1382 arc.raw_selfpc = (pctype)
1383 (uintptr_t)rtld_arc.cg_to;
1384 arc.raw_count = (actype)rtld_arc.cg_count;
1385 }
1386 } else {
1387 if (Bflag) {
1388 if (fread(&arc, sizeof (struct rawarc), 1,
1389 pfile) != 1) {
1390 break;
1391 }
1392 } else {
1393 /*
1394 * If these aren't big %pc's, we need to read
1395 * into the 32-bit raw arc structure, and
1396 * assign the members into the actual arc.
1397 */
1398 struct rawarc32 arc32;
1399 if (fread(&arc32, sizeof (struct rawarc32),
1400 1, pfile) != 1)
1401 break;
1402 arc.raw_frompc = (pctype)arc32.raw_frompc;
1403 arc.raw_selfpc = (pctype)arc32.raw_selfpc;
1404 arc.raw_count = (actype)arc32.raw_count;
1405 }
1406 }
1407
1408 #ifdef DEBUG
1409 if (debug & SAMPLEDEBUG) {
1410 (void) printf("[getpfile] frompc 0x%llx selfpc "
1411 "0x%llx count %lld\n", arc.raw_frompc,
1412 arc.raw_selfpc, arc.raw_count);
1413 }
1414 #endif /* DEBUG */
1415 /*
1416 * add this arc
1417 */
1418 tally(&modules, &modules, &arc);
1419 }
1420 if (first_file)
1421 first_file = FALSE;
1422 }
1423
1424 static void
readsamples(FILE * pfile)1425 readsamples(FILE *pfile)
1426 {
1427 sztype i;
1428 unsigned_UNIT sample;
1429
1430 if (samples == 0) {
1431 samples = (unsigned_UNIT *) calloc(nsamples,
1432 sizeof (unsigned_UNIT));
1433 if (samples == 0) {
1434 (void) fprintf(stderr,
1435 "%s: No room for %d sample pc's\n",
1436 whoami, sampbytes / sizeof (unsigned_UNIT));
1437 exit(EX_OSERR);
1438 }
1439 }
1440
1441 for (i = 0; i < nsamples; i++) {
1442 (void) fread(&sample, sizeof (unsigned_UNIT), 1, pfile);
1443 if (feof(pfile))
1444 break;
1445 samples[i] += sample;
1446 }
1447 if (i != nsamples) {
1448 (void) fprintf(stderr,
1449 "%s: unexpected EOF after reading %d/%d samples\n",
1450 whoami, --i, nsamples);
1451 exit(EX_IOERR);
1452 }
1453 }
1454
1455 static void *
handle_versioned(FILE * pfile,char * filename,size_t * fsz)1456 handle_versioned(FILE *pfile, char *filename, size_t *fsz)
1457 {
1458 int fd;
1459 bool invalid_version;
1460 caddr_t fmem;
1461 struct stat buf;
1462 ProfHeader prof_hdr;
1463 off_t lret;
1464
1465 /*
1466 * Check versioning info. For now, let's say we provide
1467 * backward compatibility, so we accept all older versions.
1468 */
1469 if (fread(&prof_hdr, sizeof (ProfHeader), 1, pfile) == 0) {
1470 perror("fread()");
1471 exit(EX_IOERR);
1472 }
1473
1474 invalid_version = FALSE;
1475 if (prof_hdr.h_major_ver > PROF_MAJOR_VERSION)
1476 invalid_version = TRUE;
1477 else if (prof_hdr.h_major_ver == PROF_MAJOR_VERSION) {
1478 if (prof_hdr.h_minor_ver > PROF_MINOR_VERSION)
1479 invalid_version = FALSE;
1480 }
1481
1482 if (invalid_version) {
1483 (void) fprintf(stderr, "%s: version %d.%d not supported\n",
1484 whoami, prof_hdr.h_major_ver, prof_hdr.h_minor_ver);
1485 exit(EX_SOFTWARE);
1486 }
1487
1488 /*
1489 * Map gmon.out onto memory.
1490 */
1491 (void) fclose(pfile);
1492 if ((fd = open(filename, O_RDONLY)) == -1) {
1493 perror(filename);
1494 exit(EX_IOERR);
1495 }
1496
1497 if ((lret = lseek(fd, 0, SEEK_END)) == -1) {
1498 perror(filename);
1499 exit(EX_IOERR);
1500 }
1501 *fsz = lret;
1502
1503 fmem = mmap(0, *fsz, PROT_READ, MAP_PRIVATE, fd, 0);
1504 if (fmem == MAP_FAILED) {
1505 (void) fprintf(stderr, "%s: can't map %s\n", whoami, filename);
1506 exit(EX_IOERR);
1507 }
1508
1509 /*
1510 * Before we close this fd, save this gmon.out's info to later verify
1511 * if the shared objects it references have changed since the time
1512 * they were used to generate this gmon.out
1513 */
1514 if (fstat(fd, &buf) == -1) {
1515 (void) fprintf(stderr, "%s: can't get info on `%s'\n",
1516 whoami, filename);
1517 exit(EX_NOINPUT);
1518 }
1519 gmonout_info.dev = buf.st_dev;
1520 gmonout_info.ino = buf.st_ino;
1521 gmonout_info.mtime = buf.st_mtime;
1522 gmonout_info.size = buf.st_size;
1523
1524 (void) close(fd);
1525
1526 return ((void *) fmem);
1527 }
1528
1529 static void *
openpfile(char * filename,size_t * fsz)1530 openpfile(char *filename, size_t *fsz)
1531 {
1532 struct hdr tmp;
1533 FILE *pfile;
1534 unsigned long magic_num;
1535 size_t hdrsize;
1536 static bool first_time = TRUE;
1537 extern bool old_style;
1538
1539 if ((pfile = fopen(filename, "r")) == NULL) {
1540 perror(filename);
1541 exit(EX_IOERR);
1542 }
1543
1544 /*
1545 * Read in the magic. Note that we changed the cast "unsigned long"
1546 * to "unsigned int" because that's how h_magic is defined in the
1547 * new format ProfHeader.
1548 */
1549 if (fread(&magic_num, sizeof (unsigned int), 1, pfile) == 0) {
1550 perror("fread()");
1551 exit(EX_IOERR);
1552 }
1553
1554 rewind(pfile);
1555
1556 /*
1557 * First check if this is versioned or *old-style* gmon.out
1558 */
1559 if (magic_num == (unsigned int)PROF_MAGIC) {
1560 if ((!first_time) && (old_style == TRUE)) {
1561 (void) fprintf(stderr, "%s: can't mix old & new format "
1562 "profiled files\n", whoami);
1563 exit(EX_SOFTWARE);
1564 }
1565 first_time = FALSE;
1566 old_style = FALSE;
1567 return (handle_versioned(pfile, filename, fsz));
1568 }
1569
1570 if ((!first_time) && (old_style == FALSE)) {
1571 (void) fprintf(stderr, "%s: can't mix old & new format "
1572 "profiled files\n", whoami);
1573 exit(EX_SOFTWARE);
1574 }
1575
1576 first_time = FALSE;
1577 old_style = TRUE;
1578 fsz = 0;
1579
1580 /*
1581 * Now, we need to determine if this is a run-time linker
1582 * profiled file or if it is a standard gmon.out.
1583 *
1584 * We do this by checking if magic matches PRF_MAGIC. If it
1585 * does, then this is a run-time linker profiled file, if it
1586 * doesn't, it must be a gmon.out file.
1587 */
1588 if (magic_num == (unsigned long)PRF_MAGIC)
1589 rflag = TRUE;
1590 else
1591 rflag = FALSE;
1592
1593 hdrsize = Bflag ? sizeof (struct hdr) : sizeof (struct hdr32);
1594
1595 if (rflag) {
1596 if (Bflag) {
1597 L_hdr64 l_hdr64;
1598
1599 /*
1600 * If the rflag is set then the input file is
1601 * rtld profiled data, we'll read it in and convert
1602 * it to the standard format (ie: make it look like
1603 * a gmon.out file).
1604 */
1605 if (fread(&l_hdr64, sizeof (L_hdr64), 1, pfile) == 0) {
1606 perror("fread()");
1607 exit(EX_IOERR);
1608 }
1609 if (l_hdr64.hd_version != PRF_VERSION_64) {
1610 (void) fprintf(stderr,
1611 "%s: expected version %d, "
1612 "got version %d when processing 64-bit "
1613 "run-time linker profiled file.\n",
1614 whoami, PRF_VERSION_64, l_hdr64.hd_version);
1615 exit(EX_SOFTWARE);
1616 }
1617 tmp.lowpc = 0;
1618 tmp.highpc = (pctype)l_hdr64.hd_hpc;
1619 tmp.ncnt = hdrsize + l_hdr64.hd_psize;
1620 } else {
1621 L_hdr l_hdr;
1622
1623 /*
1624 * If the rflag is set then the input file is
1625 * rtld profiled data, we'll read it in and convert
1626 * it to the standard format (ie: make it look like
1627 * a gmon.out file).
1628 */
1629 if (fread(&l_hdr, sizeof (L_hdr), 1, pfile) == 0) {
1630 perror("fread()");
1631 exit(EX_IOERR);
1632 }
1633 if (l_hdr.hd_version != PRF_VERSION) {
1634 (void) fprintf(stderr,
1635 "%s: expected version %d, "
1636 "got version %d when processing "
1637 "run-time linker profiled file.\n",
1638 whoami, PRF_VERSION, l_hdr.hd_version);
1639 exit(EX_SOFTWARE);
1640 }
1641 tmp.lowpc = 0;
1642 tmp.highpc = (pctype)(uintptr_t)l_hdr.hd_hpc;
1643 tmp.ncnt = hdrsize + l_hdr.hd_psize;
1644 }
1645 } else {
1646 if (Bflag) {
1647 if (fread(&tmp, sizeof (struct hdr), 1, pfile) == 0) {
1648 perror("fread()");
1649 exit(EX_IOERR);
1650 }
1651 } else {
1652 /*
1653 * If we're not reading big %pc's, we need to read
1654 * the 32-bit header, and assign the members to
1655 * the actual header.
1656 */
1657 struct hdr32 hdr32;
1658 if (fread(&hdr32, sizeof (hdr32), 1, pfile) == 0) {
1659 perror("fread()");
1660 exit(EX_IOERR);
1661 }
1662 tmp.lowpc = hdr32.lowpc;
1663 tmp.highpc = hdr32.highpc;
1664 tmp.ncnt = hdr32.ncnt;
1665 }
1666 }
1667
1668 /*
1669 * perform sanity check on profiled file we've opened.
1670 */
1671 if (tmp.lowpc >= tmp.highpc) {
1672 if (rflag)
1673 (void) fprintf(stderr,
1674 "%s: badly formed profiled data.\n",
1675 filename);
1676 else
1677 (void) fprintf(stderr,
1678 "%s: badly formed gmon.out file.\n",
1679 filename);
1680 exit(EX_SOFTWARE);
1681 }
1682
1683 if (s_highpc != 0 && (tmp.lowpc != h.lowpc ||
1684 tmp.highpc != h.highpc || tmp.ncnt != h.ncnt)) {
1685 (void) fprintf(stderr,
1686 "%s: incompatible with first gmon file\n",
1687 filename);
1688 exit(EX_IOERR);
1689 }
1690 h = tmp;
1691 s_lowpc = h.lowpc;
1692 s_highpc = h.highpc;
1693 lowpc = h.lowpc / sizeof (UNIT);
1694 highpc = h.highpc / sizeof (UNIT);
1695 sampbytes = h.ncnt > hdrsize ? h.ncnt - hdrsize : 0;
1696 nsamples = sampbytes / sizeof (unsigned_UNIT);
1697
1698 #ifdef DEBUG
1699 if (debug & SAMPLEDEBUG) {
1700 (void) printf("[openpfile] hdr.lowpc 0x%llx hdr.highpc "
1701 "0x%llx hdr.ncnt %lld\n",
1702 h.lowpc, h.highpc, h.ncnt);
1703 (void) printf(
1704 "[openpfile] s_lowpc 0x%llx s_highpc 0x%llx\n",
1705 s_lowpc, s_highpc);
1706 (void) printf(
1707 "[openpfile] lowpc 0x%llx highpc 0x%llx\n",
1708 lowpc, highpc);
1709 (void) printf("[openpfile] sampbytes %d nsamples %d\n",
1710 sampbytes, nsamples);
1711 }
1712 #endif /* DEBUG */
1713
1714 return ((void *) pfile);
1715 }
1716
1717 /*
1718 * Information from a gmon.out file depends on whether it's versioned
1719 * or non-versioned, *old style* gmon.out. If old-style, it is in two
1720 * parts : an array of sampling hits within pc ranges, and the arcs. If
1721 * versioned, it contains a header, followed by any number of
1722 * modules/callgraph/pcsample_buffer objects.
1723 */
1724 static void
getpfile(char * filename)1725 getpfile(char *filename)
1726 {
1727 void *handle;
1728 size_t fsz;
1729
1730 handle = openpfile(filename, &fsz);
1731
1732 if (old_style) {
1733 readsamples((FILE *)handle);
1734 readarcs((FILE *)handle);
1735 (void) fclose((FILE *)handle);
1736 return;
1737 }
1738
1739 getpfiledata((caddr_t)handle, fsz);
1740 (void) munmap(handle, fsz);
1741 }
1742
1743 int
main(int argc,char ** argv)1744 main(int argc, char **argv)
1745 {
1746 char **sp;
1747 nltype **timesortnlp;
1748 int c;
1749 int errflg;
1750
1751 prog_name = *argv; /* preserve program name */
1752 debug = 0;
1753 nflag = FALSE;
1754 bflag = TRUE;
1755 lflag = FALSE;
1756 Cflag = FALSE;
1757 first_file = TRUE;
1758 rflag = FALSE;
1759 Bflag = FALSE;
1760 errflg = FALSE;
1761
1762 while ((c = getopt(argc, argv, "abd:CcDE:e:F:f:ln:sz")) != EOF)
1763 switch (c) {
1764 case 'a':
1765 aflag = TRUE;
1766 break;
1767 case 'b':
1768 bflag = FALSE;
1769 break;
1770 case 'c':
1771 cflag = TRUE;
1772 break;
1773 case 'C':
1774 Cflag = TRUE;
1775 break;
1776 case 'd':
1777 dflag = TRUE;
1778 debug |= atoi(optarg);
1779 (void) printf("[main] debug = 0x%x\n", debug);
1780 break;
1781 case 'D':
1782 Dflag = TRUE;
1783 break;
1784 case 'E':
1785 addlist(Elist, optarg);
1786 Eflag = TRUE;
1787 addlist(elist, optarg);
1788 eflag = TRUE;
1789 break;
1790 case 'e':
1791 addlist(elist, optarg);
1792 eflag = TRUE;
1793 break;
1794 case 'F':
1795 addlist(Flist, optarg);
1796 Fflag = TRUE;
1797 addlist(flist, optarg);
1798 fflag = TRUE;
1799 break;
1800 case 'f':
1801 addlist(flist, optarg);
1802 fflag = TRUE;
1803 break;
1804 case 'l':
1805 lflag = TRUE;
1806 break;
1807 case 'n':
1808 nflag = TRUE;
1809 number_funcs_toprint = atoi(optarg);
1810 break;
1811 case 's':
1812 sflag = TRUE;
1813 break;
1814 case 'z':
1815 zflag = TRUE;
1816 break;
1817 case '?':
1818 errflg++;
1819
1820 }
1821
1822 if (errflg) {
1823 (void) fprintf(stderr,
1824 "usage: gprof [ -abcCDlsz ] [ -e function-name ] "
1825 "[ -E function-name ]\n\t[ -f function-name ] "
1826 "[ -F function-name ]\n\t[ image-file "
1827 "[ profile-file ... ] ]\n");
1828 exit(EX_USAGE);
1829 }
1830
1831 if (optind < argc) {
1832 a_outname = argv[optind++];
1833 } else {
1834 a_outname = A_OUTNAME;
1835 }
1836 if (optind < argc) {
1837 gmonname = argv[optind++];
1838 } else {
1839 gmonname = GMONNAME;
1840 }
1841 /*
1842 * turn off default functions
1843 */
1844 for (sp = &defaultEs[0]; *sp; sp++) {
1845 Eflag = TRUE;
1846 addlist(Elist, *sp);
1847 eflag = TRUE;
1848 addlist(elist, *sp);
1849 }
1850 /*
1851 * how many ticks per second?
1852 * if we can't tell, report time in ticks.
1853 */
1854 hz = sysconf(_SC_CLK_TCK);
1855 if (hz == -1) {
1856 hz = 1;
1857 (void) fprintf(stderr, "time is in ticks, not seconds\n");
1858 }
1859
1860 getnfile(a_outname);
1861
1862 /*
1863 * get information about mon.out file(s).
1864 */
1865 do {
1866 getpfile(gmonname);
1867 if (optind < argc)
1868 gmonname = argv[optind++];
1869 else
1870 optind++;
1871 } while (optind <= argc);
1872 /*
1873 * dump out a gmon.sum file if requested
1874 */
1875 if (sflag || Dflag)
1876 dumpsum(GMONSUM);
1877
1878 if (old_style) {
1879 /*
1880 * assign samples to procedures
1881 */
1882 asgnsamples();
1883 }
1884
1885 /*
1886 * assemble the dynamic profile
1887 */
1888 timesortnlp = doarcs();
1889
1890 /*
1891 * print the dynamic profile
1892 */
1893 #ifdef DEBUG
1894 if (debug & ANYDEBUG) {
1895 /* raw output of all symbols in all their glory */
1896 int i;
1897 (void) printf(" Name, pc_entry_pt, svalue, tix_in_routine, "
1898 "#calls, selfcalls, index \n");
1899 for (i = 0; i < modules.nname; i++) { /* Print each symbol */
1900 if (timesortnlp[i]->name)
1901 (void) printf(" %s ", timesortnlp[i]->name);
1902 else
1903 (void) printf(" <cycle> ");
1904 (void) printf(" %lld ", timesortnlp[i]->value);
1905 (void) printf(" %lld ", timesortnlp[i]->svalue);
1906 (void) printf(" %f ", timesortnlp[i]->time);
1907 (void) printf(" %lld ", timesortnlp[i]->ncall);
1908 (void) printf(" %lld ", timesortnlp[i]->selfcalls);
1909 (void) printf(" %d ", timesortnlp[i]->index);
1910 (void) printf(" \n");
1911 }
1912 }
1913 #endif /* DEBUG */
1914
1915 printgprof(timesortnlp);
1916 /*
1917 * print the flat profile
1918 */
1919 printprof();
1920 /*
1921 * print the index
1922 */
1923 printindex();
1924
1925 /*
1926 * print the modules
1927 */
1928 printmodules();
1929
1930 done();
1931 /* NOTREACHED */
1932 return (0);
1933 }
1934