1 /*- 2 * Copyright (c) 2005-2006, Joseph Koshy 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 /* 28 * Transform a hwpmc(4) log into human readable form, and into 29 * gprof(1) compatible profiles. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/endian.h> 37 #include <sys/gmon.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/imgact_elf.h> 40 #include <sys/mman.h> 41 #include <sys/pmc.h> 42 #include <sys/queue.h> 43 #include <sys/socket.h> 44 #include <sys/stat.h> 45 #include <sys/wait.h> 46 47 #include <netinet/in.h> 48 49 #include <assert.h> 50 #include <err.h> 51 #include <errno.h> 52 #include <fcntl.h> 53 #include <libgen.h> 54 #include <limits.h> 55 #include <netdb.h> 56 #include <pmc.h> 57 #include <pmclog.h> 58 #include <sysexits.h> 59 #include <stdint.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <unistd.h> 64 65 #include "pmcstat.h" 66 67 #define min(A,B) ((A) < (B) ? (A) : (B)) 68 #define max(A,B) ((A) > (B) ? (A) : (B)) 69 70 /* 71 * PUBLIC INTERFACES 72 * 73 * pmcstat_initialize_logging() initialize this module, called first 74 * pmcstat_shutdown_logging() orderly shutdown, called last 75 * pmcstat_open_log() open an eventlog for processing 76 * pmcstat_process_log() print/convert an event log 77 * pmcstat_close_log() finish processing an event log 78 * 79 * IMPLEMENTATION OF GMON OUTPUT 80 * 81 * We correlate each 'sample' seen in the event log back to an 82 * executable object in the system. Executable objects include: 83 * - program executables, 84 * - shared libraries loaded by the runtime loader, 85 * - dlopen()'ed objects loaded by the program, 86 * - the runtime loader itself, 87 * - the kernel and kernel modules. 88 * 89 * Each such executable object gets one 'gmon.out' profile, per PMC in 90 * use. Creation of 'gmon.out' profiles is done lazily. The 91 * 'gmon.out' profiles generated for a given sampling PMC are 92 * aggregates of all the samples for that particular executable 93 * object. 94 * 95 * Each process that we know about is treated as a set of regions that 96 * map to executable objects. 
Processes are described by 97 * 'pmcstat_process' structures. Executable objects are tracked by 98 * 'pmcstat_image' structures. The kernel and kernel modules are 99 * common to all processes (they reside at the same virtual addresses 100 * for all processes). Individual processes can have their text 101 * segments and shared libraries loaded at process-specific locations. 102 * 103 * A given executable object can be in use by multiple processes 104 * (e.g., libc.so) and loaded at a different address in each. 105 * pmcstat_pcmap structures track per-image mappings. 106 * 107 * The sample log could have samples from multiple PMCs; we 108 * generate one 'gmon.out' profile per PMC. 109 */ 110 111 typedef const void *pmcstat_interned_string; 112 113 /* 114 * 'pmcstat_pmcrecord' is a mapping from PMC ids to human-readable 115 * names. 116 */ 117 118 struct pmcstat_pmcrecord { 119 LIST_ENTRY(pmcstat_pmcrecord) pr_next; 120 pmc_id_t pr_pmcid; 121 pmcstat_interned_string pr_pmcname; 122 }; 123 124 static LIST_HEAD(,pmcstat_pmcrecord) pmcstat_pmcs = 125 LIST_HEAD_INITIALIZER(&pmcstat_pmcs); 126 127 128 /* 129 * struct pmcstat_gmonfile tracks a given 'gmon.out' file. These 130 * files are mmap()'ed in as needed. 131 */ 132 133 struct pmcstat_gmonfile { 134 LIST_ENTRY(pmcstat_gmonfile) pgf_next; /* list of entries */ 135 int pgf_overflow; /* whether a count overflowed */ 136 pmc_id_t pgf_pmcid; /* id of the associated pmc */ 137 size_t pgf_nbuckets; /* #buckets in this gmon.out */ 138 unsigned int pgf_nsamples; /* #samples in this gmon.out */ 139 pmcstat_interned_string pgf_name; /* pathname of gmon.out file */ 140 size_t pgf_ndatabytes; /* number of bytes mapped */ 141 void *pgf_gmondata; /* pointer to mmap'ed data */ 142 }; 143 144 /* 145 * A 'pmcstat_image' structure describes an executable program on 146 * disk. 'pi_execpath' is a cookie representing the pathname of 147 * the executable. 
'pi_start' and 'pi_end' are the least and greatest 148 * virtual addresses for the text segments in the executable. 149 * 'pi_gmonlist' contains a linked list of gmon.out files associated 150 * with this image. 151 */ 152 153 enum pmcstat_image_type { 154 PMCSTAT_IMAGE_UNKNOWN = 0, /* never looked at the image */ 155 PMCSTAT_IMAGE_INDETERMINABLE, /* can't tell what the image is */ 156 PMCSTAT_IMAGE_ELF32, /* ELF 32 bit object */ 157 PMCSTAT_IMAGE_ELF64, /* ELF 64 bit object */ 158 PMCSTAT_IMAGE_AOUT /* AOUT object */ 159 }; 160 161 struct pmcstat_image { 162 LIST_ENTRY(pmcstat_image) pi_next; /* hash link */ 163 TAILQ_ENTRY(pmcstat_image) pi_lru; /* LRU list */ 164 pmcstat_interned_string pi_execpath;/* cookie */ 165 pmcstat_interned_string pi_samplename; /* sample path name */ 166 167 enum pmcstat_image_type pi_type; /* executable type */ 168 169 /* 170 * Executables have pi_start and pi_end; these are zero 171 * for shared libraries. 172 */ 173 uintfptr_t pi_start; /* start address (inclusive) */ 174 uintfptr_t pi_end; /* end address (exclusive) */ 175 uintfptr_t pi_entry; /* entry address */ 176 uintfptr_t pi_vaddr; /* virtual address where loaded */ 177 int pi_isdynamic; /* whether a dynamic 178 * object */ 179 int pi_iskernelmodule; 180 pmcstat_interned_string pi_dynlinkerpath; /* path in .interp */ 181 182 /* 183 * An image can be associated with one or more gmon.out files; 184 * one per PMC. 185 */ 186 LIST_HEAD(,pmcstat_gmonfile) pi_gmlist; 187 }; 188 189 /* 190 * All image descriptors are kept in a hash table. 191 */ 192 static LIST_HEAD(,pmcstat_image) pmcstat_image_hash[PMCSTAT_NHASH]; 193 static TAILQ_HEAD(,pmcstat_image) pmcstat_image_lru = 194 TAILQ_HEAD_INITIALIZER(pmcstat_image_lru); 195 196 /* 197 * A 'pmcstat_pcmap' structure maps a virtual address range to an 198 * underlying 'pmcstat_image' descriptor. 
199 */ 200 struct pmcstat_pcmap { 201 TAILQ_ENTRY(pmcstat_pcmap) ppm_next; 202 uintfptr_t ppm_lowpc; 203 uintfptr_t ppm_highpc; 204 struct pmcstat_image *ppm_image; 205 }; 206 207 /* 208 * A 'pmcstat_process' structure models processes. Each process is 209 * associated with a set of pmcstat_pcmap structures that map 210 * addresses inside it to executable objects. This set is implemented 211 * as a list, kept sorted in ascending order of mapped addresses. 212 * 213 * 'pp_pid' holds the pid of the process. When a process exits, the 214 * 'pp_isactive' field is set to zero, but the process structure is 215 * not immediately reclaimed because there may still be samples in the 216 * log for this process. 217 */ 218 219 struct pmcstat_process { 220 LIST_ENTRY(pmcstat_process) pp_next; /* hash-next */ 221 pid_t pp_pid; /* associated pid */ 222 int pp_isactive; /* whether active */ 223 uintfptr_t pp_entryaddr; /* entry address */ 224 TAILQ_HEAD(,pmcstat_pcmap) pp_map; /* address range map */ 225 }; 226 227 #define PMCSTAT_ALLOCATE 1 228 229 /* 230 * All process descriptors are kept in a hash table. 231 */ 232 static LIST_HEAD(,pmcstat_process) pmcstat_process_hash[PMCSTAT_NHASH]; 233 234 static struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */ 235 236 /* Misc. 
statistics */ 237 static struct pmcstat_stats { 238 int ps_exec_aout; /* # a.out executables seen */ 239 int ps_exec_elf; /* # elf executables seen */ 240 int ps_exec_errors; /* # errors processing executables */ 241 int ps_exec_indeterminable; /* # unknown executables seen */ 242 int ps_samples_total; /* total number of samples processed */ 243 int ps_samples_unknown_offset; /* #samples not in any map */ 244 int ps_samples_indeterminable; /* #samples in indeterminable images */ 245 } pmcstat_stats; 246 247 /* 248 * Prototypes 249 */ 250 251 static void pmcstat_gmon_create_file(struct pmcstat_gmonfile *_pgf, 252 struct pmcstat_image *_image); 253 static pmcstat_interned_string pmcstat_gmon_create_name(const char *_sd, 254 struct pmcstat_image *_img, pmc_id_t _pmcid); 255 static void pmcstat_gmon_map_file(struct pmcstat_gmonfile *_pgf); 256 static void pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *_pgf); 257 258 static void pmcstat_image_determine_type(struct pmcstat_image *_image, 259 struct pmcstat_args *_a); 260 static struct pmcstat_image *pmcstat_image_from_path(pmcstat_interned_string 261 _path, int _iskernelmodule); 262 static void pmcstat_image_get_aout_params(struct pmcstat_image *_image, 263 struct pmcstat_args *_a); 264 static void pmcstat_image_get_elf_params(struct pmcstat_image *_image, 265 struct pmcstat_args *_a); 266 static void pmcstat_image_increment_bucket(struct pmcstat_pcmap *_pcm, 267 uintfptr_t _pc, pmc_id_t _pmcid, struct pmcstat_args *_a); 268 static void pmcstat_image_link(struct pmcstat_process *_pp, 269 struct pmcstat_image *_i, uintfptr_t _lpc); 270 271 static void pmcstat_pmcid_add(pmc_id_t _pmcid, 272 pmcstat_interned_string _name, struct pmcstat_args *_a); 273 static const char *pmcstat_pmcid_to_name(pmc_id_t _pmcid); 274 275 static void pmcstat_process_aout_exec(struct pmcstat_process *_pp, 276 struct pmcstat_image *_image, uintfptr_t _entryaddr, 277 struct pmcstat_args *_a); 278 static void pmcstat_process_elf_exec(struct 
pmcstat_process *_pp, 279 struct pmcstat_image *_image, uintfptr_t _entryaddr, 280 struct pmcstat_args *_a); 281 static void pmcstat_process_exec(struct pmcstat_process *_pp, 282 pmcstat_interned_string _path, uintfptr_t _entryaddr, 283 struct pmcstat_args *_ao); 284 static struct pmcstat_process *pmcstat_process_lookup(pid_t _pid, 285 int _allocate); 286 static struct pmcstat_pcmap *pmcstat_process_find_map( 287 struct pmcstat_process *_p, uintfptr_t _pc); 288 289 static int pmcstat_string_compute_hash(const char *_string); 290 static void pmcstat_string_initialize(void); 291 static pmcstat_interned_string pmcstat_string_intern(const char *_s); 292 static pmcstat_interned_string pmcstat_string_lookup(const char *_s); 293 static int pmcstat_string_lookup_hash(pmcstat_interned_string _is); 294 static void pmcstat_string_shutdown(void); 295 static const char *pmcstat_string_unintern(pmcstat_interned_string _is); 296 297 298 /* 299 * A simple implementation of interned strings. Each interned string 300 * is assigned a unique address, so that subsequent string compares 301 * can be done by a simple pointer comparision instead of using 302 * strcmp(). This speeds up hash table lookups and saves memory if 303 * duplicate strings are the norm. 304 */ 305 struct pmcstat_string { 306 LIST_ENTRY(pmcstat_string) ps_next; /* hash link */ 307 int ps_len; 308 int ps_hash; 309 char *ps_string; 310 }; 311 312 static LIST_HEAD(,pmcstat_string) pmcstat_string_hash[PMCSTAT_NHASH]; 313 314 /* 315 * Compute a 'hash' value for a string. 316 */ 317 318 static int 319 pmcstat_string_compute_hash(const char *s) 320 { 321 int hash; 322 323 for (hash = 0; *s; s++) 324 hash ^= *s; 325 326 return (hash & PMCSTAT_HASH_MASK); 327 } 328 329 /* 330 * Intern a copy of string 's', and return a pointer to the 331 * interned structure. 
332 */ 333 334 static pmcstat_interned_string 335 pmcstat_string_intern(const char *s) 336 { 337 struct pmcstat_string *ps; 338 const struct pmcstat_string *cps; 339 int hash, len; 340 341 if ((cps = pmcstat_string_lookup(s)) != NULL) 342 return (cps); 343 344 hash = pmcstat_string_compute_hash(s); 345 len = strlen(s); 346 347 if ((ps = malloc(sizeof(*ps))) == NULL) 348 err(EX_OSERR, "ERROR: Could not intern string"); 349 ps->ps_len = len; 350 ps->ps_hash = hash; 351 ps->ps_string = strdup(s); 352 LIST_INSERT_HEAD(&pmcstat_string_hash[hash], ps, ps_next); 353 return ((pmcstat_interned_string) ps); 354 } 355 356 static const char * 357 pmcstat_string_unintern(pmcstat_interned_string str) 358 { 359 const char *s; 360 361 s = ((const struct pmcstat_string *) str)->ps_string; 362 return (s); 363 } 364 365 static pmcstat_interned_string 366 pmcstat_string_lookup(const char *s) 367 { 368 struct pmcstat_string *ps; 369 int hash, len; 370 371 hash = pmcstat_string_compute_hash(s); 372 len = strlen(s); 373 374 LIST_FOREACH(ps, &pmcstat_string_hash[hash], ps_next) 375 if (ps->ps_len == len && ps->ps_hash == hash && 376 strcmp(ps->ps_string, s) == 0) 377 return (ps); 378 return (NULL); 379 } 380 381 static int 382 pmcstat_string_lookup_hash(pmcstat_interned_string s) 383 { 384 const struct pmcstat_string *ps; 385 386 ps = (const struct pmcstat_string *) s; 387 return (ps->ps_hash); 388 } 389 390 /* 391 * Initialize the string interning facility. 392 */ 393 394 static void 395 pmcstat_string_initialize(void) 396 { 397 int i; 398 399 for (i = 0; i < PMCSTAT_NHASH; i++) 400 LIST_INIT(&pmcstat_string_hash[i]); 401 } 402 403 /* 404 * Destroy the string table, free'ing up space. 
405 */ 406 407 static void 408 pmcstat_string_shutdown(void) 409 { 410 int i; 411 struct pmcstat_string *ps, *pstmp; 412 413 for (i = 0; i < PMCSTAT_NHASH; i++) 414 LIST_FOREACH_SAFE(ps, &pmcstat_string_hash[i], ps_next, 415 pstmp) { 416 LIST_REMOVE(ps, ps_next); 417 free(ps->ps_string); 418 free(ps); 419 } 420 } 421 422 /* 423 * Create a gmon.out file and size it. 424 */ 425 426 static void 427 pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf, 428 struct pmcstat_image *image) 429 { 430 int fd; 431 size_t count; 432 struct gmonhdr gm; 433 const char *pathname; 434 char buffer[DEFAULT_BUFFER_SIZE]; 435 436 pathname = pmcstat_string_unintern(pgf->pgf_name); 437 if ((fd = open(pathname, O_RDWR|O_NOFOLLOW|O_CREAT, 438 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0) 439 err(EX_OSERR, "ERROR: Cannot open \"%s\"", pathname); 440 441 gm.lpc = image->pi_start; 442 gm.hpc = image->pi_end; 443 gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) + 444 sizeof(struct gmonhdr); 445 gm.version = GMONVERSION; 446 gm.profrate = 0; /* use ticks */ 447 gm.histcounter_type = 0; /* compatibility with moncontrol() */ 448 gm.spare[0] = gm.spare[1] = 0; 449 450 /* Write out the gmon header */ 451 if (write(fd, &gm, sizeof(gm)) < 0) 452 goto error; 453 454 /* Zero fill the samples[] array */ 455 (void) memset(buffer, 0, sizeof(buffer)); 456 457 count = pgf->pgf_ndatabytes - sizeof(struct gmonhdr); 458 while (count > sizeof(buffer)) { 459 if (write(fd, &buffer, sizeof(buffer)) < 0) 460 goto error; 461 count -= sizeof(buffer); 462 } 463 464 if (write(fd, &buffer, count) < 0) 465 goto error; 466 467 /* TODO size the arc table */ 468 469 (void) close(fd); 470 471 return; 472 473 error: 474 err(EX_OSERR, "ERROR: Cannot write \"%s\"", pathname); 475 } 476 477 /* 478 * Determine the full pathname of a gmon.out file for a given 479 * (image,pmcid) combination. Return the interned string. 
480 */ 481 482 pmcstat_interned_string 483 pmcstat_gmon_create_name(const char *samplesdir, struct pmcstat_image *image, 484 pmc_id_t pmcid) 485 { 486 const char *pmcname; 487 char fullpath[PATH_MAX]; 488 489 pmcname = pmcstat_pmcid_to_name(pmcid); 490 491 (void) snprintf(fullpath, sizeof(fullpath), 492 "%s/%s/%s", samplesdir, pmcname, 493 pmcstat_string_unintern(image->pi_samplename)); 494 495 return (pmcstat_string_intern(fullpath)); 496 } 497 498 499 /* 500 * Mmap in a gmon.out file for processing. 501 */ 502 503 static void 504 pmcstat_gmon_map_file(struct pmcstat_gmonfile *pgf) 505 { 506 int fd; 507 const char *pathname; 508 509 pathname = pmcstat_string_unintern(pgf->pgf_name); 510 511 /* the gmon.out file must already exist */ 512 if ((fd = open(pathname, O_RDWR | O_NOFOLLOW, 0)) < 0) 513 err(EX_OSERR, "ERROR: cannot open \"%s\"", pathname); 514 515 pgf->pgf_gmondata = mmap(NULL, pgf->pgf_ndatabytes, 516 PROT_READ|PROT_WRITE, MAP_NOSYNC|MAP_SHARED, fd, 0); 517 518 if (pgf->pgf_gmondata == MAP_FAILED) 519 err(EX_OSERR, "ERROR: cannot map \"%s\"", pathname); 520 521 (void) close(fd); 522 } 523 524 /* 525 * Unmap a gmon.out file after sync'ing its data to disk. 526 */ 527 528 static void 529 pmcstat_gmon_unmap_file(struct pmcstat_gmonfile *pgf) 530 { 531 (void) msync(pgf->pgf_gmondata, pgf->pgf_ndatabytes, 532 MS_SYNC); 533 (void) munmap(pgf->pgf_gmondata, pgf->pgf_ndatabytes); 534 pgf->pgf_gmondata = NULL; 535 } 536 537 /* 538 * Determine whether a given executable image is an A.OUT object, and 539 * if so, fill in its parameters from the text file. 540 * Sets image->pi_type. 
541 */ 542 543 static void 544 pmcstat_image_get_aout_params(struct pmcstat_image *image, 545 struct pmcstat_args *a) 546 { 547 int fd; 548 ssize_t nbytes; 549 struct exec ex; 550 const char *path; 551 char buffer[PATH_MAX]; 552 553 path = pmcstat_string_unintern(image->pi_execpath); 554 assert(path != NULL); 555 556 if (image->pi_iskernelmodule) 557 errx(EX_SOFTWARE, "ERROR: a.out kernel modules are " 558 "unsupported \"%s\"", path); 559 560 (void) snprintf(buffer, sizeof(buffer), "%s%s", 561 a->pa_fsroot, path); 562 563 if ((fd = open(buffer, O_RDONLY, 0)) < 0 || 564 (nbytes = read(fd, &ex, sizeof(ex))) < 0) { 565 warn("WARNING: Cannot determine type of \"%s\"", path); 566 image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE; 567 if (fd != -1) 568 (void) close(fd); 569 return; 570 } 571 572 (void) close(fd); 573 574 if ((unsigned) nbytes != sizeof(ex) || 575 N_BADMAG(ex)) 576 return; 577 578 image->pi_type = PMCSTAT_IMAGE_AOUT; 579 580 /* TODO: the rest of a.out processing */ 581 582 return; 583 } 584 585 /* 586 * Examine an ELF file to determine the size of its text segment. 587 * Sets image->pi_type if anything conclusive can be determined about 588 * this image. 
589 */ 590 591 static void 592 pmcstat_image_get_elf_params(struct pmcstat_image *image, 593 struct pmcstat_args *a) 594 { 595 int fd, i; 596 const char *path; 597 void *mapbase; 598 uintfptr_t minva, maxva; 599 const Elf_Ehdr *h; 600 const Elf_Phdr *ph; 601 const Elf_Shdr *sh; 602 #if defined(__amd64__) 603 const Elf32_Ehdr *h32; 604 const Elf32_Phdr *ph32; 605 const Elf32_Shdr *sh32; 606 #endif 607 enum pmcstat_image_type image_type; 608 struct stat st; 609 char buffer[PATH_MAX]; 610 611 assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN); 612 613 minva = ~(uintfptr_t) 0; 614 maxva = (uintfptr_t) 0; 615 path = pmcstat_string_unintern(image->pi_execpath); 616 617 assert(path != NULL); 618 619 /* 620 * Look for kernel modules under FSROOT/KERNELPATH/NAME, 621 * and user mode executable objects under FSROOT/PATHNAME. 622 */ 623 if (image->pi_iskernelmodule) 624 (void) snprintf(buffer, sizeof(buffer), "%s%s/%s", 625 a->pa_fsroot, a->pa_kernel, path); 626 else 627 (void) snprintf(buffer, sizeof(buffer), "%s%s", 628 a->pa_fsroot, path); 629 630 if ((fd = open(buffer, O_RDONLY, 0)) < 0 || 631 fstat(fd, &st) < 0 || 632 (mapbase = mmap(0, st.st_size, PROT_READ, MAP_SHARED, 633 fd, 0)) == MAP_FAILED) { 634 warn("WARNING: Cannot determine type of \"%s\"", buffer); 635 image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE; 636 if (fd != -1) 637 (void) close(fd); 638 return; 639 } 640 641 (void) close(fd); 642 643 /* Punt on non-ELF objects */ 644 h = (const Elf_Ehdr *) mapbase; 645 if (!IS_ELF(*h)) 646 return; 647 648 /* 649 * We only handle executable ELF objects and kernel 650 * modules. 
651 */ 652 if (h->e_type != ET_EXEC && h->e_type != ET_DYN && 653 !(image->pi_iskernelmodule && h->e_type == ET_REL)) 654 return; 655 656 image->pi_isdynamic = 0; 657 image->pi_dynlinkerpath = NULL; 658 image->pi_vaddr = 0; 659 660 #define GET_VA(H, SH, MINVA, MAXVA) do { \ 661 for (i = 0; i < (H)->e_shnum; i++) \ 662 if ((SH)[i].sh_flags & SHF_EXECINSTR) { \ 663 (MINVA) = min((MINVA),(SH)[i].sh_addr); \ 664 (MAXVA) = max((MAXVA),(SH)[i].sh_addr + \ 665 (SH)[i].sh_size); \ 666 } \ 667 } while (0) 668 669 670 #define GET_PHDR_INFO(H, PH, IMAGE) do { \ 671 for (i = 0; i < (H)->e_phnum; i++) { \ 672 switch ((PH)[i].p_type) { \ 673 case PT_DYNAMIC: \ 674 image->pi_isdynamic = 1; \ 675 break; \ 676 case PT_INTERP: \ 677 image->pi_dynlinkerpath = \ 678 pmcstat_string_intern( \ 679 (char *) mapbase + \ 680 (PH)[i].p_offset); \ 681 break; \ 682 case PT_LOAD: \ 683 if ((PH)[i].p_offset == 0) \ 684 image->pi_vaddr = \ 685 (PH)[i].p_vaddr; \ 686 break; \ 687 } \ 688 } \ 689 } while (0) 690 691 switch (h->e_machine) { 692 case EM_386: 693 case EM_486: 694 #if defined(__amd64__) 695 /* a 32 bit executable */ 696 h32 = (const Elf32_Ehdr *) h; 697 sh32 = (const Elf32_Shdr *)((uintptr_t) mapbase + h32->e_shoff); 698 699 GET_VA(h32, sh32, minva, maxva); 700 701 image->pi_entry = h32->e_entry; 702 703 if (h32->e_type == ET_EXEC) { 704 ph32 = (const Elf32_Phdr *)((uintptr_t) mapbase + 705 h32->e_phoff); 706 GET_PHDR_INFO(h32, ph32, image); 707 } 708 image_type = PMCSTAT_IMAGE_ELF32; 709 break; 710 #endif 711 default: 712 sh = (const Elf_Shdr *)((uintptr_t) mapbase + h->e_shoff); 713 714 GET_VA(h, sh, minva, maxva); 715 716 image->pi_entry = h->e_entry; 717 718 if (h->e_type == ET_EXEC) { 719 ph = (const Elf_Phdr *)((uintptr_t) mapbase + 720 h->e_phoff); 721 GET_PHDR_INFO(h, ph, image); 722 } 723 image_type = PMCSTAT_IMAGE_ELF64; 724 break; 725 } 726 727 #undef GET_PHDR_INFO 728 #undef GET_VA 729 730 image->pi_start = minva; 731 image->pi_end = maxva; 732 image->pi_type = image_type; 
733 734 if (munmap(mapbase, st.st_size) < 0) 735 err(EX_OSERR, "ERROR: Cannot unmap \"%s\"", path); 736 return; 737 } 738 739 /* 740 * Given an image descriptor, determine whether it is an ELF, or AOUT. 741 * If no handler claims the image, set its type to 'INDETERMINABLE'. 742 */ 743 744 static void 745 pmcstat_image_determine_type(struct pmcstat_image *image, 746 struct pmcstat_args *a) 747 { 748 assert(image->pi_type == PMCSTAT_IMAGE_UNKNOWN); 749 750 /* Try each kind of handler in turn */ 751 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 752 pmcstat_image_get_elf_params(image, a); 753 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 754 pmcstat_image_get_aout_params(image, a); 755 756 /* 757 * Otherwise, remember that we tried to determine 758 * the object's type and had failed. 759 */ 760 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 761 image->pi_type = PMCSTAT_IMAGE_INDETERMINABLE; 762 } 763 764 /* 765 * Locate an image descriptor given an interned path, adding a fresh 766 * descriptor to the cache if necessary. This function also finds a 767 * suitable name for this image's sample file. 768 * 769 * We defer filling in the file format specific parts of the image 770 * structure till the time we actually see a sample that would fall 771 * into this image. 772 */ 773 774 static struct pmcstat_image * 775 pmcstat_image_from_path(pmcstat_interned_string internedpath, 776 int iskernelmodule) 777 { 778 int count, hash, nlen; 779 struct pmcstat_image *pi; 780 char *sn; 781 char name[NAME_MAX]; 782 783 hash = pmcstat_string_lookup_hash(internedpath); 784 785 /* First, look for an existing entry. 
*/ 786 LIST_FOREACH(pi, &pmcstat_image_hash[hash], pi_next) 787 if (pi->pi_execpath == internedpath && 788 pi->pi_iskernelmodule == iskernelmodule) { 789 /* move descriptor to the head of the lru list */ 790 TAILQ_REMOVE(&pmcstat_image_lru, pi, pi_lru); 791 TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru); 792 return (pi); 793 } 794 795 /* 796 * Allocate a new entry and place at the head of the hash and 797 * LRU lists. 798 */ 799 pi = malloc(sizeof(*pi)); 800 if (pi == NULL) 801 return (NULL); 802 803 pi->pi_type = PMCSTAT_IMAGE_UNKNOWN; 804 pi->pi_execpath = internedpath; 805 pi->pi_start = ~0; 806 pi->pi_entry = ~0; 807 pi->pi_end = 0; 808 pi->pi_iskernelmodule = iskernelmodule; 809 810 /* 811 * Look for a suitable name for the sample files associated 812 * with this image: if `basename(path)`+".gmon" is available, 813 * we use that, otherwise we try iterating through 814 * `basename(path)`+ "~" + NNN + ".gmon" till we get a free 815 * entry. 816 */ 817 if ((sn = basename(pmcstat_string_unintern(internedpath))) == NULL) 818 err(EX_OSERR, "ERROR: Cannot process \"%s\"", 819 pmcstat_string_unintern(internedpath)); 820 821 nlen = strlen(sn); 822 nlen = min(nlen, (int) (sizeof(name) - sizeof(".gmon"))); 823 824 snprintf(name, sizeof(name), "%.*s.gmon", nlen, sn); 825 826 /* try use the unabridged name first */ 827 if (pmcstat_string_lookup(name) == NULL) 828 pi->pi_samplename = pmcstat_string_intern(name); 829 else { 830 /* 831 * Otherwise use a prefix from the original name and 832 * upto 3 digits. 
833 */ 834 nlen = strlen(sn); 835 nlen = min(nlen, (int) (sizeof(name)-sizeof("~NNN.gmon"))); 836 count = 0; 837 do { 838 if (++count > 999) 839 errx(EX_CANTCREAT, "ERROR: cannot create a gmon " 840 "file for \"%s\"", name); 841 snprintf(name, sizeof(name), "%.*s~%3.3d.gmon", 842 nlen, sn, count); 843 if (pmcstat_string_lookup(name) == NULL) { 844 pi->pi_samplename = pmcstat_string_intern(name); 845 count = 0; 846 } 847 } while (count > 0); 848 } 849 850 851 LIST_INIT(&pi->pi_gmlist); 852 853 LIST_INSERT_HEAD(&pmcstat_image_hash[hash], pi, pi_next); 854 TAILQ_INSERT_HEAD(&pmcstat_image_lru, pi, pi_lru); 855 856 return (pi); 857 } 858 859 /* 860 * Increment the bucket in the gmon.out file corresponding to 'pmcid' 861 * and 'pc'. 862 */ 863 864 static void 865 pmcstat_image_increment_bucket(struct pmcstat_pcmap *map, uintfptr_t pc, 866 pmc_id_t pmcid, struct pmcstat_args *a) 867 { 868 struct pmcstat_image *image; 869 struct pmcstat_gmonfile *pgf; 870 uintfptr_t bucket; 871 HISTCOUNTER *hc; 872 873 assert(pc >= map->ppm_lowpc && pc < map->ppm_highpc); 874 875 image = map->ppm_image; 876 877 /* 878 * If this is the first time we are seeing a sample for 879 * this executable image, try determine its parameters. 880 */ 881 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 882 pmcstat_image_determine_type(image, a); 883 884 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); 885 886 /* Ignore samples in images that we know nothing about. */ 887 if (image->pi_type == PMCSTAT_IMAGE_INDETERMINABLE) { 888 pmcstat_stats.ps_samples_indeterminable++; 889 return; 890 } 891 892 /* 893 * Find the gmon file corresponding to 'pmcid', creating it if 894 * needed. 
895 */ 896 LIST_FOREACH(pgf, &image->pi_gmlist, pgf_next) 897 if (pgf->pgf_pmcid == pmcid) 898 break; 899 900 /* If we don't have a gmon.out file for this PMCid, create one */ 901 if (pgf == NULL) { 902 if ((pgf = calloc(1, sizeof(*pgf))) == NULL) 903 err(EX_OSERR, "ERROR:"); 904 905 pgf->pgf_gmondata = NULL; /* mark as unmapped */ 906 pgf->pgf_name = pmcstat_gmon_create_name(a->pa_samplesdir, 907 image, pmcid); 908 pgf->pgf_pmcid = pmcid; 909 assert(image->pi_end > image->pi_start); 910 pgf->pgf_nbuckets = (image->pi_end - image->pi_start) / 911 FUNCTION_ALIGNMENT; /* see <machine/profile.h> */ 912 pgf->pgf_ndatabytes = sizeof(struct gmonhdr) + 913 pgf->pgf_nbuckets * sizeof(HISTCOUNTER); 914 pgf->pgf_nsamples = 0; 915 916 pmcstat_gmon_create_file(pgf, image); 917 918 LIST_INSERT_HEAD(&image->pi_gmlist, pgf, pgf_next); 919 } 920 921 /* 922 * Map the gmon file in if needed. It may have been mapped 923 * out under memory pressure. 924 */ 925 if (pgf->pgf_gmondata == NULL) 926 pmcstat_gmon_map_file(pgf); 927 928 assert(pgf->pgf_gmondata != NULL); 929 930 /* 931 * 932 */ 933 934 bucket = (pc - map->ppm_lowpc) / FUNCTION_ALIGNMENT; 935 936 assert(bucket < pgf->pgf_nbuckets); 937 938 hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata + 939 sizeof(struct gmonhdr)); 940 941 /* saturating add */ 942 if (hc[bucket] < 0xFFFFU) /* XXX tie this to sizeof(HISTCOUNTER) */ 943 hc[bucket]++; 944 else /* mark that an overflow occurred */ 945 pgf->pgf_overflow = 1; 946 947 pgf->pgf_nsamples++; 948 } 949 950 /* 951 * Record the fact that PC values from 'start' to 'end' come from 952 * image 'image'. 
953 */ 954 955 static void 956 pmcstat_image_link(struct pmcstat_process *pp, struct pmcstat_image *image, 957 uintfptr_t start) 958 { 959 struct pmcstat_pcmap *pcm, *pcmnew; 960 uintfptr_t offset; 961 962 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN && 963 image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE); 964 965 if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL) 966 err(EX_OSERR, "ERROR: Cannot create a map entry"); 967 968 /* 969 * Adjust the map entry to only cover the text portion 970 * of the object. 971 */ 972 973 offset = start - image->pi_vaddr; 974 pcmnew->ppm_lowpc = image->pi_start + offset; 975 pcmnew->ppm_highpc = image->pi_end + offset; 976 pcmnew->ppm_image = image; 977 978 assert(pcmnew->ppm_lowpc < pcmnew->ppm_highpc); 979 980 /* Overlapped mmap()'s are assumed to never occur. */ 981 TAILQ_FOREACH(pcm, &pp->pp_map, ppm_next) 982 if (pcm->ppm_lowpc >= pcmnew->ppm_highpc) 983 break; 984 985 if (pcm == NULL) 986 TAILQ_INSERT_TAIL(&pp->pp_map, pcmnew, ppm_next); 987 else 988 TAILQ_INSERT_BEFORE(pcm, pcmnew, ppm_next); 989 } 990 991 /* 992 * Unmap images in the range [start..end) associated with process 993 * 'pp'. 994 */ 995 996 static void 997 pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start, 998 uintfptr_t end) 999 { 1000 struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew; 1001 1002 assert(pp != NULL); 1003 assert(start < end); 1004 1005 /* 1006 * Cases: 1007 * - we could have the range completely in the middle of an 1008 * existing pcmap; in this case we have to split the pcmap 1009 * structure into two (i.e., generate a 'hole'). 1010 * - we could have the range covering multiple pcmaps; these 1011 * will have to be removed. 1012 * - we could have either 'start' or 'end' falling in the 1013 * middle of a pcmap; in this case shorten the entry. 
1014 */ 1015 1016 TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) { 1017 assert(pcm->ppm_lowpc < pcm->ppm_highpc); 1018 if (pcm->ppm_highpc <= start) 1019 continue; 1020 if (pcm->ppm_lowpc > end) 1021 return; 1022 if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) { 1023 /* 1024 * The current pcmap is completely inside the 1025 * unmapped range: remove it entirely. 1026 */ 1027 TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next); 1028 free(pcm); 1029 } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) { 1030 /* 1031 * Split this pcmap into two; curtail the 1032 * current map to end at [start-1], and start 1033 * the new one at [end]. 1034 */ 1035 if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL) 1036 err(EX_OSERR, "ERROR: Cannot split a map " 1037 "entry"); 1038 1039 pcmnew->ppm_image = pcm->ppm_image; 1040 1041 pcmnew->ppm_lowpc = end; 1042 pcmnew->ppm_highpc = pcm->ppm_highpc; 1043 1044 pcm->ppm_highpc = start; 1045 1046 TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next); 1047 1048 return; 1049 } else if (pcm->ppm_lowpc < start) 1050 pcm->ppm_lowpc = start; 1051 else if (pcm->ppm_highpc > end) 1052 pcm->ppm_highpc = end; 1053 else 1054 assert(0); 1055 } 1056 } 1057 1058 /* 1059 * Add a {pmcid,name} mapping. 
1060 */ 1061 1062 static void 1063 pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps, 1064 struct pmcstat_args *a) 1065 { 1066 struct pmcstat_pmcrecord *pr; 1067 struct stat st; 1068 char fullpath[PATH_MAX]; 1069 1070 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) 1071 if (pr->pr_pmcid == pmcid) { 1072 pr->pr_pmcname = ps; 1073 return; 1074 } 1075 1076 if ((pr = malloc(sizeof(*pr))) == NULL) 1077 err(EX_OSERR, "ERROR: Cannot allocate pmc record"); 1078 1079 pr->pr_pmcid = pmcid; 1080 pr->pr_pmcname = ps; 1081 LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); 1082 1083 (void) snprintf(fullpath, sizeof(fullpath), "%s/%s", a->pa_samplesdir, 1084 pmcstat_string_unintern(ps)); 1085 1086 /* If the path name exists, it should be a directory */ 1087 if (stat(fullpath, &st) == 0 && S_ISDIR(st.st_mode)) 1088 return; 1089 1090 if (mkdir(fullpath, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) < 0) 1091 err(EX_OSERR, "ERROR: Cannot create directory \"%s\"", 1092 fullpath); 1093 } 1094 1095 /* 1096 * Given a pmcid in use, find its human-readable name. 1097 */ 1098 1099 static const char * 1100 pmcstat_pmcid_to_name(pmc_id_t pmcid) 1101 { 1102 struct pmcstat_pmcrecord *pr; 1103 char fullpath[PATH_MAX]; 1104 1105 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) 1106 if (pr->pr_pmcid == pmcid) 1107 return (pmcstat_string_unintern(pr->pr_pmcname)); 1108 1109 /* create a default name and add this entry */ 1110 if ((pr = malloc(sizeof(*pr))) == NULL) 1111 err(EX_OSERR, "ERROR: "); 1112 pr->pr_pmcid = pmcid; 1113 1114 (void) snprintf(fullpath, sizeof(fullpath), "%X", (unsigned int) pmcid); 1115 pr->pr_pmcname = pmcstat_string_intern(fullpath); 1116 1117 LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next); 1118 1119 return (pmcstat_string_unintern(pr->pr_pmcname)); 1120 } 1121 1122 /* 1123 * Associate an AOUT image with a process. 
1124 */ 1125 1126 static void 1127 pmcstat_process_aout_exec(struct pmcstat_process *pp, 1128 struct pmcstat_image *image, uintfptr_t entryaddr, 1129 struct pmcstat_args *a) 1130 { 1131 (void) pp; 1132 (void) image; 1133 (void) entryaddr; 1134 (void) a; 1135 /* TODO Implement a.out handling */ 1136 } 1137 1138 /* 1139 * Associate an ELF image with a process. 1140 */ 1141 1142 static void 1143 pmcstat_process_elf_exec(struct pmcstat_process *pp, 1144 struct pmcstat_image *image, uintfptr_t entryaddr, 1145 struct pmcstat_args *a) 1146 { 1147 uintmax_t libstart; 1148 struct pmcstat_image *rtldimage; 1149 1150 assert(image->pi_type == PMCSTAT_IMAGE_ELF32 || 1151 image->pi_type == PMCSTAT_IMAGE_ELF64); 1152 1153 /* Create a map entry for the base executable. */ 1154 pmcstat_image_link(pp, image, image->pi_vaddr); 1155 1156 /* 1157 * For dynamically linked executables we need to: 1158 * (a) find where the dynamic linker was mapped to for this 1159 * process, 1160 * (b) find all the executable objects that the dynamic linker 1161 * brought in. 1162 */ 1163 1164 if (image->pi_isdynamic) { 1165 1166 /* 1167 * The runtime loader gets loaded just after the maximum 1168 * possible heap address. Like so: 1169 * 1170 * [ TEXT DATA BSS HEAP -->*RTLD SHLIBS <--STACK] 1171 * ^ ^ 1172 * 0 VM_MAXUSER_ADDRESS 1173 1174 * 1175 * The exact address where the loader gets mapped in 1176 * will vary according to the size of the executable 1177 * and the limits on the size of the process'es data 1178 * segment at the time of exec(). The entry address 1179 * recorded at process exec time corresponds to the 1180 * 'start' address inside the dynamic linker. From 1181 * this we can figure out the address where the 1182 * runtime loader's file object had been mapped to. 
1183 */ 1184 rtldimage = pmcstat_image_from_path(image->pi_dynlinkerpath, 1185 0); 1186 if (rtldimage == NULL) { 1187 warnx("WARNING: Cannot find image for \"%s\".", 1188 pmcstat_string_unintern(image->pi_dynlinkerpath)); 1189 pmcstat_stats.ps_exec_errors++; 1190 return; 1191 } 1192 1193 if (rtldimage->pi_type == PMCSTAT_IMAGE_UNKNOWN) 1194 pmcstat_image_get_elf_params(rtldimage, a); 1195 1196 if (rtldimage->pi_type != PMCSTAT_IMAGE_ELF32 && 1197 rtldimage->pi_type != PMCSTAT_IMAGE_ELF64) { 1198 warnx("WARNING: rtld not an ELF object \"%s\".", 1199 pmcstat_string_unintern(image->pi_dynlinkerpath)); 1200 return; 1201 } 1202 1203 libstart = entryaddr - rtldimage->pi_entry; 1204 pmcstat_image_link(pp, rtldimage, libstart); 1205 } 1206 } 1207 1208 /* 1209 * Find the process descriptor corresponding to a PID. If 'allocate' 1210 * is zero, we return a NULL if a pid descriptor could not be found or 1211 * a process descriptor process. If 'allocate' is non-zero, then we 1212 * will attempt to allocate a fresh process descriptor. Zombie 1213 * process descriptors are only removed if a fresh allocation for the 1214 * same PID is requested. 
1215 */ 1216 1217 static struct pmcstat_process * 1218 pmcstat_process_lookup(pid_t pid, int allocate) 1219 { 1220 uint32_t hash; 1221 struct pmcstat_pcmap *ppm, *ppmtmp; 1222 struct pmcstat_process *pp, *pptmp; 1223 1224 hash = (uint32_t) pid & PMCSTAT_HASH_MASK; /* simplicity wins */ 1225 1226 LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[hash], pp_next, pptmp) 1227 if (pp->pp_pid == pid) { 1228 /* Found a descriptor, check and process zombies */ 1229 if (allocate && pp->pp_isactive == 0) { 1230 /* remove maps */ 1231 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, 1232 ppmtmp) { 1233 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next); 1234 free(ppm); 1235 } 1236 /* remove process entry */ 1237 LIST_REMOVE(pp, pp_next); 1238 free(pp); 1239 break; 1240 } 1241 return (pp); 1242 } 1243 1244 if (!allocate) 1245 return (NULL); 1246 1247 if ((pp = malloc(sizeof(*pp))) == NULL) 1248 err(EX_OSERR, "ERROR: Cannot allocate pid descriptor"); 1249 1250 pp->pp_pid = pid; 1251 pp->pp_isactive = 1; 1252 1253 TAILQ_INIT(&pp->pp_map); 1254 1255 LIST_INSERT_HEAD(&pmcstat_process_hash[hash], pp, pp_next); 1256 return (pp); 1257 } 1258 1259 /* 1260 * Associate an image and a process. 
1261 */ 1262 1263 static void 1264 pmcstat_process_exec(struct pmcstat_process *pp, 1265 pmcstat_interned_string path, uintfptr_t entryaddr, 1266 struct pmcstat_args *a) 1267 { 1268 struct pmcstat_image *image; 1269 1270 if ((image = pmcstat_image_from_path(path, 0)) == NULL) { 1271 pmcstat_stats.ps_exec_errors++; 1272 return; 1273 } 1274 1275 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 1276 pmcstat_image_determine_type(image, a); 1277 1278 assert(image->pi_type != PMCSTAT_IMAGE_UNKNOWN); 1279 1280 switch (image->pi_type) { 1281 case PMCSTAT_IMAGE_ELF32: 1282 case PMCSTAT_IMAGE_ELF64: 1283 pmcstat_stats.ps_exec_elf++; 1284 pmcstat_process_elf_exec(pp, image, entryaddr, a); 1285 break; 1286 1287 case PMCSTAT_IMAGE_AOUT: 1288 pmcstat_stats.ps_exec_aout++; 1289 pmcstat_process_aout_exec(pp, image, entryaddr, a); 1290 break; 1291 1292 case PMCSTAT_IMAGE_INDETERMINABLE: 1293 pmcstat_stats.ps_exec_indeterminable++; 1294 break; 1295 1296 default: 1297 err(EX_SOFTWARE, "ERROR: Unsupported executable type for " 1298 "\"%s\"", pmcstat_string_unintern(path)); 1299 } 1300 } 1301 1302 1303 /* 1304 * Find the map entry associated with process 'p' at PC value 'pc'. 
1305 */ 1306 1307 static struct pmcstat_pcmap * 1308 pmcstat_process_find_map(struct pmcstat_process *p, uintfptr_t pc) 1309 { 1310 struct pmcstat_pcmap *ppm; 1311 1312 TAILQ_FOREACH(ppm, &p->pp_map, ppm_next) { 1313 if (pc >= ppm->ppm_lowpc && pc < ppm->ppm_highpc) 1314 return (ppm); 1315 if (pc < ppm->ppm_lowpc) 1316 return (NULL); 1317 } 1318 1319 return (NULL); 1320 } 1321 1322 1323 1324 static int 1325 pmcstat_convert_log(struct pmcstat_args *a) 1326 { 1327 uintfptr_t pc; 1328 pid_t pid; 1329 struct pmcstat_image *image; 1330 struct pmcstat_process *pp, *ppnew; 1331 struct pmcstat_pcmap *ppm, *ppmtmp; 1332 struct pmclog_ev ev; 1333 pmcstat_interned_string image_path; 1334 1335 while (pmclog_read(a->pa_logparser, &ev) == 0) { 1336 assert(ev.pl_state == PMCLOG_OK); 1337 1338 switch (ev.pl_type) { 1339 case PMCLOG_TYPE_INITIALIZE: 1340 if ((ev.pl_u.pl_i.pl_version & 0xFF000000) != 1341 PMC_VERSION_MAJOR << 24 && a->pa_verbosity > 0) 1342 warnx("WARNING: Log version 0x%x does not " 1343 "match compiled version 0x%x.", 1344 ev.pl_u.pl_i.pl_version, 1345 PMC_VERSION_MAJOR); 1346 break; 1347 case PMCLOG_TYPE_MAP_IN: 1348 /* 1349 * Introduce an address range mapping for a 1350 * userland process or the kernel (pid == -1). 1351 * 1352 * We always allocate a process descriptor so 1353 * that subsequent samples seen for this 1354 * address range are mapped to the current 1355 * object being mapped in. 1356 */ 1357 pid = ev.pl_u.pl_mi.pl_pid; 1358 if (pid == -1) 1359 pp = pmcstat_kernproc; 1360 else 1361 pp = pmcstat_process_lookup(pid, 1362 PMCSTAT_ALLOCATE); 1363 1364 assert(pp != NULL); 1365 1366 image_path = pmcstat_string_intern(ev.pl_u.pl_mi. 
1367 pl_pathname); 1368 image = pmcstat_image_from_path(image_path, pid == -1); 1369 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN) 1370 pmcstat_image_determine_type(image, a); 1371 if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE) 1372 pmcstat_image_link(pp, image, 1373 ev.pl_u.pl_mi.pl_start); 1374 break; 1375 1376 case PMCLOG_TYPE_MAP_OUT: 1377 /* 1378 * Remove an address map. 1379 */ 1380 pid = ev.pl_u.pl_mo.pl_pid; 1381 if (pid == -1) 1382 pp = pmcstat_kernproc; 1383 else 1384 pp = pmcstat_process_lookup(pid, 0); 1385 1386 if (pp == NULL) /* unknown process */ 1387 break; 1388 1389 pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start, 1390 ev.pl_u.pl_mo.pl_end); 1391 break; 1392 1393 case PMCLOG_TYPE_PCSAMPLE: 1394 1395 /* 1396 * We bring in the gmon file for the image 1397 * currently associated with the PMC & pid 1398 * pair and increment the appropriate entry 1399 * bin inside this. 1400 */ 1401 pmcstat_stats.ps_samples_total++; 1402 1403 pc = ev.pl_u.pl_s.pl_pc; 1404 pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid, 1405 PMCSTAT_ALLOCATE); 1406 if ((ppm = pmcstat_process_find_map(pp, pc)) == NULL && 1407 (ppm = pmcstat_process_find_map(pmcstat_kernproc, 1408 pc)) == NULL) { /* unknown process,offset pair */ 1409 pmcstat_stats.ps_samples_unknown_offset++; 1410 break; 1411 } 1412 1413 pmcstat_image_increment_bucket(ppm, pc, 1414 ev.pl_u.pl_s.pl_pmcid, a); 1415 1416 break; 1417 1418 case PMCLOG_TYPE_PMCALLOCATE: 1419 /* 1420 * Record the association pmc id between this 1421 * PMC and its name. 1422 */ 1423 pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid, 1424 pmcstat_string_intern(ev.pl_u.pl_a.pl_evname), a); 1425 break; 1426 1427 case PMCLOG_TYPE_PROCEXEC: 1428 1429 /* 1430 * Change the executable image associated with 1431 * a process. 
1432 */ 1433 pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid, 1434 PMCSTAT_ALLOCATE); 1435 1436 /* delete the current process map */ 1437 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) { 1438 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next); 1439 free(ppm); 1440 } 1441 1442 /* associate this process image */ 1443 image_path = pmcstat_string_intern( 1444 ev.pl_u.pl_x.pl_pathname); 1445 assert(image_path != NULL); 1446 pmcstat_process_exec(pp, image_path, 1447 ev.pl_u.pl_x.pl_entryaddr, a); 1448 break; 1449 1450 case PMCLOG_TYPE_PROCEXIT: 1451 1452 /* 1453 * Due to the way the log is generated, the 1454 * last few samples corresponding to a process 1455 * may appear in the log after the process 1456 * exit event is recorded. Thus we keep the 1457 * process' descriptor and associated data 1458 * structures around, but mark the process as 1459 * having exited. 1460 */ 1461 pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0); 1462 if (pp == NULL) 1463 break; 1464 pp->pp_isactive = 0; /* mark as a zombie */ 1465 break; 1466 1467 case PMCLOG_TYPE_SYSEXIT: 1468 pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0); 1469 if (pp == NULL) 1470 break; 1471 pp->pp_isactive = 0; /* make a zombie */ 1472 break; 1473 1474 case PMCLOG_TYPE_PROCFORK: 1475 1476 /* 1477 * Allocate a process descriptor for the new 1478 * (child) process. 1479 */ 1480 ppnew = 1481 pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid, 1482 PMCSTAT_ALLOCATE); 1483 1484 /* 1485 * If we had been tracking the parent, clone 1486 * its address maps. 
1487 */ 1488 pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0); 1489 if (pp == NULL) 1490 break; 1491 TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next) 1492 pmcstat_image_link(ppnew, ppm->ppm_image, 1493 ppm->ppm_lowpc); 1494 break; 1495 1496 default: /* other types of entries are not relevant */ 1497 break; 1498 } 1499 } 1500 1501 if (ev.pl_state == PMCLOG_EOF) 1502 return (PMCSTAT_FINISHED); 1503 else if (ev.pl_state == PMCLOG_REQUIRE_DATA) 1504 return (PMCSTAT_RUNNING); 1505 1506 err(EX_DATAERR, "ERROR: event parsing failed (record %jd, " 1507 "offset 0x%jx)", (uintmax_t) ev.pl_count + 1, ev.pl_offset); 1508 } 1509 1510 /* 1511 * Print log entries as text. 1512 */ 1513 1514 static int 1515 pmcstat_print_log(struct pmcstat_args *a) 1516 { 1517 struct pmclog_ev ev; 1518 1519 while (pmclog_read(a->pa_logparser, &ev) == 0) { 1520 assert(ev.pl_state == PMCLOG_OK); 1521 switch (ev.pl_type) { 1522 case PMCLOG_TYPE_CLOSELOG: 1523 PMCSTAT_PRINT_ENTRY(a,"closelog",); 1524 break; 1525 case PMCLOG_TYPE_DROPNOTIFY: 1526 PMCSTAT_PRINT_ENTRY(a,"drop",); 1527 break; 1528 case PMCLOG_TYPE_INITIALIZE: 1529 PMCSTAT_PRINT_ENTRY(a,"initlog","0x%x \"%s\"", 1530 ev.pl_u.pl_i.pl_version, 1531 pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch)); 1532 break; 1533 case PMCLOG_TYPE_MAP_IN: 1534 PMCSTAT_PRINT_ENTRY(a,"map-in","%d %p \"%s\"", 1535 ev.pl_u.pl_mi.pl_pid, 1536 (void *) ev.pl_u.pl_mi.pl_start, 1537 ev.pl_u.pl_mi.pl_pathname); 1538 break; 1539 case PMCLOG_TYPE_MAP_OUT: 1540 PMCSTAT_PRINT_ENTRY(a,"map-out","%d %p %p", 1541 ev.pl_u.pl_mo.pl_pid, 1542 (void *) ev.pl_u.pl_mo.pl_start, 1543 (void *) ev.pl_u.pl_mo.pl_end); 1544 break; 1545 case PMCLOG_TYPE_PCSAMPLE: 1546 PMCSTAT_PRINT_ENTRY(a,"sample","0x%x %d %p %c", 1547 ev.pl_u.pl_s.pl_pmcid, 1548 ev.pl_u.pl_s.pl_pid, 1549 (void *) ev.pl_u.pl_s.pl_pc, 1550 ev.pl_u.pl_s.pl_usermode ? 
'u' : 's'); 1551 break; 1552 case PMCLOG_TYPE_PMCALLOCATE: 1553 PMCSTAT_PRINT_ENTRY(a,"allocate","0x%x \"%s\" 0x%x", 1554 ev.pl_u.pl_a.pl_pmcid, 1555 ev.pl_u.pl_a.pl_evname, 1556 ev.pl_u.pl_a.pl_flags); 1557 break; 1558 case PMCLOG_TYPE_PMCATTACH: 1559 PMCSTAT_PRINT_ENTRY(a,"attach","0x%x %d \"%s\"", 1560 ev.pl_u.pl_t.pl_pmcid, 1561 ev.pl_u.pl_t.pl_pid, 1562 ev.pl_u.pl_t.pl_pathname); 1563 break; 1564 case PMCLOG_TYPE_PMCDETACH: 1565 PMCSTAT_PRINT_ENTRY(a,"detach","0x%x %d", 1566 ev.pl_u.pl_d.pl_pmcid, 1567 ev.pl_u.pl_d.pl_pid); 1568 break; 1569 case PMCLOG_TYPE_PROCCSW: 1570 PMCSTAT_PRINT_ENTRY(a,"cswval","0x%x %d %jd", 1571 ev.pl_u.pl_c.pl_pmcid, 1572 ev.pl_u.pl_c.pl_pid, 1573 ev.pl_u.pl_c.pl_value); 1574 break; 1575 case PMCLOG_TYPE_PROCEXEC: 1576 PMCSTAT_PRINT_ENTRY(a,"exec","0x%x %d %p \"%s\"", 1577 ev.pl_u.pl_x.pl_pmcid, 1578 ev.pl_u.pl_x.pl_pid, 1579 (void *) ev.pl_u.pl_x.pl_entryaddr, 1580 ev.pl_u.pl_x.pl_pathname); 1581 break; 1582 case PMCLOG_TYPE_PROCEXIT: 1583 PMCSTAT_PRINT_ENTRY(a,"exitval","0x%x %d %jd", 1584 ev.pl_u.pl_e.pl_pmcid, 1585 ev.pl_u.pl_e.pl_pid, 1586 ev.pl_u.pl_e.pl_value); 1587 break; 1588 case PMCLOG_TYPE_PROCFORK: 1589 PMCSTAT_PRINT_ENTRY(a,"fork","%d %d", 1590 ev.pl_u.pl_f.pl_oldpid, 1591 ev.pl_u.pl_f.pl_newpid); 1592 break; 1593 case PMCLOG_TYPE_USERDATA: 1594 PMCSTAT_PRINT_ENTRY(a,"userdata","0x%x", 1595 ev.pl_u.pl_u.pl_userdata); 1596 break; 1597 case PMCLOG_TYPE_SYSEXIT: 1598 PMCSTAT_PRINT_ENTRY(a,"exit","%d", 1599 ev.pl_u.pl_se.pl_pid); 1600 break; 1601 default: 1602 fprintf(a->pa_printfile, "unknown %d", 1603 ev.pl_type); 1604 } 1605 } 1606 1607 if (ev.pl_state == PMCLOG_EOF) 1608 return (PMCSTAT_FINISHED); 1609 else if (ev.pl_state == PMCLOG_REQUIRE_DATA) 1610 return (PMCSTAT_RUNNING); 1611 1612 err(EX_DATAERR, "ERROR: event parsing failed " 1613 "(record %jd, offset 0x%jx)", 1614 (uintmax_t) ev.pl_count + 1, ev.pl_offset); 1615 /*NOTREACHED*/ 1616 } 1617 1618 /* 1619 * Public Interfaces. 
*/

/*
 * Close a logfile, after first flushing all in-module queued data.
 *
 * Clears FLAG_HAS_OUTPUT_LOGFILE and FLAG_HAS_PIPE in 'a->pa_flags'.
 * Returns PMCSTAT_EXITING if FLAG_HAS_PIPE was set on entry and
 * PMCSTAT_FINISHED otherwise.  A failure to flush or to deconfigure
 * the log file is fatal.
 */

int
pmcstat_close_log(struct pmcstat_args *a)
{
	int had_pipe;

	if (pmc_flush_logfile() < 0 ||
	    pmc_configure_logfile(-1) < 0)
		err(EX_OSERR, "ERROR: logging failed");

	/*
	 * Sample the pipe flag before clearing it; testing it after
	 * the mask below would always yield PMCSTAT_FINISHED.
	 */
	had_pipe = (a->pa_flags & FLAG_HAS_PIPE) != 0;
	a->pa_flags &= ~(FLAG_HAS_OUTPUT_LOGFILE | FLAG_HAS_PIPE);

	return (had_pipe ? PMCSTAT_EXITING : PMCSTAT_FINISHED);
}



/*
 * Open a log file, for reading or writing.
 *
 * The function returns the fd of a successfully opened log; any
 * failure that produces an error string is reported with errx(3)
 * and terminates the program.
 */

int
pmcstat_open_log(const char *path, int mode)
{
	int error, fd;
	size_t hlen;
	const char *p, *errstr;
	struct addrinfo hints, *res, *res0;
	char hostname[MAXHOSTNAMELEN];

	errstr = NULL;
	fd = -1;

	/*
	 * If 'path' is "-" then open one of stdin or stdout depending
	 * on the value of 'mode'.
	 *
	 * If 'path' contains a ':' and does not start with a '/' or '.',
	 * and is being opened for writing, treat it as a "host:port"
	 * specification and open a network socket.
	 *
	 * Otherwise, treat 'path' as a file name and open that.
	 */
	if (path[0] == '-' && path[1] == '\0')
		fd = (mode == PMCSTAT_OPEN_FOR_READ) ? 0 : 1;
	else if (mode == PMCSTAT_OPEN_FOR_WRITE && path[0] != '/' &&
	    path[0] != '.' && strchr(path, ':') != NULL) {

		/* the port is whatever follows the last ':' */
		p = strrchr(path, ':');
		hlen = p - path;
		if (p == path || hlen >= sizeof(hostname)) {
			errstr = strerror(EINVAL);
			goto done;
		}

		assert(hlen < sizeof(hostname));
		/* exactly 'hlen' bytes are wanted, so copy by length */
		(void) memcpy(hostname, path, hlen);
		hostname[hlen] = '\0';

		(void) memset(&hints, 0, sizeof(hints));
		hints.ai_family = AF_UNSPEC;
		hints.ai_socktype = SOCK_STREAM;
		if ((error = getaddrinfo(hostname, p+1, &hints, &res0)) != 0) {
			errstr = gai_strerror(error);
			goto done;
		}

		/*
		 * Try each returned address in turn; 'errstr' ends up
		 * describing the last failure if none could be used.
		 */
		fd = -1;
		for (res = res0; res; res = res->ai_next) {
			if ((fd = socket(res->ai_family, res->ai_socktype,
			    res->ai_protocol)) < 0) {
				errstr = strerror(errno);
				continue;
			}
			if (connect(fd, res->ai_addr, res->ai_addrlen) < 0) {
				errstr = strerror(errno);
				(void) close(fd);
				fd = -1;
				continue;
			}
			errstr = NULL;
			break;
		}
		freeaddrinfo(res0);

	} else if ((fd = open(path, mode == PMCSTAT_OPEN_FOR_READ ?
		    O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC),
		    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
			errstr = strerror(errno);

 done:
	if (errstr)
		errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
		    (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
		    errstr);

	return (fd);
}

/*
 * Process a log file in offline analysis mode.
 *
 * Returns whatever the selected worker returns.
 */

int
pmcstat_process_log(struct pmcstat_args *a)
{

	/*
	 * If FLAG_DO_PRINT is set, just print the log to the current
	 * output file.
	 */
	if (a->pa_flags & FLAG_DO_PRINT)
		return (pmcstat_print_log(a));
	else
		/* convert the log to gprof compatible profiles */
		return (pmcstat_convert_log(a));
}

/*
 * Initialize module.
1745 */ 1746 1747 void 1748 pmcstat_initialize_logging(struct pmcstat_args *a) 1749 { 1750 int i; 1751 1752 (void) a; 1753 1754 /* use a convenient format for 'ldd' output */ 1755 if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0) 1756 err(EX_OSERR, "ERROR: Cannot setenv"); 1757 1758 /* Initialize hash tables */ 1759 pmcstat_string_initialize(); 1760 for (i = 0; i < PMCSTAT_NHASH; i++) { 1761 LIST_INIT(&pmcstat_image_hash[i]); 1762 LIST_INIT(&pmcstat_process_hash[i]); 1763 } 1764 1765 /* 1766 * Create a fake 'process' entry for the kernel with pid -1. 1767 * hwpmc(4) will subsequently inform us about where the kernel 1768 * and any loaded kernel modules are mapped. 1769 */ 1770 if ((pmcstat_kernproc = pmcstat_process_lookup((pid_t) -1, 1771 PMCSTAT_ALLOCATE)) == NULL) 1772 err(EX_OSERR, "ERROR: Cannot initialize logging"); 1773 } 1774 1775 /* 1776 * Shutdown module. 1777 */ 1778 1779 void 1780 pmcstat_shutdown_logging(struct pmcstat_args *a) 1781 { 1782 int i; 1783 FILE *mf; 1784 struct pmcstat_gmonfile *pgf, *pgftmp; 1785 struct pmcstat_image *pi, *pitmp; 1786 struct pmcstat_process *pp, *pptmp; 1787 1788 /* determine where to send the map file */ 1789 mf = NULL; 1790 if (a->pa_mapfilename != NULL) 1791 mf = (strcmp(a->pa_mapfilename, "-") == 0) ? 
1792 a->pa_printfile : fopen(a->pa_mapfilename, "w"); 1793 1794 if (mf == NULL && a->pa_flags & FLAG_DO_GPROF && 1795 a->pa_verbosity >= 2) 1796 mf = a->pa_printfile; 1797 1798 if (mf) 1799 (void) fprintf(mf, "MAP:\n"); 1800 1801 for (i = 0; i < PMCSTAT_NHASH; i++) { 1802 LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next, pitmp) { 1803 1804 if (mf) 1805 (void) fprintf(mf, " \"%s\" => \"%s\"", 1806 pmcstat_string_unintern(pi->pi_execpath), 1807 pmcstat_string_unintern(pi->pi_samplename)); 1808 1809 /* flush gmon.out data to disk */ 1810 LIST_FOREACH_SAFE(pgf, &pi->pi_gmlist, pgf_next, 1811 pgftmp) { 1812 pmcstat_gmon_unmap_file(pgf); 1813 LIST_REMOVE(pgf, pgf_next); 1814 if (mf) 1815 (void) fprintf(mf, " %s/%d", 1816 pmcstat_pmcid_to_name(pgf->pgf_pmcid), 1817 pgf->pgf_nsamples); 1818 if (pgf->pgf_overflow && a->pa_verbosity >= 1) 1819 warnx("WARNING: profile \"%s\" " 1820 "overflowed.", 1821 pmcstat_string_unintern( 1822 pgf->pgf_name)); 1823 free(pgf); 1824 } 1825 1826 if (mf) 1827 (void) fprintf(mf, "\n"); 1828 1829 LIST_REMOVE(pi, pi_next); 1830 free(pi); 1831 } 1832 LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next, 1833 pptmp) { 1834 LIST_REMOVE(pp, pp_next); 1835 free(pp); 1836 } 1837 } 1838 1839 pmcstat_string_shutdown(); 1840 1841 /* 1842 * Print errors unless -q was specified. Print all statistics 1843 * if verbosity > 1. 
1844 */ 1845 #define PRINT(N,V,A) do { \ 1846 if (pmcstat_stats.ps_##V || (A)->pa_verbosity >= 2) \ 1847 (void) fprintf((A)->pa_printfile, " %-40s %d\n",\ 1848 N, pmcstat_stats.ps_##V); \ 1849 } while (0) 1850 1851 if (a->pa_verbosity >= 1 && a->pa_flags & FLAG_DO_GPROF) { 1852 (void) fprintf(a->pa_printfile, "CONVERSION STATISTICS:\n"); 1853 PRINT("#exec/a.out", exec_aout, a); 1854 PRINT("#exec/elf", exec_elf, a); 1855 PRINT("#exec/unknown", exec_indeterminable, a); 1856 PRINT("#exec handling errors", exec_errors, a); 1857 PRINT("#samples/total", samples_total, a); 1858 PRINT("#samples/unclaimed", samples_unknown_offset, a); 1859 PRINT("#samples/unknown-object", samples_indeterminable, a); 1860 } 1861 1862 if (mf) 1863 (void) fclose(mf); 1864 } 1865