/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "../../include/linux/perf_event.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))

int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

void hists__init(struct hists *hists)
{
	memset(hists, 0, sizeof(*hists));
	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
	hists->entries_in = &hists->entries_in_array[0];
	hists->entries_collapsed = RB_ROOT;
	hists->entries = RB_ROOT;
	pthread_mutex_init(&hists->lock, NULL);
}

void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}

static const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
	"cycles",
	"instructions",
	"cache-references",
	"cache-misses",
	"branches",
	"branch-misses",
	"bus-cycles",
	"stalled-cycles-frontend",
	"stalled-cycles-backend",
	"ref-cycles",
};

const char *__perf_evsel__hw_name(u64 config)
{
	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
		return perf_evsel__hw_names[config];

	return "unknown-hardware";
}

static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int colon = 0;
	struct perf_event_attr *attr = &evsel->attr;
	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(attr->config));
	bool exclude_guest_default = false;

#define MOD_PRINT(context, mod) do {					\
		if (!attr->exclude_##context) {				\
			if (!colon) colon = r++;			\
			r += scnprintf(bf + r, size - r, "%c", mod);	\
		} } while(0)

	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
		MOD_PRINT(kernel, 'k');
		MOD_PRINT(user, 'u');
		MOD_PRINT(hv, 'h');
		exclude_guest_default = true;
	}

	if (attr->precise_ip) {
		if (!colon)
			colon = r++;
		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
		exclude_guest_default = true;
	}

	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
		MOD_PRINT(host, 'H');
		MOD_PRINT(guest, 'G');
	}
#undef MOD_PRINT
	if (colon)
		bf[colon] = ':';
	return r;
}

int perf_evsel__name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int ret;

	switch (evsel->attr.type) {
	case PERF_TYPE_RAW:
		ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
		break;

	case PERF_TYPE_HARDWARE:
		ret = perf_evsel__hw_name(evsel, bf, size);
		break;
	default:
		/*
		 * FIXME
		 *
		 * This is the minimal perf_evsel__name so that we can
		 * reconstruct event names taking into account event modifiers.
		 *
		 * The old event_name uses it now for raw and hw events, so that
		 * we don't drag all the parsing stuff into the python binding.
		 *
		 * On the next devel cycle the rest of the event naming will be
		 * brought here.
		 */
		return 0;
	}

	return ret;
}

void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts,
			struct perf_evsel *first)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->disabled = 1;
	attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
	attr->inherit = !opts->no_inherit;
	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING |
			    PERF_FORMAT_ID;

	attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/*
	 * We default some events to a sample interval of 1. But keep
	 * it a weak assumption, overridable by the user.
	 */
	if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq = 1;
			attr->sample_freq = opts->freq;
		} else {
			attr->sample_period = opts->default_interval;
		}
	}

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat)
		attr->inherit_stat = 1;

	if (opts->sample_address) {
		attr->sample_type |= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (opts->call_graph)
		attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

	if (perf_target__has_cpu(&opts->target))
		attr->sample_type |= PERF_SAMPLE_CPU;

	if (opts->period)
		attr->sample_type |= PERF_SAMPLE_PERIOD;

	if (!opts->sample_id_all_missing &&
	    (opts->sample_time || !opts->no_inherit ||
	     perf_target__has_cpu(&opts->target)))
		attr->sample_type |= PERF_SAMPLE_TIME;

	if (opts->raw_samples) {
		attr->sample_type |= PERF_SAMPLE_TIME;
		attr->sample_type |= PERF_SAMPLE_RAW;
		attr->sample_type |= PERF_SAMPLE_CPU;
	}

	if (opts->no_delay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
	if (opts->branch_stack) {
		attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
		attr->branch_sample_type = opts->branch_stack;
	}

	attr->mmap = track;
	attr->comm = track;

	if (perf_target__none(&opts->target) &&
	    (!opts->group || evsel == first)) {
		attr->enable_on_exec = 1;
	}
}

int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;
	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
				(ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}

int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}

static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
	evsel->fd = NULL;
}

static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}

static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample,
				       bool swapped)
{
	const u64 *array = event->sample.array;
	union u64_swap u;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		sample->cpu = u.val32[0];
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		sample->pid = u.val32[0];
		sample->tid = u.val32[1];
	}

	return 0;
}

static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}

int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = 1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data, swapped);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	data->addr = 0;
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	data->id = -1ULL;
	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;

		array = (void *)array + data->raw_size + sizeof(u32);
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		u64 sz;

		data->branch_stack = (struct branch_stack *)array;
		array++; /* nr */

		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		sz /= sizeof(u64);
		array += sz;
	}
	return 0;
}

int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IP) {
		event->ip.ip = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	return 0;
}
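
/*
 * Illustrative usage sketch, not part of the evsel API itself: a minimal,
 * hypothetical driver showing how the helpers above are typically combined
 * (perf_evsel__new -> perf_evsel__open_per_cpu -> read -> close -> delete).
 * The guard macro and function name are made up for illustration only, and
 * the sketch assumes the usual perf util headers (evsel.h, cpumap.h) plus
 * <stdio.h>/<inttypes.h>; it is guarded out so it is never built.
 */
#ifdef PERF_EVSEL_USAGE_EXAMPLE
static int example__count_cycles_on_all_cpus(void)
{
	struct perf_event_attr attr = {
		.type	     = PERF_TYPE_HARDWARE,
		.config	     = PERF_COUNT_HW_CPU_CYCLES,
		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			       PERF_FORMAT_TOTAL_TIME_RUNNING,
	};
	struct cpu_map *cpus = cpu_map__new(NULL);	/* all online CPUs */
	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
	int cpu, err = -1;

	if (cpus == NULL || evsel == NULL)
		goto out;

	/* one counts slot per cpu, so the read loop below stays in bounds */
	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0)
		goto out;

	/* system-wide counting: one fd per cpu, no specific thread */
	if (perf_evsel__open_per_cpu(evsel, cpus, false, NULL) < 0)
		goto out;

	/* ... run the workload of interest here ... */

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		/*
		 * scale == true reads val, ena and run and compensates for
		 * multiplexing, as __perf_evsel__read_on_cpu does above.
		 */
		if (__perf_evsel__read_on_cpu(evsel, cpu, 0, true) == 0)
			printf("cpu%d: %" PRIu64 " cycles\n", cpus->map[cpu],
			       evsel->counts->cpu[cpu].val);
	}

	perf_evsel__close(evsel, cpus->nr, 1);
	err = 0;
out:
	if (evsel)
		perf_evsel__delete(evsel);
	if (cpus)
		cpu_map__delete(cpus);
	return err;
}
#endif /* PERF_EVSEL_USAGE_EXAMPLE */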