1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <poll.h> 12 #include "cpumap.h" 13 #include "thread_map.h" 14 #include "target.h" 15 #include "evlist.h" 16 #include "evsel.h" 17 #include "debug.h" 18 #include "asm/bug.h" 19 #include <unistd.h> 20 21 #include "parse-events.h" 22 #include <subcmd/parse-options.h> 23 24 #include <sys/mman.h> 25 26 #include <linux/bitops.h> 27 #include <linux/hash.h> 28 #include <linux/log2.h> 29 #include <linux/err.h> 30 31 static void perf_mmap__munmap(struct perf_mmap *map); 32 static void perf_mmap__put(struct perf_mmap *map); 33 34 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 35 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 36 37 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 38 struct thread_map *threads) 39 { 40 int i; 41 42 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 43 INIT_HLIST_HEAD(&evlist->heads[i]); 44 INIT_LIST_HEAD(&evlist->entries); 45 perf_evlist__set_maps(evlist, cpus, threads); 46 fdarray__init(&evlist->pollfd, 64); 47 evlist->workload.pid = -1; 48 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 49 } 50 51 struct perf_evlist *perf_evlist__new(void) 52 { 53 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 54 55 if (evlist != NULL) 56 perf_evlist__init(evlist, NULL, NULL); 57 58 return evlist; 59 } 60 61 struct perf_evlist *perf_evlist__new_default(void) 62 { 63 struct perf_evlist *evlist = perf_evlist__new(); 64 65 if (evlist && perf_evlist__add_default(evlist)) { 66 perf_evlist__delete(evlist); 67 evlist = NULL; 68 } 69 70 return evlist; 71 } 72 73 struct perf_evlist *perf_evlist__new_dummy(void) 74 { 75 struct perf_evlist *evlist = perf_evlist__new(); 76 77 if (evlist && perf_evlist__add_dummy(evlist)) { 78 perf_evlist__delete(evlist); 79 evlist = NULL; 80 } 81 82 return evlist; 83 } 84 85 /** 86 * perf_evlist__set_id_pos - set the positions of event ids. 87 * @evlist: selected event list 88 * 89 * Events with compatible sample types all have the same id_pos 90 * and is_pos. For convenience, put a copy on evlist. 91 */ 92 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 93 { 94 struct perf_evsel *first = perf_evlist__first(evlist); 95 96 evlist->id_pos = first->id_pos; 97 evlist->is_pos = first->is_pos; 98 } 99 100 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 101 { 102 struct perf_evsel *evsel; 103 104 evlist__for_each_entry(evlist, evsel) 105 perf_evsel__calc_id_pos(evsel); 106 107 perf_evlist__set_id_pos(evlist); 108 } 109 110 static void perf_evlist__purge(struct perf_evlist *evlist) 111 { 112 struct perf_evsel *pos, *n; 113 114 evlist__for_each_entry_safe(evlist, n, pos) { 115 list_del_init(&pos->node); 116 pos->evlist = NULL; 117 perf_evsel__delete(pos); 118 } 119 120 evlist->nr_entries = 0; 121 } 122 123 void perf_evlist__exit(struct perf_evlist *evlist) 124 { 125 zfree(&evlist->mmap); 126 zfree(&evlist->backward_mmap); 127 fdarray__exit(&evlist->pollfd); 128 } 129 130 void perf_evlist__delete(struct perf_evlist *evlist) 131 { 132 if (evlist == NULL) 133 return; 134 135 perf_evlist__munmap(evlist); 136 perf_evlist__close(evlist); 137 cpu_map__put(evlist->cpus); 138 thread_map__put(evlist->threads); 139 evlist->cpus = NULL; 140 evlist->threads = NULL; 141 perf_evlist__purge(evlist); 142 perf_evlist__exit(evlist); 143 free(evlist); 144 } 145 146 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 147 struct perf_evsel *evsel) 148 { 149 /* 150 * We already have cpus for evsel (via PMU sysfs) so 151 * keep it, if there's no target cpu list defined. 152 */ 153 if (!evsel->own_cpus || evlist->has_user_cpus) { 154 cpu_map__put(evsel->cpus); 155 evsel->cpus = cpu_map__get(evlist->cpus); 156 } else if (evsel->cpus != evsel->own_cpus) { 157 cpu_map__put(evsel->cpus); 158 evsel->cpus = cpu_map__get(evsel->own_cpus); 159 } 160 161 thread_map__put(evsel->threads); 162 evsel->threads = thread_map__get(evlist->threads); 163 } 164 165 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 166 { 167 struct perf_evsel *evsel; 168 169 evlist__for_each_entry(evlist, evsel) 170 __perf_evlist__propagate_maps(evlist, evsel); 171 } 172 173 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 174 { 175 entry->evlist = evlist; 176 list_add_tail(&entry->node, &evlist->entries); 177 entry->idx = evlist->nr_entries; 178 entry->tracking = !entry->idx; 179 180 if (!evlist->nr_entries++) 181 perf_evlist__set_id_pos(evlist); 182 183 __perf_evlist__propagate_maps(evlist, entry); 184 } 185 186 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 187 { 188 evsel->evlist = NULL; 189 list_del_init(&evsel->node); 190 evlist->nr_entries -= 1; 191 } 192 193 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 194 struct list_head *list) 195 { 196 struct perf_evsel *evsel, *temp; 197 198 __evlist__for_each_entry_safe(list, temp, evsel) { 199 list_del_init(&evsel->node); 200 perf_evlist__add(evlist, evsel); 201 } 202 } 203 204 void __perf_evlist__set_leader(struct list_head *list) 205 { 206 struct perf_evsel *evsel, *leader; 207 208 leader = list_entry(list->next, struct perf_evsel, node); 209 evsel = list_entry(list->prev, struct perf_evsel, node); 210 211 leader->nr_members = evsel->idx - leader->idx + 1; 212 213 __evlist__for_each_entry(list, evsel) { 214 evsel->leader = leader; 215 } 216 } 217 218 void perf_evlist__set_leader(struct perf_evlist *evlist) 219 { 220 if (evlist->nr_entries) { 221 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 222 __perf_evlist__set_leader(&evlist->entries); 223 } 224 } 225 226 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) 227 { 228 attr->precise_ip = 3; 229 230 while (attr->precise_ip != 0) { 231 int fd = sys_perf_event_open(attr, 0, -1, -1, 0); 232 if (fd != -1) { 233 close(fd); 234 break; 235 } 236 --attr->precise_ip; 237 } 238 } 239 240 int perf_evlist__add_default(struct perf_evlist *evlist) 241 { 242 struct perf_evsel *evsel = perf_evsel__new_cycles(); 243 244 if (evsel == NULL) 245 return -ENOMEM; 246 247 perf_evlist__add(evlist, evsel); 248 return 0; 249 } 250 251 int perf_evlist__add_dummy(struct perf_evlist *evlist) 252 { 253 struct perf_event_attr attr = { 254 .type = PERF_TYPE_SOFTWARE, 255 .config = PERF_COUNT_SW_DUMMY, 256 .size = sizeof(attr), /* to capture ABI version */ 257 }; 258 struct perf_evsel *evsel = perf_evsel__new(&attr); 259 260 if (evsel == NULL) 261 return -ENOMEM; 262 263 perf_evlist__add(evlist, evsel); 264 return 0; 265 } 266 267 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 268 struct perf_event_attr *attrs, size_t nr_attrs) 269 { 270 struct perf_evsel *evsel, *n; 271 LIST_HEAD(head); 272 size_t i; 273 274 for (i = 0; i < nr_attrs; i++) { 275 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 276 if (evsel == NULL) 277 goto out_delete_partial_list; 278 list_add_tail(&evsel->node, &head); 279 } 280 281 perf_evlist__splice_list_tail(evlist, &head); 282 283 return 0; 284 285 out_delete_partial_list: 286 __evlist__for_each_entry_safe(&head, n, evsel) 287 perf_evsel__delete(evsel); 288 return -1; 289 } 290 291 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 292 struct perf_event_attr *attrs, size_t nr_attrs) 293 { 294 size_t i; 295 296 for (i = 0; i < nr_attrs; i++) 297 event_attr_init(attrs + i); 298 299 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 300 } 301 302 struct perf_evsel * 303 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 304 { 305 struct perf_evsel *evsel; 306 307 evlist__for_each_entry(evlist, evsel) { 308 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 309 (int)evsel->attr.config == id) 310 return evsel; 311 } 312 313 return NULL; 314 } 315 316 struct perf_evsel * 317 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 318 const char *name) 319 { 320 struct perf_evsel *evsel; 321 322 evlist__for_each_entry(evlist, evsel) { 323 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 324 (strcmp(evsel->name, name) == 0)) 325 return evsel; 326 } 327 328 return NULL; 329 } 330 331 int perf_evlist__add_newtp(struct perf_evlist *evlist, 332 const char *sys, const char *name, void *handler) 333 { 334 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 335 336 if (IS_ERR(evsel)) 337 return -1; 338 339 evsel->handler = handler; 340 perf_evlist__add(evlist, evsel); 341 return 0; 342 } 343 344 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 345 struct perf_evsel *evsel) 346 { 347 if (evsel->system_wide) 348 return 1; 349 else 350 return thread_map__nr(evlist->threads); 351 } 352 353 void perf_evlist__disable(struct perf_evlist *evlist) 354 { 355 struct perf_evsel *pos; 356 357 evlist__for_each_entry(evlist, pos) { 358 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 359 continue; 360 perf_evsel__disable(pos); 361 } 362 363 evlist->enabled = false; 364 } 365 366 void perf_evlist__enable(struct perf_evlist *evlist) 367 { 368 struct perf_evsel *pos; 369 370 evlist__for_each_entry(evlist, pos) { 371 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 372 continue; 373 perf_evsel__enable(pos); 374 } 375 376 evlist->enabled = true; 377 } 378 379 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 380 { 381 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 382 } 383 384 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 385 struct perf_evsel *evsel, int cpu) 386 { 387 int thread, err; 388 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 389 390 if (!evsel->fd) 391 return -EINVAL; 392 393 for (thread = 0; thread < nr_threads; thread++) { 394 err = ioctl(FD(evsel, cpu, thread), 395 PERF_EVENT_IOC_ENABLE, 0); 396 if (err) 397 return err; 398 } 399 return 0; 400 } 401 402 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 403 struct perf_evsel *evsel, 404 int thread) 405 { 406 int cpu, err; 407 int nr_cpus = cpu_map__nr(evlist->cpus); 408 409 if (!evsel->fd) 410 return -EINVAL; 411 412 for (cpu = 0; cpu < nr_cpus; cpu++) { 413 err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 414 if (err) 415 return err; 416 } 417 return 0; 418 } 419 420 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 421 struct perf_evsel *evsel, int idx) 422 { 423 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 424 425 if (per_cpu_mmaps) 426 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 427 else 428 return perf_evlist__enable_event_thread(evlist, evsel, idx); 429 } 430 431 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 432 { 433 int nr_cpus = cpu_map__nr(evlist->cpus); 434 int nr_threads = thread_map__nr(evlist->threads); 435 int nfds = 0; 436 struct perf_evsel *evsel; 437 438 evlist__for_each_entry(evlist, evsel) { 439 if (evsel->system_wide) 440 nfds += nr_cpus; 441 else 442 nfds += nr_cpus * nr_threads; 443 } 444 445 if (fdarray__available_entries(&evlist->pollfd) < nfds && 446 fdarray__grow(&evlist->pollfd, nfds) < 0) 447 return -ENOMEM; 448 449 return 0; 450 } 451 452 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 453 struct perf_mmap *map, short revent) 454 { 455 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 456 /* 457 * Save the idx so that when we filter out fds POLLHUP'ed we can 458 * close the associated evlist->mmap[] entry. 459 */ 460 if (pos >= 0) { 461 evlist->pollfd.priv[pos].ptr = map; 462 463 fcntl(fd, F_SETFL, O_NONBLOCK); 464 } 465 466 return pos; 467 } 468 469 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 470 { 471 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 472 } 473 474 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 475 void *arg __maybe_unused) 476 { 477 struct perf_mmap *map = fda->priv[fd].ptr; 478 479 if (map) 480 perf_mmap__put(map); 481 } 482 483 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 484 { 485 return fdarray__filter(&evlist->pollfd, revents_and_mask, 486 perf_evlist__munmap_filtered, NULL); 487 } 488 489 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 490 { 491 return fdarray__poll(&evlist->pollfd, timeout); 492 } 493 494 static void perf_evlist__id_hash(struct perf_evlist *evlist, 495 struct perf_evsel *evsel, 496 int cpu, int thread, u64 id) 497 { 498 int hash; 499 struct perf_sample_id *sid = SID(evsel, cpu, thread); 500 501 sid->id = id; 502 sid->evsel = evsel; 503 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 504 hlist_add_head(&sid->node, &evlist->heads[hash]); 505 } 506 507 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 508 int cpu, int thread, u64 id) 509 { 510 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 511 evsel->id[evsel->ids++] = id; 512 } 513 514 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 515 struct perf_evsel *evsel, 516 int cpu, int thread, int fd) 517 { 518 u64 read_data[4] = { 0, }; 519 int id_idx = 1; /* The first entry is the counter value */ 520 u64 id; 521 int ret; 522 523 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 524 if (!ret) 525 goto add; 526 527 if (errno != ENOTTY) 528 return -1; 529 530 /* Legacy way to get event id.. All hail to old kernels! */ 531 532 /* 533 * This way does not work with group format read, so bail 534 * out in that case. 535 */ 536 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 537 return -1; 538 539 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 540 read(fd, &read_data, sizeof(read_data)) == -1) 541 return -1; 542 543 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 544 ++id_idx; 545 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 546 ++id_idx; 547 548 id = read_data[id_idx]; 549 550 add: 551 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 552 return 0; 553 } 554 555 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 556 struct perf_evsel *evsel, int idx, int cpu, 557 int thread) 558 { 559 struct perf_sample_id *sid = SID(evsel, cpu, thread); 560 sid->idx = idx; 561 if (evlist->cpus && cpu >= 0) 562 sid->cpu = evlist->cpus->map[cpu]; 563 else 564 sid->cpu = -1; 565 if (!evsel->system_wide && evlist->threads && thread >= 0) 566 sid->tid = thread_map__pid(evlist->threads, thread); 567 else 568 sid->tid = -1; 569 } 570 571 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 572 { 573 struct hlist_head *head; 574 struct perf_sample_id *sid; 575 int hash; 576 577 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 578 head = &evlist->heads[hash]; 579 580 hlist_for_each_entry(sid, head, node) 581 if (sid->id == id) 582 return sid; 583 584 return NULL; 585 } 586 587 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 588 { 589 struct perf_sample_id *sid; 590 591 if (evlist->nr_entries == 1 || !id) 592 return perf_evlist__first(evlist); 593 594 sid = perf_evlist__id2sid(evlist, id); 595 if (sid) 596 return sid->evsel; 597 598 if (!perf_evlist__sample_id_all(evlist)) 599 return perf_evlist__first(evlist); 600 601 return NULL; 602 } 603 604 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 605 u64 id) 606 { 607 struct perf_sample_id *sid; 608 609 if (!id) 610 return NULL; 611 612 sid = perf_evlist__id2sid(evlist, id); 613 if (sid) 614 return sid->evsel; 615 616 return NULL; 617 } 618 619 static int perf_evlist__event2id(struct perf_evlist *evlist, 620 union perf_event *event, u64 *id) 621 { 622 const u64 *array = event->sample.array; 623 ssize_t n; 624 625 n = (event->header.size - sizeof(event->header)) >> 3; 626 627 if (event->header.type == PERF_RECORD_SAMPLE) { 628 if (evlist->id_pos >= n) 629 return -1; 630 *id = array[evlist->id_pos]; 631 } else { 632 if (evlist->is_pos > n) 633 return -1; 634 n -= evlist->is_pos; 635 *id = array[n]; 636 } 637 return 0; 638 } 639 640 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 641 union perf_event *event) 642 { 643 struct perf_evsel *first = perf_evlist__first(evlist); 644 struct hlist_head *head; 645 struct perf_sample_id *sid; 646 int hash; 647 u64 id; 648 649 if (evlist->nr_entries == 1) 650 return first; 651 652 if (!first->attr.sample_id_all && 653 event->header.type != PERF_RECORD_SAMPLE) 654 return first; 655 656 if (perf_evlist__event2id(evlist, event, &id)) 657 return NULL; 658 659 /* Synthesized events have an id of zero */ 660 if (!id) 661 return first; 662 663 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 664 head = &evlist->heads[hash]; 665 666 hlist_for_each_entry(sid, head, node) { 667 if (sid->id == id) 668 return sid->evsel; 669 } 670 return NULL; 671 } 672 673 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 674 { 675 int i; 676 677 if (!evlist->backward_mmap) 678 return 0; 679 680 for (i = 0; i < evlist->nr_mmaps; i++) { 681 int fd = evlist->backward_mmap[i].fd; 682 int err; 683 684 if (fd < 0) 685 continue; 686 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); 687 if (err) 688 return err; 689 } 690 return 0; 691 } 692 693 static int perf_evlist__pause(struct perf_evlist *evlist) 694 { 695 return perf_evlist__set_paused(evlist, true); 696 } 697 698 static int perf_evlist__resume(struct perf_evlist *evlist) 699 { 700 return perf_evlist__set_paused(evlist, false); 701 } 702 703 /* When check_messup is true, 'end' must points to a good entry */ 704 static union perf_event * 705 perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, 706 u64 end, u64 *prev) 707 { 708 unsigned char *data = md->base + page_size; 709 union perf_event *event = NULL; 710 int diff = end - start; 711 712 if (check_messup) { 713 /* 714 * If we're further behind than half the buffer, there's a chance 715 * the writer will bite our tail and mess up the samples under us. 716 * 717 * If we somehow ended up ahead of the 'end', we got messed up. 718 * 719 * In either case, truncate and restart at 'end'. 720 */ 721 if (diff > md->mask / 2 || diff < 0) { 722 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 723 724 /* 725 * 'end' points to a known good entry, start there. 726 */ 727 start = end; 728 diff = 0; 729 } 730 } 731 732 if (diff >= (int)sizeof(event->header)) { 733 size_t size; 734 735 event = (union perf_event *)&data[start & md->mask]; 736 size = event->header.size; 737 738 if (size < sizeof(event->header) || diff < (int)size) { 739 event = NULL; 740 goto broken_event; 741 } 742 743 /* 744 * Event straddles the mmap boundary -- header should always 745 * be inside due to u64 alignment of output. 746 */ 747 if ((start & md->mask) + size != ((start + size) & md->mask)) { 748 unsigned int offset = start; 749 unsigned int len = min(sizeof(*event), size), cpy; 750 void *dst = md->event_copy; 751 752 do { 753 cpy = min(md->mask + 1 - (offset & md->mask), len); 754 memcpy(dst, &data[offset & md->mask], cpy); 755 offset += cpy; 756 dst += cpy; 757 len -= cpy; 758 } while (len); 759 760 event = (union perf_event *) md->event_copy; 761 } 762 763 start += size; 764 } 765 766 broken_event: 767 if (prev) 768 *prev = start; 769 770 return event; 771 } 772 773 union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) 774 { 775 u64 head; 776 u64 old = md->prev; 777 778 /* 779 * Check if event was unmapped due to a POLLHUP/POLLERR. 780 */ 781 if (!atomic_read(&md->refcnt)) 782 return NULL; 783 784 head = perf_mmap__read_head(md); 785 786 return perf_mmap__read(md, check_messup, old, head, &md->prev); 787 } 788 789 union perf_event * 790 perf_mmap__read_backward(struct perf_mmap *md) 791 { 792 u64 head, end; 793 u64 start = md->prev; 794 795 /* 796 * Check if event was unmapped due to a POLLHUP/POLLERR. 797 */ 798 if (!atomic_read(&md->refcnt)) 799 return NULL; 800 801 head = perf_mmap__read_head(md); 802 if (!head) 803 return NULL; 804 805 /* 806 * 'head' pointer starts from 0. Kernel minus sizeof(record) form 807 * it each time when kernel writes to it, so in fact 'head' is 808 * negative. 'end' pointer is made manually by adding the size of 809 * the ring buffer to 'head' pointer, means the validate data can 810 * read is the whole ring buffer. If 'end' is positive, the ring 811 * buffer has not fully filled, so we must adjust 'end' to 0. 812 * 813 * However, since both 'head' and 'end' is unsigned, we can't 814 * simply compare 'end' against 0. Here we compare '-head' and 815 * the size of the ring buffer, where -head is the number of bytes 816 * kernel write to the ring buffer. 817 */ 818 if (-head < (u64)(md->mask + 1)) 819 end = 0; 820 else 821 end = head + md->mask + 1; 822 823 return perf_mmap__read(md, false, start, end, &md->prev); 824 } 825 826 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) 827 { 828 struct perf_mmap *md = &evlist->mmap[idx]; 829 830 /* 831 * Check messup is required for forward overwritable ring buffer: 832 * memory pointed by md->prev can be overwritten in this case. 833 * No need for read-write ring buffer: kernel stop outputting when 834 * it hit md->prev (perf_mmap__consume()). 835 */ 836 return perf_mmap__read_forward(md, evlist->overwrite); 837 } 838 839 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) 840 { 841 struct perf_mmap *md = &evlist->mmap[idx]; 842 843 /* 844 * No need to check messup for backward ring buffer: 845 * We can always read arbitrary long data from a backward 846 * ring buffer unless we forget to pause it before reading. 847 */ 848 return perf_mmap__read_backward(md); 849 } 850 851 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 852 { 853 return perf_evlist__mmap_read_forward(evlist, idx); 854 } 855 856 void perf_mmap__read_catchup(struct perf_mmap *md) 857 { 858 u64 head; 859 860 if (!atomic_read(&md->refcnt)) 861 return; 862 863 head = perf_mmap__read_head(md); 864 md->prev = head; 865 } 866 867 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) 868 { 869 perf_mmap__read_catchup(&evlist->mmap[idx]); 870 } 871 872 static bool perf_mmap__empty(struct perf_mmap *md) 873 { 874 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; 875 } 876 877 static void perf_mmap__get(struct perf_mmap *map) 878 { 879 atomic_inc(&map->refcnt); 880 } 881 882 static void perf_mmap__put(struct perf_mmap *md) 883 { 884 BUG_ON(md->base && atomic_read(&md->refcnt) == 0); 885 886 if (atomic_dec_and_test(&md->refcnt)) 887 perf_mmap__munmap(md); 888 } 889 890 void perf_mmap__consume(struct perf_mmap *md, bool overwrite) 891 { 892 if (!overwrite) { 893 u64 old = md->prev; 894 895 perf_mmap__write_tail(md, old); 896 } 897 898 if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) 899 perf_mmap__put(md); 900 } 901 902 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 903 { 904 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); 905 } 906 907 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, 908 struct auxtrace_mmap_params *mp __maybe_unused, 909 void *userpg __maybe_unused, 910 int fd __maybe_unused) 911 { 912 return 0; 913 } 914 915 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) 916 { 917 } 918 919 void __weak auxtrace_mmap_params__init( 920 struct auxtrace_mmap_params *mp __maybe_unused, 921 off_t auxtrace_offset __maybe_unused, 922 unsigned int auxtrace_pages __maybe_unused, 923 bool auxtrace_overwrite __maybe_unused) 924 { 925 } 926 927 void __weak auxtrace_mmap_params__set_idx( 928 struct auxtrace_mmap_params *mp __maybe_unused, 929 struct perf_evlist *evlist __maybe_unused, 930 int idx __maybe_unused, 931 bool per_cpu __maybe_unused) 932 { 933 } 934 935 static void perf_mmap__munmap(struct perf_mmap *map) 936 { 937 if (map->base != NULL) { 938 munmap(map->base, perf_mmap__mmap_len(map)); 939 map->base = NULL; 940 map->fd = -1; 941 atomic_set(&map->refcnt, 0); 942 } 943 auxtrace_mmap__munmap(&map->auxtrace_mmap); 944 } 945 946 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 947 { 948 int i; 949 950 if (evlist->mmap) 951 for (i = 0; i < evlist->nr_mmaps; i++) 952 perf_mmap__munmap(&evlist->mmap[i]); 953 954 if (evlist->backward_mmap) 955 for (i = 0; i < evlist->nr_mmaps; i++) 956 perf_mmap__munmap(&evlist->backward_mmap[i]); 957 } 958 959 void perf_evlist__munmap(struct perf_evlist *evlist) 960 { 961 perf_evlist__munmap_nofree(evlist); 962 zfree(&evlist->mmap); 963 zfree(&evlist->backward_mmap); 964 } 965 966 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) 967 { 968 int i; 969 struct perf_mmap *map; 970 971 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 972 if (cpu_map__empty(evlist->cpus)) 973 evlist->nr_mmaps = thread_map__nr(evlist->threads); 974 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 975 if (!map) 976 return NULL; 977 978 for (i = 0; i < evlist->nr_mmaps; i++) 979 map[i].fd = -1; 980 return map; 981 } 982 983 struct mmap_params { 984 int prot; 985 int mask; 986 struct auxtrace_mmap_params auxtrace_mp; 987 }; 988 989 static int perf_mmap__mmap(struct perf_mmap *map, 990 struct mmap_params *mp, int fd) 991 { 992 /* 993 * The last one will be done at perf_evlist__mmap_consume(), so that we 994 * make sure we don't prevent tools from consuming every last event in 995 * the ring buffer. 996 * 997 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 998 * anymore, but the last events for it are still in the ring buffer, 999 * waiting to be consumed. 1000 * 1001 * Tools can chose to ignore this at their own discretion, but the 1002 * evlist layer can't just drop it when filtering events in 1003 * perf_evlist__filter_pollfd(). 1004 */ 1005 atomic_set(&map->refcnt, 2); 1006 map->prev = 0; 1007 map->mask = mp->mask; 1008 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, 1009 MAP_SHARED, fd, 0); 1010 if (map->base == MAP_FAILED) { 1011 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 1012 errno); 1013 map->base = NULL; 1014 return -1; 1015 } 1016 map->fd = fd; 1017 1018 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 1019 &mp->auxtrace_mp, map->base, fd)) 1020 return -1; 1021 1022 return 0; 1023 } 1024 1025 static bool 1026 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 1027 struct perf_evsel *evsel) 1028 { 1029 if (evsel->attr.write_backward) 1030 return false; 1031 return true; 1032 } 1033 1034 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 1035 struct mmap_params *mp, int cpu_idx, 1036 int thread, int *_output, int *_output_backward) 1037 { 1038 struct perf_evsel *evsel; 1039 int revent; 1040 int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); 1041 1042 evlist__for_each_entry(evlist, evsel) { 1043 struct perf_mmap *maps = evlist->mmap; 1044 int *output = _output; 1045 int fd; 1046 int cpu; 1047 1048 if (evsel->attr.write_backward) { 1049 output = _output_backward; 1050 maps = evlist->backward_mmap; 1051 1052 if (!maps) { 1053 maps = perf_evlist__alloc_mmap(evlist); 1054 if (!maps) 1055 return -1; 1056 evlist->backward_mmap = maps; 1057 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 1058 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 1059 } 1060 } 1061 1062 if (evsel->system_wide && thread) 1063 continue; 1064 1065 cpu = cpu_map__idx(evsel->cpus, evlist_cpu); 1066 if (cpu == -1) 1067 continue; 1068 1069 fd = FD(evsel, cpu, thread); 1070 1071 if (*output == -1) { 1072 *output = fd; 1073 1074 if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) 1075 return -1; 1076 } else { 1077 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 1078 return -1; 1079 1080 perf_mmap__get(&maps[idx]); 1081 } 1082 1083 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 1084 1085 /* 1086 * The system_wide flag causes a selected event to be opened 1087 * always without a pid. Consequently it will never get a 1088 * POLLHUP, but it is used for tracking in combination with 1089 * other events, so it should not need to be polled anyway. 1090 * Therefore don't add it for polling. 1091 */ 1092 if (!evsel->system_wide && 1093 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 1094 perf_mmap__put(&maps[idx]); 1095 return -1; 1096 } 1097 1098 if (evsel->attr.read_format & PERF_FORMAT_ID) { 1099 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 1100 fd) < 0) 1101 return -1; 1102 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 1103 thread); 1104 } 1105 } 1106 1107 return 0; 1108 } 1109 1110 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 1111 struct mmap_params *mp) 1112 { 1113 int cpu, thread; 1114 int nr_cpus = cpu_map__nr(evlist->cpus); 1115 int nr_threads = thread_map__nr(evlist->threads); 1116 1117 pr_debug2("perf event ring buffer mmapped per cpu\n"); 1118 for (cpu = 0; cpu < nr_cpus; cpu++) { 1119 int output = -1; 1120 int output_backward = -1; 1121 1122 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 1123 true); 1124 1125 for (thread = 0; thread < nr_threads; thread++) { 1126 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 1127 thread, &output, &output_backward)) 1128 goto out_unmap; 1129 } 1130 } 1131 1132 return 0; 1133 1134 out_unmap: 1135 perf_evlist__munmap_nofree(evlist); 1136 return -1; 1137 } 1138 1139 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 1140 struct mmap_params *mp) 1141 { 1142 int thread; 1143 int nr_threads = thread_map__nr(evlist->threads); 1144 1145 pr_debug2("perf event ring buffer mmapped per thread\n"); 1146 for (thread = 0; thread < nr_threads; thread++) { 1147 int output = -1; 1148 int output_backward = -1; 1149 1150 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 1151 false); 1152 1153 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 1154 &output, &output_backward)) 1155 goto out_unmap; 1156 } 1157 1158 return 0; 1159 1160 out_unmap: 1161 perf_evlist__munmap_nofree(evlist); 1162 return -1; 1163 } 1164 1165 unsigned long perf_event_mlock_kb_in_pages(void) 1166 { 1167 unsigned long pages; 1168 int max; 1169 1170 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 1171 /* 1172 * Pick a once upon a time good value, i.e. things look 1173 * strange since we can't read a sysctl value, but lets not 1174 * die yet... 1175 */ 1176 max = 512; 1177 } else { 1178 max -= (page_size / 1024); 1179 } 1180 1181 pages = (max * 1024) / page_size; 1182 if (!is_power_of_2(pages)) 1183 pages = rounddown_pow_of_two(pages); 1184 1185 return pages; 1186 } 1187 1188 static size_t perf_evlist__mmap_size(unsigned long pages) 1189 { 1190 if (pages == UINT_MAX) 1191 pages = perf_event_mlock_kb_in_pages(); 1192 else if (!is_power_of_2(pages)) 1193 return 0; 1194 1195 return (pages + 1) * page_size; 1196 } 1197 1198 static long parse_pages_arg(const char *str, unsigned long min, 1199 unsigned long max) 1200 { 1201 unsigned long pages, val; 1202 static struct parse_tag tags[] = { 1203 { .tag = 'B', .mult = 1 }, 1204 { .tag = 'K', .mult = 1 << 10 }, 1205 { .tag = 'M', .mult = 1 << 20 }, 1206 { .tag = 'G', .mult = 1 << 30 }, 1207 { .tag = 0 }, 1208 }; 1209 1210 if (str == NULL) 1211 return -EINVAL; 1212 1213 val = parse_tag_value(str, tags); 1214 if (val != (unsigned long) -1) { 1215 /* we got file size value */ 1216 pages = PERF_ALIGN(val, page_size) / page_size; 1217 } else { 1218 /* we got pages count value */ 1219 char *eptr; 1220 pages = strtoul(str, &eptr, 10); 1221 if (*eptr != '\0') 1222 return -EINVAL; 1223 } 1224 1225 if (pages == 0 && min == 0) { 1226 /* leave number of pages at 0 */ 1227 } else if (!is_power_of_2(pages)) { 1228 /* round pages up to next power of 2 */ 1229 pages = roundup_pow_of_two(pages); 1230 if (!pages) 1231 return -EINVAL; 1232 pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", 1233 pages * page_size, pages); 1234 } 1235 1236 if (pages > max) 1237 return -EINVAL; 1238 1239 return pages; 1240 } 1241 1242 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 1243 { 1244 unsigned long max = UINT_MAX; 1245 long pages; 1246 1247 if (max > SIZE_MAX / page_size) 1248 max = SIZE_MAX / page_size; 1249 1250 pages = parse_pages_arg(str, 1, max); 1251 if (pages < 0) { 1252 pr_err("Invalid argument for --mmap_pages/-m\n"); 1253 return -1; 1254 } 1255 1256 *mmap_pages = pages; 1257 return 0; 1258 } 1259 1260 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1261 int unset __maybe_unused) 1262 { 1263 return __perf_evlist__parse_mmap_pages(opt->value, str); 1264 } 1265 1266 /** 1267 * perf_evlist__mmap_ex - Create mmaps to receive events. 1268 * @evlist: list of events 1269 * @pages: map length in pages 1270 * @overwrite: overwrite older events? 1271 * @auxtrace_pages - auxtrace map length in pages 1272 * @auxtrace_overwrite - overwrite older auxtrace data? 1273 * 1274 * If @overwrite is %false the user needs to signal event consumption using 1275 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1276 * automatically. 1277 * 1278 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1279 * consumption using auxtrace_mmap__write_tail(). 1280 * 1281 * Return: %0 on success, negative error code otherwise. 1282 */ 1283 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1284 bool overwrite, unsigned int auxtrace_pages, 1285 bool auxtrace_overwrite) 1286 { 1287 struct perf_evsel *evsel; 1288 const struct cpu_map *cpus = evlist->cpus; 1289 const struct thread_map *threads = evlist->threads; 1290 struct mmap_params mp = { 1291 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), 1292 }; 1293 1294 if (!evlist->mmap) 1295 evlist->mmap = perf_evlist__alloc_mmap(evlist); 1296 if (!evlist->mmap) 1297 return -ENOMEM; 1298 1299 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1300 return -ENOMEM; 1301 1302 evlist->overwrite = overwrite; 1303 evlist->mmap_len = perf_evlist__mmap_size(pages); 1304 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1305 mp.mask = evlist->mmap_len - page_size - 1; 1306 1307 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1308 auxtrace_pages, auxtrace_overwrite); 1309 1310 evlist__for_each_entry(evlist, evsel) { 1311 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1312 evsel->sample_id == NULL && 1313 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1314 return -ENOMEM; 1315 } 1316 1317 if (cpu_map__empty(cpus)) 1318 return perf_evlist__mmap_per_thread(evlist, &mp); 1319 1320 return perf_evlist__mmap_per_cpu(evlist, &mp); 1321 } 1322 1323 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1324 bool overwrite) 1325 { 1326 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); 1327 } 1328 1329 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1330 { 1331 struct cpu_map *cpus; 1332 struct thread_map *threads; 1333 1334 threads = thread_map__new_str(target->pid, target->tid, target->uid); 1335 1336 if (!threads) 1337 return -1; 1338 1339 if (target__uses_dummy_map(target)) 1340 cpus = cpu_map__dummy_new(); 1341 else 1342 cpus = cpu_map__new(target->cpu_list); 1343 1344 if (!cpus) 1345 goto out_delete_threads; 1346 1347 evlist->has_user_cpus = !!target->cpu_list; 1348 1349 perf_evlist__set_maps(evlist, cpus, threads); 1350 1351 return 0; 1352 1353 out_delete_threads: 1354 thread_map__put(threads); 1355 return -1; 1356 } 1357 1358 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1359 struct thread_map *threads) 1360 { 1361 /* 1362 * Allow for the possibility that one or another of the maps isn't being 1363 * changed i.e. don't put it. Note we are assuming the maps that are 1364 * being applied are brand new and evlist is taking ownership of the 1365 * original reference count of 1. If that is not the case it is up to 1366 * the caller to increase the reference count. 1367 */ 1368 if (cpus != evlist->cpus) { 1369 cpu_map__put(evlist->cpus); 1370 evlist->cpus = cpu_map__get(cpus); 1371 } 1372 1373 if (threads != evlist->threads) { 1374 thread_map__put(evlist->threads); 1375 evlist->threads = thread_map__get(threads); 1376 } 1377 1378 perf_evlist__propagate_maps(evlist); 1379 } 1380 1381 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1382 enum perf_event_sample_format bit) 1383 { 1384 struct perf_evsel *evsel; 1385 1386 evlist__for_each_entry(evlist, evsel) 1387 __perf_evsel__set_sample_bit(evsel, bit); 1388 } 1389 1390 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1391 enum perf_event_sample_format bit) 1392 { 1393 struct perf_evsel *evsel; 1394 1395 evlist__for_each_entry(evlist, evsel) 1396 __perf_evsel__reset_sample_bit(evsel, bit); 1397 } 1398 1399 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1400 { 1401 struct perf_evsel *evsel; 1402 int err = 0; 1403 const int ncpus = cpu_map__nr(evlist->cpus), 1404 nthreads = thread_map__nr(evlist->threads); 1405 1406 evlist__for_each_entry(evlist, evsel) { 1407 if (evsel->filter == NULL) 1408 continue; 1409 1410 /* 1411 * filters only work for tracepoint event, which doesn't have cpu limit. 1412 * So evlist and evsel should always be same. 1413 */ 1414 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); 1415 if (err) { 1416 *err_evsel = evsel; 1417 break; 1418 } 1419 } 1420 1421 return err; 1422 } 1423 1424 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) 1425 { 1426 struct perf_evsel *evsel; 1427 int err = 0; 1428 1429 evlist__for_each_entry(evlist, evsel) { 1430 if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 1431 continue; 1432 1433 err = perf_evsel__set_filter(evsel, filter); 1434 if (err) 1435 break; 1436 } 1437 1438 return err; 1439 } 1440 1441 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1442 { 1443 char *filter; 1444 int ret = -1; 1445 size_t i; 1446 1447 for (i = 0; i < npids; ++i) { 1448 if (i == 0) { 1449 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1450 return -1; 1451 } else { 1452 char *tmp; 1453 1454 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1455 goto out_free; 1456 1457 free(filter); 1458 filter = tmp; 1459 } 1460 } 1461 1462 ret = perf_evlist__set_filter(evlist, filter); 1463 out_free: 1464 free(filter); 1465 return ret; 1466 } 1467 1468 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) 1469 { 1470 return perf_evlist__set_filter_pids(evlist, 1, &pid); 1471 } 1472 1473 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1474 { 1475 struct perf_evsel *pos; 1476 1477 if (evlist->nr_entries == 1) 1478 return true; 1479 1480 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1481 return false; 1482 1483 evlist__for_each_entry(evlist, pos) { 1484 if (pos->id_pos != evlist->id_pos || 1485 pos->is_pos != evlist->is_pos) 1486 return false; 1487 } 1488 1489 return true; 1490 } 1491 1492 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1493 { 1494 struct perf_evsel *evsel; 1495 1496 if (evlist->combined_sample_type) 1497 return evlist->combined_sample_type; 1498 1499 evlist__for_each_entry(evlist, evsel) 1500 evlist->combined_sample_type |= evsel->attr.sample_type; 1501 1502 return evlist->combined_sample_type; 1503 } 1504 1505 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1506 { 1507 evlist->combined_sample_type = 0; 1508 return __perf_evlist__combined_sample_type(evlist); 1509 } 1510 1511 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1512 { 1513 struct perf_evsel *evsel; 1514 u64 branch_type = 0; 1515 1516 evlist__for_each_entry(evlist, evsel) 1517 branch_type |= evsel->attr.branch_sample_type; 1518 return branch_type; 1519 } 1520 1521 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1522 { 1523 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1524 u64 read_format = first->attr.read_format; 1525 u64 sample_type = first->attr.sample_type; 1526 1527 evlist__for_each_entry(evlist, pos) { 1528 if (read_format != pos->attr.read_format) 1529 return false; 1530 } 1531 1532 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1533 if ((sample_type & PERF_SAMPLE_READ) && 1534 !(read_format & PERF_FORMAT_ID)) { 1535 return false; 1536 } 1537 1538 return true; 1539 } 1540 1541 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1542 { 1543 struct perf_evsel *first = perf_evlist__first(evlist); 1544 return first->attr.read_format; 1545 } 1546 1547 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1548 { 1549 struct perf_evsel *first = perf_evlist__first(evlist); 1550 struct perf_sample *data; 1551 u64 sample_type; 1552 u16 size = 0; 1553 1554 if (!first->attr.sample_id_all) 1555 goto out; 1556 1557 sample_type = first->attr.sample_type; 1558 1559 if (sample_type & PERF_SAMPLE_TID) 1560 size += sizeof(data->tid) * 2; 1561 1562 if (sample_type & PERF_SAMPLE_TIME) 1563 size += sizeof(data->time); 1564 1565 if (sample_type & PERF_SAMPLE_ID) 1566 size += sizeof(data->id); 1567 1568 if (sample_type & PERF_SAMPLE_STREAM_ID) 1569 size += sizeof(data->stream_id); 1570 1571 if (sample_type & PERF_SAMPLE_CPU) 1572 size += sizeof(data->cpu) * 2; 1573 1574 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1575 size += sizeof(data->id); 1576 out: 1577 return size; 1578 } 1579 1580 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1581 { 1582 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1583 1584 evlist__for_each_entry_continue(evlist, pos) { 1585 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1586 return false; 1587 } 1588 1589 return true; 1590 } 1591 1592 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1593 { 1594 struct perf_evsel *first = perf_evlist__first(evlist); 1595 return first->attr.sample_id_all; 1596 } 1597 1598 void perf_evlist__set_selected(struct perf_evlist *evlist, 1599 struct perf_evsel *evsel) 1600 { 1601 evlist->selected = evsel; 1602 } 1603 1604 void perf_evlist__close(struct perf_evlist *evlist) 1605 { 1606 struct perf_evsel *evsel; 1607 int ncpus = cpu_map__nr(evlist->cpus); 1608 int nthreads = thread_map__nr(evlist->threads); 1609 int n; 1610 1611 evlist__for_each_entry_reverse(evlist, evsel) { 1612 n = evsel->cpus ? evsel->cpus->nr : ncpus; 1613 perf_evsel__close(evsel, n, nthreads); 1614 } 1615 } 1616 1617 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1618 { 1619 struct cpu_map *cpus; 1620 struct thread_map *threads; 1621 int err = -ENOMEM; 1622 1623 /* 1624 * Try reading /sys/devices/system/cpu/online to get 1625 * an all cpus map. 1626 * 1627 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1628 * code needs an overhaul to properly forward the 1629 * error, and we may not want to do that fallback to a 1630 * default cpu identity map :-\ 1631 */ 1632 cpus = cpu_map__new(NULL); 1633 if (!cpus) 1634 goto out; 1635 1636 threads = thread_map__new_dummy(); 1637 if (!threads) 1638 goto out_put; 1639 1640 perf_evlist__set_maps(evlist, cpus, threads); 1641 out: 1642 return err; 1643 out_put: 1644 cpu_map__put(cpus); 1645 goto out; 1646 } 1647 1648 int perf_evlist__open(struct perf_evlist *evlist) 1649 { 1650 struct perf_evsel *evsel; 1651 int err; 1652 1653 /* 1654 * Default: one fd per CPU, all threads, aka systemwide 1655 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1656 */ 1657 if (evlist->threads == NULL && evlist->cpus == NULL) { 1658 err = perf_evlist__create_syswide_maps(evlist); 1659 if (err < 0) 1660 goto out_err; 1661 } 1662 1663 perf_evlist__update_id_pos(evlist); 1664 1665 evlist__for_each_entry(evlist, evsel) { 1666 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1667 if (err < 0) 1668 goto out_err; 1669 } 1670 1671 return 0; 1672 out_err: 1673 perf_evlist__close(evlist); 1674 errno = -err; 1675 return err; 1676 } 1677 1678 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1679 const char *argv[], bool pipe_output, 1680 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1681 { 1682 int child_ready_pipe[2], go_pipe[2]; 1683 char bf; 1684 1685 if (pipe(child_ready_pipe) < 0) { 1686 perror("failed to create 'ready' pipe"); 1687 return -1; 1688 } 1689 1690 if (pipe(go_pipe) < 0) { 1691 perror("failed to create 'go' pipe"); 1692 goto out_close_ready_pipe; 1693 } 1694 1695 evlist->workload.pid = fork(); 1696 if (evlist->workload.pid < 0) { 1697 perror("failed to fork"); 1698 goto out_close_pipes; 1699 } 1700 1701 if (!evlist->workload.pid) { 1702 int ret; 1703 1704 if (pipe_output) 1705 dup2(2, 1); 1706 1707 signal(SIGTERM, SIG_DFL); 1708 1709 close(child_ready_pipe[0]); 1710 close(go_pipe[1]); 1711 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1712 1713 /* 1714 * Tell the parent we're ready to go 1715 */ 1716 close(child_ready_pipe[1]); 1717 1718 /* 1719 * Wait until the parent tells us to go. 1720 */ 1721 ret = read(go_pipe[0], &bf, 1); 1722 /* 1723 * The parent will ask for the execvp() to be performed by 1724 * writing exactly one byte, in workload.cork_fd, usually via 1725 * perf_evlist__start_workload(). 1726 * 1727 * For cancelling the workload without actually running it, 1728 * the parent will just close workload.cork_fd, without writing 1729 * anything, i.e. read will return zero and we just exit() 1730 * here. 1731 */ 1732 if (ret != 1) { 1733 if (ret == -1) 1734 perror("unable to read pipe"); 1735 exit(ret); 1736 } 1737 1738 execvp(argv[0], (char **)argv); 1739 1740 if (exec_error) { 1741 union sigval val; 1742 1743 val.sival_int = errno; 1744 if (sigqueue(getppid(), SIGUSR1, val)) 1745 perror(argv[0]); 1746 } else 1747 perror(argv[0]); 1748 exit(-1); 1749 } 1750 1751 if (exec_error) { 1752 struct sigaction act = { 1753 .sa_flags = SA_SIGINFO, 1754 .sa_sigaction = exec_error, 1755 }; 1756 sigaction(SIGUSR1, &act, NULL); 1757 } 1758 1759 if (target__none(target)) { 1760 if (evlist->threads == NULL) { 1761 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1762 __func__, __LINE__); 1763 goto out_close_pipes; 1764 } 1765 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1766 } 1767 1768 close(child_ready_pipe[1]); 1769 close(go_pipe[0]); 1770 /* 1771 * wait for child to settle 1772 */ 1773 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1774 perror("unable to read pipe"); 1775 goto out_close_pipes; 1776 } 1777 1778 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1779 evlist->workload.cork_fd = go_pipe[1]; 1780 close(child_ready_pipe[0]); 1781 return 0; 1782 1783 out_close_pipes: 1784 close(go_pipe[0]); 1785 close(go_pipe[1]); 1786 out_close_ready_pipe: 1787 close(child_ready_pipe[0]); 1788 close(child_ready_pipe[1]); 1789 return -1; 1790 } 1791 1792 int perf_evlist__start_workload(struct perf_evlist *evlist) 1793 { 1794 if (evlist->workload.cork_fd > 0) { 1795 char bf = 0; 1796 int ret; 1797 /* 1798 * Remove the cork, let it rip! 1799 */ 1800 ret = write(evlist->workload.cork_fd, &bf, 1); 1801 if (ret < 0) 1802 perror("enable to write to pipe"); 1803 1804 close(evlist->workload.cork_fd); 1805 return ret; 1806 } 1807 1808 return 0; 1809 } 1810 1811 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1812 struct perf_sample *sample) 1813 { 1814 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1815 1816 if (!evsel) 1817 return -EFAULT; 1818 return perf_evsel__parse_sample(evsel, event, sample); 1819 } 1820 1821 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1822 { 1823 struct perf_evsel *evsel; 1824 size_t printed = 0; 1825 1826 evlist__for_each_entry(evlist, evsel) { 1827 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1828 perf_evsel__name(evsel)); 1829 } 1830 1831 return printed + fprintf(fp, "\n"); 1832 } 1833 1834 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1835 int err, char *buf, size_t size) 1836 { 1837 int printed, value; 1838 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1839 1840 switch (err) { 1841 case EACCES: 1842 case EPERM: 1843 printed = scnprintf(buf, size, 1844 "Error:\t%s.\n" 1845 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1846 1847 value = perf_event_paranoid(); 1848 1849 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1850 1851 if (value >= 2) { 1852 printed += scnprintf(buf + printed, size - printed, 1853 "For your workloads it needs to be <= 1\nHint:\t"); 1854 } 1855 printed += scnprintf(buf + printed, size - printed, 1856 "For system wide tracing it needs to be set to -1.\n"); 1857 1858 printed += scnprintf(buf + printed, size - printed, 1859 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1860 "Hint:\tThe current value is %d.", value); 1861 break; 1862 case EINVAL: { 1863 struct perf_evsel *first = perf_evlist__first(evlist); 1864 int max_freq; 1865 1866 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1867 goto out_default; 1868 1869 if (first->attr.sample_freq < (u64)max_freq) 1870 goto out_default; 1871 1872 printed = scnprintf(buf, size, 1873 "Error:\t%s.\n" 1874 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1875 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1876 emsg, max_freq, first->attr.sample_freq); 1877 break; 1878 } 1879 default: 1880 out_default: 1881 scnprintf(buf, size, "%s", emsg); 1882 break; 1883 } 1884 1885 return 0; 1886 } 1887 1888 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1889 { 1890 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1891 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1892 1893 switch (err) { 1894 case EPERM: 1895 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1896 printed += scnprintf(buf + printed, size - printed, 1897 "Error:\t%s.\n" 1898 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1899 "Hint:\tTried using %zd kB.\n", 1900 emsg, pages_max_per_user, pages_attempted); 1901 1902 if (pages_attempted >= pages_max_per_user) { 1903 printed += scnprintf(buf + printed, size - printed, 1904 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1905 pages_max_per_user + pages_attempted); 1906 } 1907 1908 printed += scnprintf(buf + printed, size - printed, 1909 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1910 break; 1911 default: 1912 scnprintf(buf, size, "%s", emsg); 1913 break; 1914 } 1915 1916 return 0; 1917 } 1918 1919 void perf_evlist__to_front(struct perf_evlist *evlist, 1920 struct perf_evsel *move_evsel) 1921 { 1922 struct perf_evsel *evsel, *n; 1923 LIST_HEAD(move); 1924 1925 if (move_evsel == perf_evlist__first(evlist)) 1926 return; 1927 1928 evlist__for_each_entry_safe(evlist, n, evsel) { 1929 if (evsel->leader == move_evsel->leader) 1930 list_move_tail(&evsel->node, &move); 1931 } 1932 1933 list_splice(&move, &evlist->entries); 1934 } 1935 1936 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1937 struct perf_evsel *tracking_evsel) 1938 { 1939 struct perf_evsel *evsel; 1940 1941 if (tracking_evsel->tracking) 1942 return; 1943 1944 evlist__for_each_entry(evlist, evsel) { 1945 if (evsel != tracking_evsel) 1946 evsel->tracking = false; 1947 } 1948 1949 tracking_evsel->tracking = true; 1950 } 1951 1952 struct perf_evsel * 1953 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1954 const char *str) 1955 { 1956 struct perf_evsel *evsel; 1957 1958 evlist__for_each_entry(evlist, evsel) { 1959 if (!evsel->name) 1960 continue; 1961 if (strcmp(str, evsel->name) == 0) 1962 return evsel; 1963 } 1964 1965 return NULL; 1966 } 1967 1968 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1969 enum bkw_mmap_state state) 1970 { 1971 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1972 enum action { 1973 NONE, 1974 PAUSE, 1975 RESUME, 1976 } action = NONE; 1977 1978 if (!evlist->backward_mmap) 1979 return; 1980 1981 switch (old_state) { 1982 case BKW_MMAP_NOTREADY: { 1983 if (state != BKW_MMAP_RUNNING) 1984 goto state_err;; 1985 break; 1986 } 1987 case BKW_MMAP_RUNNING: { 1988 if (state != BKW_MMAP_DATA_PENDING) 1989 goto state_err; 1990 action = PAUSE; 1991 break; 1992 } 1993 case BKW_MMAP_DATA_PENDING: { 1994 if (state != BKW_MMAP_EMPTY) 1995 goto state_err; 1996 break; 1997 } 1998 case BKW_MMAP_EMPTY: { 1999 if (state != BKW_MMAP_RUNNING) 2000 goto state_err; 2001 action = RESUME; 2002 break; 2003 } 2004 default: 2005 WARN_ONCE(1, "Shouldn't get there\n"); 2006 } 2007 2008 evlist->bkw_mmap_state = state; 2009 2010 switch (action) { 2011 case PAUSE: 2012 perf_evlist__pause(evlist); 2013 break; 2014 case RESUME: 2015 perf_evlist__resume(evlist); 2016 break; 2017 case NONE: 2018 default: 2019 break; 2020 } 2021 2022 state_err: 2023 return; 2024 } 2025