1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <poll.h> 12 #include "cpumap.h" 13 #include "thread_map.h" 14 #include "target.h" 15 #include "evlist.h" 16 #include "evsel.h" 17 #include "debug.h" 18 #include "asm/bug.h" 19 #include <unistd.h> 20 21 #include "parse-events.h" 22 #include <subcmd/parse-options.h> 23 24 #include <sys/mman.h> 25 26 #include <linux/bitops.h> 27 #include <linux/hash.h> 28 #include <linux/log2.h> 29 #include <linux/err.h> 30 31 static void perf_mmap__munmap(struct perf_mmap *map); 32 static void perf_mmap__put(struct perf_mmap *map); 33 34 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 35 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 36 37 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 38 struct thread_map *threads) 39 { 40 int i; 41 42 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 43 INIT_HLIST_HEAD(&evlist->heads[i]); 44 INIT_LIST_HEAD(&evlist->entries); 45 perf_evlist__set_maps(evlist, cpus, threads); 46 fdarray__init(&evlist->pollfd, 64); 47 evlist->workload.pid = -1; 48 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 49 } 50 51 struct perf_evlist *perf_evlist__new(void) 52 { 53 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 54 55 if (evlist != NULL) 56 perf_evlist__init(evlist, NULL, NULL); 57 58 return evlist; 59 } 60 61 struct perf_evlist *perf_evlist__new_default(void) 62 { 63 struct perf_evlist *evlist = perf_evlist__new(); 64 65 if (evlist && perf_evlist__add_default(evlist)) { 66 perf_evlist__delete(evlist); 67 evlist = NULL; 68 } 69 70 return evlist; 71 } 72 73 struct perf_evlist *perf_evlist__new_dummy(void) 74 { 75 struct perf_evlist *evlist = perf_evlist__new(); 76 77 if (evlist && perf_evlist__add_dummy(evlist)) { 78 perf_evlist__delete(evlist); 79 evlist = NULL; 80 } 81 82 return evlist; 83 } 84 85 /** 86 * perf_evlist__set_id_pos - set the positions of event ids. 87 * @evlist: selected event list 88 * 89 * Events with compatible sample types all have the same id_pos 90 * and is_pos. For convenience, put a copy on evlist. 91 */ 92 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 93 { 94 struct perf_evsel *first = perf_evlist__first(evlist); 95 96 evlist->id_pos = first->id_pos; 97 evlist->is_pos = first->is_pos; 98 } 99 100 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 101 { 102 struct perf_evsel *evsel; 103 104 evlist__for_each_entry(evlist, evsel) 105 perf_evsel__calc_id_pos(evsel); 106 107 perf_evlist__set_id_pos(evlist); 108 } 109 110 static void perf_evlist__purge(struct perf_evlist *evlist) 111 { 112 struct perf_evsel *pos, *n; 113 114 evlist__for_each_entry_safe(evlist, n, pos) { 115 list_del_init(&pos->node); 116 pos->evlist = NULL; 117 perf_evsel__delete(pos); 118 } 119 120 evlist->nr_entries = 0; 121 } 122 123 void perf_evlist__exit(struct perf_evlist *evlist) 124 { 125 zfree(&evlist->mmap); 126 zfree(&evlist->backward_mmap); 127 fdarray__exit(&evlist->pollfd); 128 } 129 130 void perf_evlist__delete(struct perf_evlist *evlist) 131 { 132 if (evlist == NULL) 133 return; 134 135 perf_evlist__munmap(evlist); 136 perf_evlist__close(evlist); 137 cpu_map__put(evlist->cpus); 138 thread_map__put(evlist->threads); 139 evlist->cpus = NULL; 140 evlist->threads = NULL; 141 perf_evlist__purge(evlist); 142 perf_evlist__exit(evlist); 143 free(evlist); 144 } 145 146 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 147 struct perf_evsel *evsel) 148 { 149 /* 150 * We already have cpus for evsel (via PMU sysfs) so 151 * keep it, if there's no target cpu list defined. 152 */ 153 if (!evsel->own_cpus || evlist->has_user_cpus) { 154 cpu_map__put(evsel->cpus); 155 evsel->cpus = cpu_map__get(evlist->cpus); 156 } else if (evsel->cpus != evsel->own_cpus) { 157 cpu_map__put(evsel->cpus); 158 evsel->cpus = cpu_map__get(evsel->own_cpus); 159 } 160 161 thread_map__put(evsel->threads); 162 evsel->threads = thread_map__get(evlist->threads); 163 } 164 165 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 166 { 167 struct perf_evsel *evsel; 168 169 evlist__for_each_entry(evlist, evsel) 170 __perf_evlist__propagate_maps(evlist, evsel); 171 } 172 173 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 174 { 175 entry->evlist = evlist; 176 list_add_tail(&entry->node, &evlist->entries); 177 entry->idx = evlist->nr_entries; 178 entry->tracking = !entry->idx; 179 180 if (!evlist->nr_entries++) 181 perf_evlist__set_id_pos(evlist); 182 183 __perf_evlist__propagate_maps(evlist, entry); 184 } 185 186 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 187 { 188 evsel->evlist = NULL; 189 list_del_init(&evsel->node); 190 evlist->nr_entries -= 1; 191 } 192 193 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 194 struct list_head *list) 195 { 196 struct perf_evsel *evsel, *temp; 197 198 __evlist__for_each_entry_safe(list, temp, evsel) { 199 list_del_init(&evsel->node); 200 perf_evlist__add(evlist, evsel); 201 } 202 } 203 204 void __perf_evlist__set_leader(struct list_head *list) 205 { 206 struct perf_evsel *evsel, *leader; 207 208 leader = list_entry(list->next, struct perf_evsel, node); 209 evsel = list_entry(list->prev, struct perf_evsel, node); 210 211 leader->nr_members = evsel->idx - leader->idx + 1; 212 213 __evlist__for_each_entry(list, evsel) { 214 evsel->leader = leader; 215 } 216 } 217 218 void perf_evlist__set_leader(struct perf_evlist *evlist) 219 { 220 if (evlist->nr_entries) { 221 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 222 __perf_evlist__set_leader(&evlist->entries); 223 } 224 } 225 226 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) 227 { 228 attr->precise_ip = 3; 229 230 while (attr->precise_ip != 0) { 231 int fd = sys_perf_event_open(attr, 0, -1, -1, 0); 232 if (fd != -1) { 233 close(fd); 234 break; 235 } 236 --attr->precise_ip; 237 } 238 } 239 240 int perf_evlist__add_default(struct perf_evlist *evlist) 241 { 242 struct perf_evsel *evsel = perf_evsel__new_cycles(); 243 244 if (evsel == NULL) 245 return -ENOMEM; 246 247 perf_evlist__add(evlist, evsel); 248 return 0; 249 } 250 251 int perf_evlist__add_dummy(struct perf_evlist *evlist) 252 { 253 struct perf_event_attr attr = { 254 .type = PERF_TYPE_SOFTWARE, 255 .config = PERF_COUNT_SW_DUMMY, 256 .size = sizeof(attr), /* to capture ABI version */ 257 }; 258 struct perf_evsel *evsel = perf_evsel__new(&attr); 259 260 if (evsel == NULL) 261 return -ENOMEM; 262 263 perf_evlist__add(evlist, evsel); 264 return 0; 265 } 266 267 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 268 struct perf_event_attr *attrs, size_t nr_attrs) 269 { 270 struct perf_evsel *evsel, *n; 271 LIST_HEAD(head); 272 size_t i; 273 274 for (i = 0; i < nr_attrs; i++) { 275 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 276 if (evsel == NULL) 277 goto out_delete_partial_list; 278 list_add_tail(&evsel->node, &head); 279 } 280 281 perf_evlist__splice_list_tail(evlist, &head); 282 283 return 0; 284 285 out_delete_partial_list: 286 __evlist__for_each_entry_safe(&head, n, evsel) 287 perf_evsel__delete(evsel); 288 return -1; 289 } 290 291 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 292 struct perf_event_attr *attrs, size_t nr_attrs) 293 { 294 size_t i; 295 296 for (i = 0; i < nr_attrs; i++) 297 event_attr_init(attrs + i); 298 299 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 300 } 301 302 struct perf_evsel * 303 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 304 { 305 struct perf_evsel *evsel; 306 307 evlist__for_each_entry(evlist, evsel) { 308 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 309 (int)evsel->attr.config == id) 310 return evsel; 311 } 312 313 return NULL; 314 } 315 316 struct perf_evsel * 317 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 318 const char *name) 319 { 320 struct perf_evsel *evsel; 321 322 evlist__for_each_entry(evlist, evsel) { 323 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 324 (strcmp(evsel->name, name) == 0)) 325 return evsel; 326 } 327 328 return NULL; 329 } 330 331 int perf_evlist__add_newtp(struct perf_evlist *evlist, 332 const char *sys, const char *name, void *handler) 333 { 334 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 335 336 if (IS_ERR(evsel)) 337 return -1; 338 339 evsel->handler = handler; 340 perf_evlist__add(evlist, evsel); 341 return 0; 342 } 343 344 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 345 struct perf_evsel *evsel) 346 { 347 if (evsel->system_wide) 348 return 1; 349 else 350 return thread_map__nr(evlist->threads); 351 } 352 353 void perf_evlist__disable(struct perf_evlist *evlist) 354 { 355 struct perf_evsel *pos; 356 357 evlist__for_each_entry(evlist, pos) { 358 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 359 continue; 360 perf_evsel__disable(pos); 361 } 362 363 evlist->enabled = false; 364 } 365 366 void perf_evlist__enable(struct perf_evlist *evlist) 367 { 368 struct perf_evsel *pos; 369 370 evlist__for_each_entry(evlist, pos) { 371 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 372 continue; 373 perf_evsel__enable(pos); 374 } 375 376 evlist->enabled = true; 377 } 378 379 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 380 { 381 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 382 } 383 384 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 385 struct perf_evsel *evsel, int cpu) 386 { 387 int thread; 388 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 389 390 if (!evsel->fd) 391 return -EINVAL; 392 393 for (thread = 0; thread < nr_threads; thread++) { 394 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 395 if (err) 396 return err; 397 } 398 return 0; 399 } 400 401 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 402 struct perf_evsel *evsel, 403 int thread) 404 { 405 int cpu; 406 int nr_cpus = cpu_map__nr(evlist->cpus); 407 408 if (!evsel->fd) 409 return -EINVAL; 410 411 for (cpu = 0; cpu < nr_cpus; cpu++) { 412 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 413 if (err) 414 return err; 415 } 416 return 0; 417 } 418 419 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 420 struct perf_evsel *evsel, int idx) 421 { 422 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 423 424 if (per_cpu_mmaps) 425 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 426 else 427 return perf_evlist__enable_event_thread(evlist, evsel, idx); 428 } 429 430 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 431 { 432 int nr_cpus = cpu_map__nr(evlist->cpus); 433 int nr_threads = thread_map__nr(evlist->threads); 434 int nfds = 0; 435 struct perf_evsel *evsel; 436 437 evlist__for_each_entry(evlist, evsel) { 438 if (evsel->system_wide) 439 nfds += nr_cpus; 440 else 441 nfds += nr_cpus * nr_threads; 442 } 443 444 if (fdarray__available_entries(&evlist->pollfd) < nfds && 445 fdarray__grow(&evlist->pollfd, nfds) < 0) 446 return -ENOMEM; 447 448 return 0; 449 } 450 451 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 452 struct perf_mmap *map, short revent) 453 { 454 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 455 /* 456 * Save the idx so that when we filter out fds POLLHUP'ed we can 457 * close the associated evlist->mmap[] entry. 458 */ 459 if (pos >= 0) { 460 evlist->pollfd.priv[pos].ptr = map; 461 462 fcntl(fd, F_SETFL, O_NONBLOCK); 463 } 464 465 return pos; 466 } 467 468 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 469 { 470 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 471 } 472 473 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 474 void *arg __maybe_unused) 475 { 476 struct perf_mmap *map = fda->priv[fd].ptr; 477 478 if (map) 479 perf_mmap__put(map); 480 } 481 482 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 483 { 484 return fdarray__filter(&evlist->pollfd, revents_and_mask, 485 perf_evlist__munmap_filtered, NULL); 486 } 487 488 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 489 { 490 return fdarray__poll(&evlist->pollfd, timeout); 491 } 492 493 static void perf_evlist__id_hash(struct perf_evlist *evlist, 494 struct perf_evsel *evsel, 495 int cpu, int thread, u64 id) 496 { 497 int hash; 498 struct perf_sample_id *sid = SID(evsel, cpu, thread); 499 500 sid->id = id; 501 sid->evsel = evsel; 502 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 503 hlist_add_head(&sid->node, &evlist->heads[hash]); 504 } 505 506 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 507 int cpu, int thread, u64 id) 508 { 509 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 510 evsel->id[evsel->ids++] = id; 511 } 512 513 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 514 struct perf_evsel *evsel, 515 int cpu, int thread, int fd) 516 { 517 u64 read_data[4] = { 0, }; 518 int id_idx = 1; /* The first entry is the counter value */ 519 u64 id; 520 int ret; 521 522 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 523 if (!ret) 524 goto add; 525 526 if (errno != ENOTTY) 527 return -1; 528 529 /* Legacy way to get event id.. All hail to old kernels! */ 530 531 /* 532 * This way does not work with group format read, so bail 533 * out in that case. 534 */ 535 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 536 return -1; 537 538 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 539 read(fd, &read_data, sizeof(read_data)) == -1) 540 return -1; 541 542 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 543 ++id_idx; 544 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 545 ++id_idx; 546 547 id = read_data[id_idx]; 548 549 add: 550 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 551 return 0; 552 } 553 554 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 555 struct perf_evsel *evsel, int idx, int cpu, 556 int thread) 557 { 558 struct perf_sample_id *sid = SID(evsel, cpu, thread); 559 sid->idx = idx; 560 if (evlist->cpus && cpu >= 0) 561 sid->cpu = evlist->cpus->map[cpu]; 562 else 563 sid->cpu = -1; 564 if (!evsel->system_wide && evlist->threads && thread >= 0) 565 sid->tid = thread_map__pid(evlist->threads, thread); 566 else 567 sid->tid = -1; 568 } 569 570 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 571 { 572 struct hlist_head *head; 573 struct perf_sample_id *sid; 574 int hash; 575 576 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 577 head = &evlist->heads[hash]; 578 579 hlist_for_each_entry(sid, head, node) 580 if (sid->id == id) 581 return sid; 582 583 return NULL; 584 } 585 586 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 587 { 588 struct perf_sample_id *sid; 589 590 if (evlist->nr_entries == 1 || !id) 591 return perf_evlist__first(evlist); 592 593 sid = perf_evlist__id2sid(evlist, id); 594 if (sid) 595 return sid->evsel; 596 597 if (!perf_evlist__sample_id_all(evlist)) 598 return perf_evlist__first(evlist); 599 600 return NULL; 601 } 602 603 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 604 u64 id) 605 { 606 struct perf_sample_id *sid; 607 608 if (!id) 609 return NULL; 610 611 sid = perf_evlist__id2sid(evlist, id); 612 if (sid) 613 return sid->evsel; 614 615 return NULL; 616 } 617 618 static int perf_evlist__event2id(struct perf_evlist *evlist, 619 union perf_event *event, u64 *id) 620 { 621 const u64 *array = event->sample.array; 622 ssize_t n; 623 624 n = (event->header.size - sizeof(event->header)) >> 3; 625 626 if (event->header.type == PERF_RECORD_SAMPLE) { 627 if (evlist->id_pos >= n) 628 return -1; 629 *id = array[evlist->id_pos]; 630 } else { 631 if (evlist->is_pos > n) 632 return -1; 633 n -= evlist->is_pos; 634 *id = array[n]; 635 } 636 return 0; 637 } 638 639 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 640 union perf_event *event) 641 { 642 struct perf_evsel *first = perf_evlist__first(evlist); 643 struct hlist_head *head; 644 struct perf_sample_id *sid; 645 int hash; 646 u64 id; 647 648 if (evlist->nr_entries == 1) 649 return first; 650 651 if (!first->attr.sample_id_all && 652 event->header.type != PERF_RECORD_SAMPLE) 653 return first; 654 655 if (perf_evlist__event2id(evlist, event, &id)) 656 return NULL; 657 658 /* Synthesized events have an id of zero */ 659 if (!id) 660 return first; 661 662 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 663 head = &evlist->heads[hash]; 664 665 hlist_for_each_entry(sid, head, node) { 666 if (sid->id == id) 667 return sid->evsel; 668 } 669 return NULL; 670 } 671 672 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 673 { 674 int i; 675 676 if (!evlist->backward_mmap) 677 return 0; 678 679 for (i = 0; i < evlist->nr_mmaps; i++) { 680 int fd = evlist->backward_mmap[i].fd; 681 int err; 682 683 if (fd < 0) 684 continue; 685 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); 686 if (err) 687 return err; 688 } 689 return 0; 690 } 691 692 static int perf_evlist__pause(struct perf_evlist *evlist) 693 { 694 return perf_evlist__set_paused(evlist, true); 695 } 696 697 static int perf_evlist__resume(struct perf_evlist *evlist) 698 { 699 return perf_evlist__set_paused(evlist, false); 700 } 701 702 /* When check_messup is true, 'end' must points to a good entry */ 703 static union perf_event * 704 perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, 705 u64 end, u64 *prev) 706 { 707 unsigned char *data = md->base + page_size; 708 union perf_event *event = NULL; 709 int diff = end - start; 710 711 if (check_messup) { 712 /* 713 * If we're further behind than half the buffer, there's a chance 714 * the writer will bite our tail and mess up the samples under us. 715 * 716 * If we somehow ended up ahead of the 'end', we got messed up. 717 * 718 * In either case, truncate and restart at 'end'. 719 */ 720 if (diff > md->mask / 2 || diff < 0) { 721 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 722 723 /* 724 * 'end' points to a known good entry, start there. 725 */ 726 start = end; 727 diff = 0; 728 } 729 } 730 731 if (diff >= (int)sizeof(event->header)) { 732 size_t size; 733 734 event = (union perf_event *)&data[start & md->mask]; 735 size = event->header.size; 736 737 if (size < sizeof(event->header) || diff < (int)size) { 738 event = NULL; 739 goto broken_event; 740 } 741 742 /* 743 * Event straddles the mmap boundary -- header should always 744 * be inside due to u64 alignment of output. 745 */ 746 if ((start & md->mask) + size != ((start + size) & md->mask)) { 747 unsigned int offset = start; 748 unsigned int len = min(sizeof(*event), size), cpy; 749 void *dst = md->event_copy; 750 751 do { 752 cpy = min(md->mask + 1 - (offset & md->mask), len); 753 memcpy(dst, &data[offset & md->mask], cpy); 754 offset += cpy; 755 dst += cpy; 756 len -= cpy; 757 } while (len); 758 759 event = (union perf_event *) md->event_copy; 760 } 761 762 start += size; 763 } 764 765 broken_event: 766 if (prev) 767 *prev = start; 768 769 return event; 770 } 771 772 union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) 773 { 774 u64 head; 775 u64 old = md->prev; 776 777 /* 778 * Check if event was unmapped due to a POLLHUP/POLLERR. 779 */ 780 if (!refcount_read(&md->refcnt)) 781 return NULL; 782 783 head = perf_mmap__read_head(md); 784 785 return perf_mmap__read(md, check_messup, old, head, &md->prev); 786 } 787 788 union perf_event * 789 perf_mmap__read_backward(struct perf_mmap *md) 790 { 791 u64 head, end; 792 u64 start = md->prev; 793 794 /* 795 * Check if event was unmapped due to a POLLHUP/POLLERR. 796 */ 797 if (!refcount_read(&md->refcnt)) 798 return NULL; 799 800 head = perf_mmap__read_head(md); 801 if (!head) 802 return NULL; 803 804 /* 805 * 'head' pointer starts from 0. Kernel minus sizeof(record) form 806 * it each time when kernel writes to it, so in fact 'head' is 807 * negative. 'end' pointer is made manually by adding the size of 808 * the ring buffer to 'head' pointer, means the validate data can 809 * read is the whole ring buffer. If 'end' is positive, the ring 810 * buffer has not fully filled, so we must adjust 'end' to 0. 811 * 812 * However, since both 'head' and 'end' is unsigned, we can't 813 * simply compare 'end' against 0. Here we compare '-head' and 814 * the size of the ring buffer, where -head is the number of bytes 815 * kernel write to the ring buffer. 816 */ 817 if (-head < (u64)(md->mask + 1)) 818 end = 0; 819 else 820 end = head + md->mask + 1; 821 822 return perf_mmap__read(md, false, start, end, &md->prev); 823 } 824 825 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) 826 { 827 struct perf_mmap *md = &evlist->mmap[idx]; 828 829 /* 830 * Check messup is required for forward overwritable ring buffer: 831 * memory pointed by md->prev can be overwritten in this case. 832 * No need for read-write ring buffer: kernel stop outputting when 833 * it hit md->prev (perf_mmap__consume()). 834 */ 835 return perf_mmap__read_forward(md, evlist->overwrite); 836 } 837 838 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) 839 { 840 struct perf_mmap *md = &evlist->mmap[idx]; 841 842 /* 843 * No need to check messup for backward ring buffer: 844 * We can always read arbitrary long data from a backward 845 * ring buffer unless we forget to pause it before reading. 846 */ 847 return perf_mmap__read_backward(md); 848 } 849 850 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 851 { 852 return perf_evlist__mmap_read_forward(evlist, idx); 853 } 854 855 void perf_mmap__read_catchup(struct perf_mmap *md) 856 { 857 u64 head; 858 859 if (!refcount_read(&md->refcnt)) 860 return; 861 862 head = perf_mmap__read_head(md); 863 md->prev = head; 864 } 865 866 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) 867 { 868 perf_mmap__read_catchup(&evlist->mmap[idx]); 869 } 870 871 static bool perf_mmap__empty(struct perf_mmap *md) 872 { 873 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; 874 } 875 876 static void perf_mmap__get(struct perf_mmap *map) 877 { 878 refcount_inc(&map->refcnt); 879 } 880 881 static void perf_mmap__put(struct perf_mmap *md) 882 { 883 BUG_ON(md->base && refcount_read(&md->refcnt) == 0); 884 885 if (refcount_dec_and_test(&md->refcnt)) 886 perf_mmap__munmap(md); 887 } 888 889 void perf_mmap__consume(struct perf_mmap *md, bool overwrite) 890 { 891 if (!overwrite) { 892 u64 old = md->prev; 893 894 perf_mmap__write_tail(md, old); 895 } 896 897 if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) 898 perf_mmap__put(md); 899 } 900 901 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 902 { 903 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); 904 } 905 906 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, 907 struct auxtrace_mmap_params *mp __maybe_unused, 908 void *userpg __maybe_unused, 909 int fd __maybe_unused) 910 { 911 return 0; 912 } 913 914 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) 915 { 916 } 917 918 void __weak auxtrace_mmap_params__init( 919 struct auxtrace_mmap_params *mp __maybe_unused, 920 off_t auxtrace_offset __maybe_unused, 921 unsigned int auxtrace_pages __maybe_unused, 922 bool auxtrace_overwrite __maybe_unused) 923 { 924 } 925 926 void __weak auxtrace_mmap_params__set_idx( 927 struct auxtrace_mmap_params *mp __maybe_unused, 928 struct perf_evlist *evlist __maybe_unused, 929 int idx __maybe_unused, 930 bool per_cpu __maybe_unused) 931 { 932 } 933 934 static void perf_mmap__munmap(struct perf_mmap *map) 935 { 936 if (map->base != NULL) { 937 munmap(map->base, perf_mmap__mmap_len(map)); 938 map->base = NULL; 939 map->fd = -1; 940 refcount_set(&map->refcnt, 0); 941 } 942 auxtrace_mmap__munmap(&map->auxtrace_mmap); 943 } 944 945 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 946 { 947 int i; 948 949 if (evlist->mmap) 950 for (i = 0; i < evlist->nr_mmaps; i++) 951 perf_mmap__munmap(&evlist->mmap[i]); 952 953 if (evlist->backward_mmap) 954 for (i = 0; i < evlist->nr_mmaps; i++) 955 perf_mmap__munmap(&evlist->backward_mmap[i]); 956 } 957 958 void perf_evlist__munmap(struct perf_evlist *evlist) 959 { 960 perf_evlist__munmap_nofree(evlist); 961 zfree(&evlist->mmap); 962 zfree(&evlist->backward_mmap); 963 } 964 965 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) 966 { 967 int i; 968 struct perf_mmap *map; 969 970 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 971 if (cpu_map__empty(evlist->cpus)) 972 evlist->nr_mmaps = thread_map__nr(evlist->threads); 973 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 974 if (!map) 975 return NULL; 976 977 for (i = 0; i < evlist->nr_mmaps; i++) { 978 map[i].fd = -1; 979 /* 980 * When the perf_mmap() call is made we grab one refcount, plus 981 * one extra to let perf_evlist__mmap_consume() get the last 982 * events after all real references (perf_mmap__get()) are 983 * dropped. 984 * 985 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 986 * thus does perf_mmap__get() on it. 987 */ 988 refcount_set(&map[i].refcnt, 0); 989 } 990 return map; 991 } 992 993 struct mmap_params { 994 int prot; 995 int mask; 996 struct auxtrace_mmap_params auxtrace_mp; 997 }; 998 999 static int perf_mmap__mmap(struct perf_mmap *map, 1000 struct mmap_params *mp, int fd) 1001 { 1002 /* 1003 * The last one will be done at perf_evlist__mmap_consume(), so that we 1004 * make sure we don't prevent tools from consuming every last event in 1005 * the ring buffer. 1006 * 1007 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 1008 * anymore, but the last events for it are still in the ring buffer, 1009 * waiting to be consumed. 1010 * 1011 * Tools can chose to ignore this at their own discretion, but the 1012 * evlist layer can't just drop it when filtering events in 1013 * perf_evlist__filter_pollfd(). 1014 */ 1015 refcount_set(&map->refcnt, 2); 1016 map->prev = 0; 1017 map->mask = mp->mask; 1018 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, 1019 MAP_SHARED, fd, 0); 1020 if (map->base == MAP_FAILED) { 1021 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 1022 errno); 1023 map->base = NULL; 1024 return -1; 1025 } 1026 map->fd = fd; 1027 1028 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 1029 &mp->auxtrace_mp, map->base, fd)) 1030 return -1; 1031 1032 return 0; 1033 } 1034 1035 static bool 1036 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 1037 struct perf_evsel *evsel) 1038 { 1039 if (evsel->attr.write_backward) 1040 return false; 1041 return true; 1042 } 1043 1044 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 1045 struct mmap_params *mp, int cpu_idx, 1046 int thread, int *_output, int *_output_backward) 1047 { 1048 struct perf_evsel *evsel; 1049 int revent; 1050 int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); 1051 1052 evlist__for_each_entry(evlist, evsel) { 1053 struct perf_mmap *maps = evlist->mmap; 1054 int *output = _output; 1055 int fd; 1056 int cpu; 1057 1058 if (evsel->attr.write_backward) { 1059 output = _output_backward; 1060 maps = evlist->backward_mmap; 1061 1062 if (!maps) { 1063 maps = perf_evlist__alloc_mmap(evlist); 1064 if (!maps) 1065 return -1; 1066 evlist->backward_mmap = maps; 1067 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 1068 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 1069 } 1070 } 1071 1072 if (evsel->system_wide && thread) 1073 continue; 1074 1075 cpu = cpu_map__idx(evsel->cpus, evlist_cpu); 1076 if (cpu == -1) 1077 continue; 1078 1079 fd = FD(evsel, cpu, thread); 1080 1081 if (*output == -1) { 1082 *output = fd; 1083 1084 if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) 1085 return -1; 1086 } else { 1087 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 1088 return -1; 1089 1090 perf_mmap__get(&maps[idx]); 1091 } 1092 1093 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 1094 1095 /* 1096 * The system_wide flag causes a selected event to be opened 1097 * always without a pid. Consequently it will never get a 1098 * POLLHUP, but it is used for tracking in combination with 1099 * other events, so it should not need to be polled anyway. 1100 * Therefore don't add it for polling. 1101 */ 1102 if (!evsel->system_wide && 1103 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 1104 perf_mmap__put(&maps[idx]); 1105 return -1; 1106 } 1107 1108 if (evsel->attr.read_format & PERF_FORMAT_ID) { 1109 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 1110 fd) < 0) 1111 return -1; 1112 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 1113 thread); 1114 } 1115 } 1116 1117 return 0; 1118 } 1119 1120 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 1121 struct mmap_params *mp) 1122 { 1123 int cpu, thread; 1124 int nr_cpus = cpu_map__nr(evlist->cpus); 1125 int nr_threads = thread_map__nr(evlist->threads); 1126 1127 pr_debug2("perf event ring buffer mmapped per cpu\n"); 1128 for (cpu = 0; cpu < nr_cpus; cpu++) { 1129 int output = -1; 1130 int output_backward = -1; 1131 1132 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 1133 true); 1134 1135 for (thread = 0; thread < nr_threads; thread++) { 1136 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 1137 thread, &output, &output_backward)) 1138 goto out_unmap; 1139 } 1140 } 1141 1142 return 0; 1143 1144 out_unmap: 1145 perf_evlist__munmap_nofree(evlist); 1146 return -1; 1147 } 1148 1149 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 1150 struct mmap_params *mp) 1151 { 1152 int thread; 1153 int nr_threads = thread_map__nr(evlist->threads); 1154 1155 pr_debug2("perf event ring buffer mmapped per thread\n"); 1156 for (thread = 0; thread < nr_threads; thread++) { 1157 int output = -1; 1158 int output_backward = -1; 1159 1160 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 1161 false); 1162 1163 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 1164 &output, &output_backward)) 1165 goto out_unmap; 1166 } 1167 1168 return 0; 1169 1170 out_unmap: 1171 perf_evlist__munmap_nofree(evlist); 1172 return -1; 1173 } 1174 1175 unsigned long perf_event_mlock_kb_in_pages(void) 1176 { 1177 unsigned long pages; 1178 int max; 1179 1180 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 1181 /* 1182 * Pick a once upon a time good value, i.e. things look 1183 * strange since we can't read a sysctl value, but lets not 1184 * die yet... 1185 */ 1186 max = 512; 1187 } else { 1188 max -= (page_size / 1024); 1189 } 1190 1191 pages = (max * 1024) / page_size; 1192 if (!is_power_of_2(pages)) 1193 pages = rounddown_pow_of_two(pages); 1194 1195 return pages; 1196 } 1197 1198 size_t perf_evlist__mmap_size(unsigned long pages) 1199 { 1200 if (pages == UINT_MAX) 1201 pages = perf_event_mlock_kb_in_pages(); 1202 else if (!is_power_of_2(pages)) 1203 return 0; 1204 1205 return (pages + 1) * page_size; 1206 } 1207 1208 static long parse_pages_arg(const char *str, unsigned long min, 1209 unsigned long max) 1210 { 1211 unsigned long pages, val; 1212 static struct parse_tag tags[] = { 1213 { .tag = 'B', .mult = 1 }, 1214 { .tag = 'K', .mult = 1 << 10 }, 1215 { .tag = 'M', .mult = 1 << 20 }, 1216 { .tag = 'G', .mult = 1 << 30 }, 1217 { .tag = 0 }, 1218 }; 1219 1220 if (str == NULL) 1221 return -EINVAL; 1222 1223 val = parse_tag_value(str, tags); 1224 if (val != (unsigned long) -1) { 1225 /* we got file size value */ 1226 pages = PERF_ALIGN(val, page_size) / page_size; 1227 } else { 1228 /* we got pages count value */ 1229 char *eptr; 1230 pages = strtoul(str, &eptr, 10); 1231 if (*eptr != '\0') 1232 return -EINVAL; 1233 } 1234 1235 if (pages == 0 && min == 0) { 1236 /* leave number of pages at 0 */ 1237 } else if (!is_power_of_2(pages)) { 1238 char buf[100]; 1239 1240 /* round pages up to next power of 2 */ 1241 pages = roundup_pow_of_two(pages); 1242 if (!pages) 1243 return -EINVAL; 1244 1245 unit_number__scnprintf(buf, sizeof(buf), pages * page_size); 1246 pr_info("rounding mmap pages size to %s (%lu pages)\n", 1247 buf, pages); 1248 } 1249 1250 if (pages > max) 1251 return -EINVAL; 1252 1253 return pages; 1254 } 1255 1256 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 1257 { 1258 unsigned long max = UINT_MAX; 1259 long pages; 1260 1261 if (max > SIZE_MAX / page_size) 1262 max = SIZE_MAX / page_size; 1263 1264 pages = parse_pages_arg(str, 1, max); 1265 if (pages < 0) { 1266 pr_err("Invalid argument for --mmap_pages/-m\n"); 1267 return -1; 1268 } 1269 1270 *mmap_pages = pages; 1271 return 0; 1272 } 1273 1274 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1275 int unset __maybe_unused) 1276 { 1277 return __perf_evlist__parse_mmap_pages(opt->value, str); 1278 } 1279 1280 /** 1281 * perf_evlist__mmap_ex - Create mmaps to receive events. 1282 * @evlist: list of events 1283 * @pages: map length in pages 1284 * @overwrite: overwrite older events? 1285 * @auxtrace_pages - auxtrace map length in pages 1286 * @auxtrace_overwrite - overwrite older auxtrace data? 1287 * 1288 * If @overwrite is %false the user needs to signal event consumption using 1289 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1290 * automatically. 1291 * 1292 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1293 * consumption using auxtrace_mmap__write_tail(). 1294 * 1295 * Return: %0 on success, negative error code otherwise. 1296 */ 1297 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1298 bool overwrite, unsigned int auxtrace_pages, 1299 bool auxtrace_overwrite) 1300 { 1301 struct perf_evsel *evsel; 1302 const struct cpu_map *cpus = evlist->cpus; 1303 const struct thread_map *threads = evlist->threads; 1304 struct mmap_params mp = { 1305 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), 1306 }; 1307 1308 if (!evlist->mmap) 1309 evlist->mmap = perf_evlist__alloc_mmap(evlist); 1310 if (!evlist->mmap) 1311 return -ENOMEM; 1312 1313 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1314 return -ENOMEM; 1315 1316 evlist->overwrite = overwrite; 1317 evlist->mmap_len = perf_evlist__mmap_size(pages); 1318 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1319 mp.mask = evlist->mmap_len - page_size - 1; 1320 1321 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1322 auxtrace_pages, auxtrace_overwrite); 1323 1324 evlist__for_each_entry(evlist, evsel) { 1325 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1326 evsel->sample_id == NULL && 1327 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1328 return -ENOMEM; 1329 } 1330 1331 if (cpu_map__empty(cpus)) 1332 return perf_evlist__mmap_per_thread(evlist, &mp); 1333 1334 return perf_evlist__mmap_per_cpu(evlist, &mp); 1335 } 1336 1337 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1338 bool overwrite) 1339 { 1340 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); 1341 } 1342 1343 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1344 { 1345 struct cpu_map *cpus; 1346 struct thread_map *threads; 1347 1348 threads = thread_map__new_str(target->pid, target->tid, target->uid); 1349 1350 if (!threads) 1351 return -1; 1352 1353 if (target__uses_dummy_map(target)) 1354 cpus = cpu_map__dummy_new(); 1355 else 1356 cpus = cpu_map__new(target->cpu_list); 1357 1358 if (!cpus) 1359 goto out_delete_threads; 1360 1361 evlist->has_user_cpus = !!target->cpu_list; 1362 1363 perf_evlist__set_maps(evlist, cpus, threads); 1364 1365 return 0; 1366 1367 out_delete_threads: 1368 thread_map__put(threads); 1369 return -1; 1370 } 1371 1372 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1373 struct thread_map *threads) 1374 { 1375 /* 1376 * Allow for the possibility that one or another of the maps isn't being 1377 * changed i.e. don't put it. Note we are assuming the maps that are 1378 * being applied are brand new and evlist is taking ownership of the 1379 * original reference count of 1. If that is not the case it is up to 1380 * the caller to increase the reference count. 1381 */ 1382 if (cpus != evlist->cpus) { 1383 cpu_map__put(evlist->cpus); 1384 evlist->cpus = cpu_map__get(cpus); 1385 } 1386 1387 if (threads != evlist->threads) { 1388 thread_map__put(evlist->threads); 1389 evlist->threads = thread_map__get(threads); 1390 } 1391 1392 perf_evlist__propagate_maps(evlist); 1393 } 1394 1395 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1396 enum perf_event_sample_format bit) 1397 { 1398 struct perf_evsel *evsel; 1399 1400 evlist__for_each_entry(evlist, evsel) 1401 __perf_evsel__set_sample_bit(evsel, bit); 1402 } 1403 1404 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1405 enum perf_event_sample_format bit) 1406 { 1407 struct perf_evsel *evsel; 1408 1409 evlist__for_each_entry(evlist, evsel) 1410 __perf_evsel__reset_sample_bit(evsel, bit); 1411 } 1412 1413 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1414 { 1415 struct perf_evsel *evsel; 1416 int err = 0; 1417 const int ncpus = cpu_map__nr(evlist->cpus), 1418 nthreads = thread_map__nr(evlist->threads); 1419 1420 evlist__for_each_entry(evlist, evsel) { 1421 if (evsel->filter == NULL) 1422 continue; 1423 1424 /* 1425 * filters only work for tracepoint event, which doesn't have cpu limit. 1426 * So evlist and evsel should always be same. 1427 */ 1428 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); 1429 if (err) { 1430 *err_evsel = evsel; 1431 break; 1432 } 1433 } 1434 1435 return err; 1436 } 1437 1438 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) 1439 { 1440 struct perf_evsel *evsel; 1441 int err = 0; 1442 1443 evlist__for_each_entry(evlist, evsel) { 1444 if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 1445 continue; 1446 1447 err = perf_evsel__set_filter(evsel, filter); 1448 if (err) 1449 break; 1450 } 1451 1452 return err; 1453 } 1454 1455 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1456 { 1457 char *filter; 1458 int ret = -1; 1459 size_t i; 1460 1461 for (i = 0; i < npids; ++i) { 1462 if (i == 0) { 1463 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1464 return -1; 1465 } else { 1466 char *tmp; 1467 1468 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1469 goto out_free; 1470 1471 free(filter); 1472 filter = tmp; 1473 } 1474 } 1475 1476 ret = perf_evlist__set_filter(evlist, filter); 1477 out_free: 1478 free(filter); 1479 return ret; 1480 } 1481 1482 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) 1483 { 1484 return perf_evlist__set_filter_pids(evlist, 1, &pid); 1485 } 1486 1487 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1488 { 1489 struct perf_evsel *pos; 1490 1491 if (evlist->nr_entries == 1) 1492 return true; 1493 1494 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1495 return false; 1496 1497 evlist__for_each_entry(evlist, pos) { 1498 if (pos->id_pos != evlist->id_pos || 1499 pos->is_pos != evlist->is_pos) 1500 return false; 1501 } 1502 1503 return true; 1504 } 1505 1506 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1507 { 1508 struct perf_evsel *evsel; 1509 1510 if (evlist->combined_sample_type) 1511 return evlist->combined_sample_type; 1512 1513 evlist__for_each_entry(evlist, evsel) 1514 evlist->combined_sample_type |= evsel->attr.sample_type; 1515 1516 return evlist->combined_sample_type; 1517 } 1518 1519 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1520 { 1521 evlist->combined_sample_type = 0; 1522 return __perf_evlist__combined_sample_type(evlist); 1523 } 1524 1525 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1526 { 1527 struct perf_evsel *evsel; 1528 u64 branch_type = 0; 1529 1530 evlist__for_each_entry(evlist, evsel) 1531 branch_type |= evsel->attr.branch_sample_type; 1532 return branch_type; 1533 } 1534 1535 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1536 { 1537 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1538 u64 read_format = first->attr.read_format; 1539 u64 sample_type = first->attr.sample_type; 1540 1541 evlist__for_each_entry(evlist, pos) { 1542 if (read_format != pos->attr.read_format) 1543 return false; 1544 } 1545 1546 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1547 if ((sample_type & PERF_SAMPLE_READ) && 1548 !(read_format & PERF_FORMAT_ID)) { 1549 return false; 1550 } 1551 1552 return true; 1553 } 1554 1555 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1556 { 1557 struct perf_evsel *first = perf_evlist__first(evlist); 1558 return first->attr.read_format; 1559 } 1560 1561 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1562 { 1563 struct perf_evsel *first = perf_evlist__first(evlist); 1564 struct perf_sample *data; 1565 u64 sample_type; 1566 u16 size = 0; 1567 1568 if (!first->attr.sample_id_all) 1569 goto out; 1570 1571 sample_type = first->attr.sample_type; 1572 1573 if (sample_type & PERF_SAMPLE_TID) 1574 size += sizeof(data->tid) * 2; 1575 1576 if (sample_type & PERF_SAMPLE_TIME) 1577 size += sizeof(data->time); 1578 1579 if (sample_type & PERF_SAMPLE_ID) 1580 size += sizeof(data->id); 1581 1582 if (sample_type & PERF_SAMPLE_STREAM_ID) 1583 size += sizeof(data->stream_id); 1584 1585 if (sample_type & PERF_SAMPLE_CPU) 1586 size += sizeof(data->cpu) * 2; 1587 1588 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1589 size += sizeof(data->id); 1590 out: 1591 return size; 1592 } 1593 1594 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1595 { 1596 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1597 1598 evlist__for_each_entry_continue(evlist, pos) { 1599 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1600 return false; 1601 } 1602 1603 return true; 1604 } 1605 1606 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1607 { 1608 struct perf_evsel *first = perf_evlist__first(evlist); 1609 return first->attr.sample_id_all; 1610 } 1611 1612 void perf_evlist__set_selected(struct perf_evlist *evlist, 1613 struct perf_evsel *evsel) 1614 { 1615 evlist->selected = evsel; 1616 } 1617 1618 void perf_evlist__close(struct perf_evlist *evlist) 1619 { 1620 struct perf_evsel *evsel; 1621 int ncpus = cpu_map__nr(evlist->cpus); 1622 int nthreads = thread_map__nr(evlist->threads); 1623 1624 evlist__for_each_entry_reverse(evlist, evsel) { 1625 int n = evsel->cpus ? evsel->cpus->nr : ncpus; 1626 perf_evsel__close(evsel, n, nthreads); 1627 } 1628 } 1629 1630 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1631 { 1632 struct cpu_map *cpus; 1633 struct thread_map *threads; 1634 int err = -ENOMEM; 1635 1636 /* 1637 * Try reading /sys/devices/system/cpu/online to get 1638 * an all cpus map. 1639 * 1640 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1641 * code needs an overhaul to properly forward the 1642 * error, and we may not want to do that fallback to a 1643 * default cpu identity map :-\ 1644 */ 1645 cpus = cpu_map__new(NULL); 1646 if (!cpus) 1647 goto out; 1648 1649 threads = thread_map__new_dummy(); 1650 if (!threads) 1651 goto out_put; 1652 1653 perf_evlist__set_maps(evlist, cpus, threads); 1654 out: 1655 return err; 1656 out_put: 1657 cpu_map__put(cpus); 1658 goto out; 1659 } 1660 1661 int perf_evlist__open(struct perf_evlist *evlist) 1662 { 1663 struct perf_evsel *evsel; 1664 int err; 1665 1666 /* 1667 * Default: one fd per CPU, all threads, aka systemwide 1668 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1669 */ 1670 if (evlist->threads == NULL && evlist->cpus == NULL) { 1671 err = perf_evlist__create_syswide_maps(evlist); 1672 if (err < 0) 1673 goto out_err; 1674 } 1675 1676 perf_evlist__update_id_pos(evlist); 1677 1678 evlist__for_each_entry(evlist, evsel) { 1679 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1680 if (err < 0) 1681 goto out_err; 1682 } 1683 1684 return 0; 1685 out_err: 1686 perf_evlist__close(evlist); 1687 errno = -err; 1688 return err; 1689 } 1690 1691 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1692 const char *argv[], bool pipe_output, 1693 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1694 { 1695 int child_ready_pipe[2], go_pipe[2]; 1696 char bf; 1697 1698 if (pipe(child_ready_pipe) < 0) { 1699 perror("failed to create 'ready' pipe"); 1700 return -1; 1701 } 1702 1703 if (pipe(go_pipe) < 0) { 1704 perror("failed to create 'go' pipe"); 1705 goto out_close_ready_pipe; 1706 } 1707 1708 evlist->workload.pid = fork(); 1709 if (evlist->workload.pid < 0) { 1710 perror("failed to fork"); 1711 goto out_close_pipes; 1712 } 1713 1714 if (!evlist->workload.pid) { 1715 int ret; 1716 1717 if (pipe_output) 1718 dup2(2, 1); 1719 1720 signal(SIGTERM, SIG_DFL); 1721 1722 close(child_ready_pipe[0]); 1723 close(go_pipe[1]); 1724 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1725 1726 /* 1727 * Tell the parent we're ready to go 1728 */ 1729 close(child_ready_pipe[1]); 1730 1731 /* 1732 * Wait until the parent tells us to go. 1733 */ 1734 ret = read(go_pipe[0], &bf, 1); 1735 /* 1736 * The parent will ask for the execvp() to be performed by 1737 * writing exactly one byte, in workload.cork_fd, usually via 1738 * perf_evlist__start_workload(). 1739 * 1740 * For cancelling the workload without actually running it, 1741 * the parent will just close workload.cork_fd, without writing 1742 * anything, i.e. read will return zero and we just exit() 1743 * here. 1744 */ 1745 if (ret != 1) { 1746 if (ret == -1) 1747 perror("unable to read pipe"); 1748 exit(ret); 1749 } 1750 1751 execvp(argv[0], (char **)argv); 1752 1753 if (exec_error) { 1754 union sigval val; 1755 1756 val.sival_int = errno; 1757 if (sigqueue(getppid(), SIGUSR1, val)) 1758 perror(argv[0]); 1759 } else 1760 perror(argv[0]); 1761 exit(-1); 1762 } 1763 1764 if (exec_error) { 1765 struct sigaction act = { 1766 .sa_flags = SA_SIGINFO, 1767 .sa_sigaction = exec_error, 1768 }; 1769 sigaction(SIGUSR1, &act, NULL); 1770 } 1771 1772 if (target__none(target)) { 1773 if (evlist->threads == NULL) { 1774 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1775 __func__, __LINE__); 1776 goto out_close_pipes; 1777 } 1778 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1779 } 1780 1781 close(child_ready_pipe[1]); 1782 close(go_pipe[0]); 1783 /* 1784 * wait for child to settle 1785 */ 1786 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1787 perror("unable to read pipe"); 1788 goto out_close_pipes; 1789 } 1790 1791 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1792 evlist->workload.cork_fd = go_pipe[1]; 1793 close(child_ready_pipe[0]); 1794 return 0; 1795 1796 out_close_pipes: 1797 close(go_pipe[0]); 1798 close(go_pipe[1]); 1799 out_close_ready_pipe: 1800 close(child_ready_pipe[0]); 1801 close(child_ready_pipe[1]); 1802 return -1; 1803 } 1804 1805 int perf_evlist__start_workload(struct perf_evlist *evlist) 1806 { 1807 if (evlist->workload.cork_fd > 0) { 1808 char bf = 0; 1809 int ret; 1810 /* 1811 * Remove the cork, let it rip! 1812 */ 1813 ret = write(evlist->workload.cork_fd, &bf, 1); 1814 if (ret < 0) 1815 perror("unable to write to pipe"); 1816 1817 close(evlist->workload.cork_fd); 1818 return ret; 1819 } 1820 1821 return 0; 1822 } 1823 1824 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1825 struct perf_sample *sample) 1826 { 1827 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1828 1829 if (!evsel) 1830 return -EFAULT; 1831 return perf_evsel__parse_sample(evsel, event, sample); 1832 } 1833 1834 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1835 { 1836 struct perf_evsel *evsel; 1837 size_t printed = 0; 1838 1839 evlist__for_each_entry(evlist, evsel) { 1840 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1841 perf_evsel__name(evsel)); 1842 } 1843 1844 return printed + fprintf(fp, "\n"); 1845 } 1846 1847 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1848 int err, char *buf, size_t size) 1849 { 1850 int printed, value; 1851 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1852 1853 switch (err) { 1854 case EACCES: 1855 case EPERM: 1856 printed = scnprintf(buf, size, 1857 "Error:\t%s.\n" 1858 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1859 1860 value = perf_event_paranoid(); 1861 1862 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1863 1864 if (value >= 2) { 1865 printed += scnprintf(buf + printed, size - printed, 1866 "For your workloads it needs to be <= 1\nHint:\t"); 1867 } 1868 printed += scnprintf(buf + printed, size - printed, 1869 "For system wide tracing it needs to be set to -1.\n"); 1870 1871 printed += scnprintf(buf + printed, size - printed, 1872 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1873 "Hint:\tThe current value is %d.", value); 1874 break; 1875 case EINVAL: { 1876 struct perf_evsel *first = perf_evlist__first(evlist); 1877 int max_freq; 1878 1879 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1880 goto out_default; 1881 1882 if (first->attr.sample_freq < (u64)max_freq) 1883 goto out_default; 1884 1885 printed = scnprintf(buf, size, 1886 "Error:\t%s.\n" 1887 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1888 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1889 emsg, max_freq, first->attr.sample_freq); 1890 break; 1891 } 1892 default: 1893 out_default: 1894 scnprintf(buf, size, "%s", emsg); 1895 break; 1896 } 1897 1898 return 0; 1899 } 1900 1901 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1902 { 1903 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1904 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1905 1906 switch (err) { 1907 case EPERM: 1908 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1909 printed += scnprintf(buf + printed, size - printed, 1910 "Error:\t%s.\n" 1911 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1912 "Hint:\tTried using %zd kB.\n", 1913 emsg, pages_max_per_user, pages_attempted); 1914 1915 if (pages_attempted >= pages_max_per_user) { 1916 printed += scnprintf(buf + printed, size - printed, 1917 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1918 pages_max_per_user + pages_attempted); 1919 } 1920 1921 printed += scnprintf(buf + printed, size - printed, 1922 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1923 break; 1924 default: 1925 scnprintf(buf, size, "%s", emsg); 1926 break; 1927 } 1928 1929 return 0; 1930 } 1931 1932 void perf_evlist__to_front(struct perf_evlist *evlist, 1933 struct perf_evsel *move_evsel) 1934 { 1935 struct perf_evsel *evsel, *n; 1936 LIST_HEAD(move); 1937 1938 if (move_evsel == perf_evlist__first(evlist)) 1939 return; 1940 1941 evlist__for_each_entry_safe(evlist, n, evsel) { 1942 if (evsel->leader == move_evsel->leader) 1943 list_move_tail(&evsel->node, &move); 1944 } 1945 1946 list_splice(&move, &evlist->entries); 1947 } 1948 1949 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1950 struct perf_evsel *tracking_evsel) 1951 { 1952 struct perf_evsel *evsel; 1953 1954 if (tracking_evsel->tracking) 1955 return; 1956 1957 evlist__for_each_entry(evlist, evsel) { 1958 if (evsel != tracking_evsel) 1959 evsel->tracking = false; 1960 } 1961 1962 tracking_evsel->tracking = true; 1963 } 1964 1965 struct perf_evsel * 1966 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1967 const char *str) 1968 { 1969 struct perf_evsel *evsel; 1970 1971 evlist__for_each_entry(evlist, evsel) { 1972 if (!evsel->name) 1973 continue; 1974 if (strcmp(str, evsel->name) == 0) 1975 return evsel; 1976 } 1977 1978 return NULL; 1979 } 1980 1981 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1982 enum bkw_mmap_state state) 1983 { 1984 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1985 enum action { 1986 NONE, 1987 PAUSE, 1988 RESUME, 1989 } action = NONE; 1990 1991 if (!evlist->backward_mmap) 1992 return; 1993 1994 switch (old_state) { 1995 case BKW_MMAP_NOTREADY: { 1996 if (state != BKW_MMAP_RUNNING) 1997 goto state_err;; 1998 break; 1999 } 2000 case BKW_MMAP_RUNNING: { 2001 if (state != BKW_MMAP_DATA_PENDING) 2002 goto state_err; 2003 action = PAUSE; 2004 break; 2005 } 2006 case BKW_MMAP_DATA_PENDING: { 2007 if (state != BKW_MMAP_EMPTY) 2008 goto state_err; 2009 break; 2010 } 2011 case BKW_MMAP_EMPTY: { 2012 if (state != BKW_MMAP_RUNNING) 2013 goto state_err; 2014 action = RESUME; 2015 break; 2016 } 2017 default: 2018 WARN_ONCE(1, "Shouldn't get there\n"); 2019 } 2020 2021 evlist->bkw_mmap_state = state; 2022 2023 switch (action) { 2024 case PAUSE: 2025 perf_evlist__pause(evlist); 2026 break; 2027 case RESUME: 2028 perf_evlist__resume(evlist); 2029 break; 2030 case NONE: 2031 default: 2032 break; 2033 } 2034 2035 state_err: 2036 return; 2037 } 2038