// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}
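
/*
 * Example, for illustration only: a minimal lifetime for an evlist built
 * with the constructors above, with error handling trimmed down. Real
 * callers (record, stat, top, ...) add their own events rather than the
 * dummy one.
 *
 *	struct evlist *evlist = evlist__new();
 *
 *	if (evlist && perf_evlist__add_dummy(evlist) == 0) {
 *		... open, mmap and read the events ...
 *	}
 *	evlist__delete(evlist);
 */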

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
			     struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->core.threads);
}

void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}
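
/*
 * Example, for illustration only: with 4 CPUs, 2 threads and two events,
 * one of them system_wide, the sizing loop above asks for
 * 4 * 2 + 4 = 12 pollfd slots, since a system_wide evsel only contributes
 * one fd per CPU.
 */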

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}
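
/*
 * Example, for illustration only: the legacy fallback above relies on the
 * read() layout of a non-group counter in the perf_event_open(2) ABI:
 * value, then time_enabled (if PERF_FORMAT_TOTAL_TIME_ENABLED), then
 * time_running (if PERF_FORMAT_TOTAL_TIME_RUNNING), then the id (if
 * PERF_FORMAT_ID). That is why id_idx starts at 1 and is bumped once per
 * TIME_* bit that is set, so read_data[id_idx] always lands on the id.
 */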

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->core.threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
					   u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
				       union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->core.cpus);
	if (cpu_map__empty(evlist->core.cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
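
/*
 * Example, for illustration only: with 4 KiB pages and the usual
 * kernel.perf_event_mlock_kb default of 516 kB,
 * perf_event_mlock_kb_in_pages() yields (516 - 4) * 1024 / 4096 = 128
 * pages, and perf_evlist__mmap_size() then maps 129 pages: 128 data pages
 * plus one control page for the ring buffer header.
 */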

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1       },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If both '-a' and '--per-thread' are passed to perf record, '-a'
	 * overrides '--per-thread': target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If only '--per-thread' is passed to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So for perf record, target->per_thread && target->system_wide is
	 * always false and thread_map__new_str() doesn't call
	 * thread_map__new_all_cpus(), which keeps perf record's current
	 * behavior.
	 *
	 * perf stat, however, allows target->per_thread and
	 * target->system_wide to both be true, meaning system-wide
	 * per-thread data is collected. In that case thread_map__new_str()
	 * calls thread_map__new_all_cpus() to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which have no CPU
		 * limit, so the evsel's and the evlist's maps are always the
		 * same.
		 */
		err = perf_evsel__apply_filter(&evsel->core, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_put:
	perf_cpu_map__put(cpus);
out:
	return err;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}
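
/*
 * Example, for illustration only: a rough sketch of how the workload
 * helpers above are meant to be driven, approximately what builtin-record
 * does; target, argv and all error handling are the caller's.
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX);
 *	evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 *	... read the ring buffers, then reap the child ...
 */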

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				    "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				    "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->core.attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->core.attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct evlist *evlist,
			   struct evsel *move_evsel)
{
	struct evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_entry_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->core.node, &move);
	}

	list_splice(&move, &evlist->core.entries);
}

void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get here\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}
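
/*
 * Example, for illustration only: a reader of the overwrite (backward)
 * ring buffers typically drives the state machine above as
 *
 *	perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
 *	... consume events from evlist->overwrite_mmap ...
 *	perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
 *	perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
 *
 * i.e. RUNNING -> DATA_PENDING pauses the kernel output, EMPTY marks the
 * buffers as consumed, and going back to RUNNING resumes output.
 */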

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in a data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}

struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
					    struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				evsel__close(c2);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}
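
/*
 * Example, for illustration only: the side band helpers above are meant to
 * be used together, on an evlist separate from the main one; attr, sb_cb
 * and data here stand in for whatever the caller wants delivered from the
 * side band ring buffers.
 *
 *	struct evlist *sb_evlist = NULL;
 *
 *	perf_evlist__add_sb_event(&sb_evlist, &attr, sb_cb, data);
 *	perf_evlist__start_sb_thread(sb_evlist, &target);
 *	... run the main session ...
 *	perf_evlist__stop_sb_thread(sb_evlist);
 */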