// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 */
#include <api/fs/fs.h>
#include <errno.h>
#include <inttypes.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include "units.h"
#include "asm/bug.h"
#include "bpf-event.h"
#include <signal.h>
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>
#include <linux/zalloc.h>
#include <perf/evlist.h>
#include <perf/cpumap.h>

#ifdef LACKS_SIGQUEUE_PROTOTYPE
int sigqueue(pid_t pid, int sig, const union sigval value);
#endif

#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
		  struct perf_thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	perf_evlist__init(&evlist->core);
	perf_evlist__set_maps(&evlist->core, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
}

struct evlist *evlist__new(void)
{
	struct evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct evlist *perf_evlist__new_default(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct evlist *perf_evlist__new_dummy(void)
{
	struct evlist *evlist = evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}
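
/*
 * A rough sketch of the evlist lifecycle as the tools use it, stitched
 * together from the helpers in this file (real callers interleave more
 * steps, e.g. evsel configuration and filters):
 *
 *	evlist = evlist__new();
 *	perf_evlist__add_default(evlist);		(one cycles event)
 *	perf_evlist__create_maps(evlist, &target);	(cpu + thread maps)
 *	evlist__open(evlist);				(sys_perf_event_open)
 *	perf_evlist__mmap(evlist, UINT_MAX);		(ring buffers)
 *	evlist__enable(evlist);
 *	...
 *	evlist__disable(evlist);
 *	evlist__delete(evlist);				(closes, munmaps, frees)
 */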
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct evlist *evlist)
{
	struct evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->core.node);
		pos->evlist = NULL;
		evsel__delete(pos);
	}

	evlist->core.nr_entries = 0;
}

void perf_evlist__exit(struct evlist *evlist)
{
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
	fdarray__exit(&evlist->pollfd);
}

void evlist__delete(struct evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	evlist__close(evlist);
	perf_cpu_map__put(evlist->core.cpus);
	perf_thread_map__put(evlist->core.threads);
	evlist->core.cpus = NULL;
	evlist->core.threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

void evlist__add(struct evlist *evlist, struct evsel *entry)
{
	entry->evlist = evlist;
	entry->idx = evlist->core.nr_entries;
	entry->tracking = !entry->idx;

	perf_evlist__add(&evlist->core, &entry->core);

	if (evlist->core.nr_entries == 1)
		perf_evlist__set_id_pos(evlist);
}

void evlist__remove(struct evlist *evlist, struct evsel *evsel)
{
	evsel->evlist = NULL;
	perf_evlist__remove(&evlist->core, &evsel->core);
}

void perf_evlist__splice_list_tail(struct evlist *evlist,
				   struct list_head *list)
{
	struct evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->core.node);
		evlist__add(evlist, evsel);
	}
}
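
/*
 * Note: assumes the evsels on @list were added in index order, so the
 * distance between the last and the first idx gives the group size.
 */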
void __perf_evlist__set_leader(struct list_head *list)
{
	struct evsel *evsel, *leader;

	leader = list_entry(list->next, struct evsel, core.node);
	evsel = list_entry(list->prev, struct evsel, core.node);

	leader->core.nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct evlist *evlist)
{
	if (evlist->core.nr_entries) {
		evlist->nr_groups = evlist->core.nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->core.entries);
	}
}

int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
	struct evsel *evsel = perf_evsel__new_cycles(precise);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

int perf_evlist__add_dummy(struct evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);

	if (evsel == NULL)
		return -ENOMEM;

	evlist__add(evlist, evsel);
	return 0;
}

static int evlist__add_attrs(struct evlist *evlist,
			     struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->core.node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct evsel *
perf_evlist__find_tracepoint_by_id(struct evlist *evlist, int id)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->core.attr.config == id)
			return evsel;
	}

	return NULL;
}

struct evsel *
perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
				     const char *name)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct evlist *evlist,
				   struct evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->core.threads);
}
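
/*
 * Group members are scheduled on and off the PMU together with their
 * leader, so only group leaders (that actually have fds, i.e. were
 * opened) get the enable/disable ioctls below.
 */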
void evlist__disable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__disable(pos);
	}

	evlist->enabled = false;
}

void evlist__enable(struct evlist *evlist)
{
	struct evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
			continue;
		evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct evlist *evlist)
{
	(evlist->enabled ? evlist__disable : evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct evlist *evlist,
					 struct evsel *evsel, int cpu)
{
	int thread;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->core.fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct evlist *evlist,
					    struct evsel *evsel,
					    int thread)
{
	int cpu;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);

	if (!evsel->core.fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct evlist *evlist,
				  struct evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->core.cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}
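
/*
 * system_wide events are opened once per CPU no matter what the thread
 * map looks like, so they contribute one fd per CPU here; everything
 * else needs nr_cpus * nr_threads entries.
 */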
int perf_evlist__alloc_pollfd(struct evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);
	int nfds = 0;
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct evlist *evlist, int fd,
				     struct perf_mmap *map, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].ptr = map;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_mmap *map = fda->priv[fd].ptr;

	if (map)
		perf_mmap__put(map);
}

int perf_evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct evlist *evlist,
				 struct evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct evlist *evlist, struct evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}
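
/*
 * Layout of what a read() on the event fd returns for a non-group
 * read_format, from which the legacy path below fishes out the id
 * (see perf_event_open(2)):
 *
 *	u64 value;
 *	{ u64 time_enabled; }	&& PERF_FORMAT_TOTAL_TIME_ENABLED
 *	{ u64 time_running; }	&& PERF_FORMAT_TOTAL_TIME_RUNNING
 *	{ u64 id; }		&& PERF_FORMAT_ID
 */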
int perf_evlist__id_add_fd(struct evlist *evlist,
			   struct evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->core.attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct evlist *evlist,
				     struct evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->idx = idx;
	if (evlist->core.cpus && cpu >= 0)
		sid->cpu = evlist->core.cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->core.threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->core.threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->core.nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct evsel *perf_evlist__id2evsel_strict(struct evlist *evlist,
					   u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

struct evsel *perf_evlist__event2evsel(struct evlist *evlist,
				       union perf_event *event)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->core.nr_entries == 1)
		return first;

	if (!first->core.attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}
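
/*
 * PERF_EVENT_IOC_PAUSE_OUTPUT stops the kernel from writing into the
 * ring buffer, so an overwritable (backward) buffer can be read without
 * being overwritten mid-parse; resuming lets the writer go again.
 */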
static int perf_evlist__set_paused(struct evlist *evlist, bool value)
{
	int i;

	if (!evlist->overwrite_mmap)
		return 0;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->overwrite_mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__pause(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

static int perf_evlist__resume(struct evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

static void perf_evlist__munmap_nofree(struct evlist *evlist)
{
	int i;

	if (evlist->mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->mmap[i]);

	if (evlist->overwrite_mmap)
		for (i = 0; i < evlist->nr_mmaps; i++)
			perf_mmap__munmap(&evlist->overwrite_mmap[i]);
}

void perf_evlist__munmap(struct evlist *evlist)
{
	perf_evlist__munmap_nofree(evlist);
	zfree(&evlist->mmap);
	zfree(&evlist->overwrite_mmap);
}

static struct perf_mmap *perf_evlist__alloc_mmap(struct evlist *evlist,
						 bool overwrite)
{
	int i;
	struct perf_mmap *map;

	evlist->nr_mmaps = cpu_map__nr(evlist->core.cpus);
	if (cpu_map__empty(evlist->core.cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->core.threads);
	map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!map)
		return NULL;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		map[i].fd = -1;
		map[i].overwrite = overwrite;
		/*
		 * When the perf_mmap() call is made we grab one refcount, plus
		 * one extra to let perf_mmap__consume() get the last
		 * events after all real references (perf_mmap__get()) are
		 * dropped.
		 *
		 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
		 * thus does perf_mmap__get() on it.
		 */
		refcount_set(&map[i].refcnt, 0);
	}
	return map;
}

static bool
perf_evlist__should_poll(struct evlist *evlist __maybe_unused,
			 struct evsel *evsel)
{
	if (evsel->core.attr.write_backward)
		return false;
	return true;
}
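
/*
 * Only the first fd on a given CPU/thread gets a ring buffer mmapped;
 * the remaining events redirect their output into that buffer with
 * PERF_EVENT_IOC_SET_OUTPUT, each taking a reference on the map.
 */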
static int perf_evlist__mmap_per_evsel(struct evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu_idx,
				       int thread, int *_output, int *_output_overwrite)
{
	struct evsel *evsel;
	int revent;
	int evlist_cpu = cpu_map__cpu(evlist->core.cpus, cpu_idx);

	evlist__for_each_entry(evlist, evsel) {
		struct perf_mmap *maps = evlist->mmap;
		int *output = _output;
		int fd;
		int cpu;

		mp->prot = PROT_READ | PROT_WRITE;
		if (evsel->core.attr.write_backward) {
			output = _output_overwrite;
			maps = evlist->overwrite_mmap;

			if (!maps) {
				maps = perf_evlist__alloc_mmap(evlist, true);
				if (!maps)
					return -1;
				evlist->overwrite_mmap = maps;
				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
			}
			mp->prot &= ~PROT_WRITE;
		}

		if (evsel->system_wide && thread)
			continue;

		cpu = cpu_map__idx(evsel->core.cpus, evlist_cpu);
		if (cpu == -1)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;

			if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_mmap__get(&maps[idx]);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid. Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
			perf_mmap__put(&maps[idx]);
			return -1;
		}

		if (evsel->core.attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->core.cpus);
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output, &output_overwrite))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->core.threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;
		int output_overwrite = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output, &output_overwrite))
			goto out_unmap;
	}

	return 0;

out_unmap:
	perf_evlist__munmap_nofree(evlist);
	return -1;
}
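
/*
 * E.g. with the usual kernel default perf_event_mlock_kb = 516 and
 * 4 kB pages: 516 - 4 = 512 kB, i.e. 128 pages, already a power of 2.
 */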
unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}
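
/*
 * The extra page above is the control page (struct perf_event_mmap_page)
 * that heads the mapping; the data area proper has to stay a power-of-2
 * number of pages so the kernel can mask ring buffer offsets.
 */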
static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag  = 'B', .mult = 1       },
		{ .tag  = 'K', .mult = 1 << 10 },
		{ .tag  = 'M', .mult = 1 << 20 },
		{ .tag  = 'G', .mult = 1 << 30 },
		{ .tag  = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;

		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		char buf[100];

		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;

		unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
		pr_info("rounding mmap pages size to %s (%lu pages)\n",
			buf, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}
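
/*
 * Examples for the parser above, assuming 4 kB pages: "16" is taken as
 * 16 pages; "512K" is a size and becomes 128 pages; "3" is rounded up
 * to 4 pages with a notice.
 */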
int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages - auxtrace map length in pages
 * @auxtrace_overwrite - overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
			 int comp_level)
{
	struct evsel *evsel;
	const struct perf_cpu_map *cpus = evlist->core.cpus;
	const struct perf_thread_map *threads = evlist->core.threads;
	/*
	 * Delay setting mp.prot: set it before calling perf_mmap__mmap.
	 * Its value is decided by evsel's write_backward.
	 * So &mp should not be passed through const pointer.
	 */
	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
				  .comp_level = comp_level };

	if (!evlist->mmap)
		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
	if (!evlist->mmap)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->core.attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct evlist *evlist, unsigned int pages)
{
	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
}

int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
{
	bool all_threads = (target->per_thread && target->system_wide);
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;

	/*
	 * If specify '-a' and '--per-thread' to perf record, perf record
	 * will override '--per-thread'. target->per_thread = false and
	 * target->system_wide = true.
	 *
	 * If specify '--per-thread' only to perf record,
	 * target->per_thread = true and target->system_wide = false.
	 *
	 * So target->per_thread && target->system_wide is false.
	 * For perf record, thread_map__new_str doesn't call
	 * thread_map__new_all_cpus. That will keep perf record's
	 * current behavior.
	 *
	 * For perf stat, it allows the case that target->per_thread and
	 * target->system_wide are all true. It means to collect system-wide
	 * per-thread data. thread_map__new_str will call
	 * thread_map__new_all_cpus to enumerate all threads.
	 */
	threads = thread_map__new_str(target->pid, target->tid, target->uid,
				      all_threads);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = perf_cpu_map__dummy_new();
	else
		cpus = perf_cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->core.has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(&evlist->core, cpus, threads);

	return 0;

out_delete_threads:
	perf_thread_map__put(threads);
	return -1;
}

void __perf_evlist__set_sample_bit(struct evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel cpu maps should always
		 * be the same.
		 */
		err = evsel__apply_filter(evsel, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
{
	struct evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid)
{
	return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct evlist *evlist)
{
	struct evsel *pos;

	if (evlist->core.nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
{
	struct evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->core.attr.sample_type;

	return evlist->combined_sample_type;
}
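
/*
 * The cached value is zapped here because evsels may have been added or
 * removed since __perf_evlist__combined_sample_type() last computed it.
 */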
u64 perf_evlist__combined_sample_type(struct evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct evlist *evlist)
{
	struct evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->core.attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->core.attr.read_format;
	u64 sample_type = first->core.attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->core.attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	return first->core.attr.read_format;
}
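
/*
 * Size of the sample_id trailer that sample_id_all appends to non-sample
 * events: TID (pid + tid), TIME, ID, STREAM_ID, CPU (+ res) and
 * IDENTIFIER, in that order, for whichever bits sample_type has set.
 */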
u16 perf_evlist__id_hdr_size(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->core.attr.sample_id_all)
		goto out;

	sample_type = first->core.attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->core.attr.sample_id_all != pos->core.attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct evlist *evlist)
{
	struct evsel *first = perf_evlist__first(evlist);

	return first->core.attr.sample_id_all;
}

void perf_evlist__set_selected(struct evlist *evlist,
			       struct evsel *evsel)
{
	evlist->selected = evsel;
}

void evlist__close(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry_reverse(evlist, evsel)
		perf_evsel__close(evsel);
}

static int perf_evlist__create_syswide_maps(struct evlist *evlist)
{
	struct perf_cpu_map *cpus;
	struct perf_thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = perf_cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = perf_thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(&evlist->core, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	perf_cpu_map__put(cpus);
	goto out;
}

int evlist__open(struct evlist *evlist)
{
	struct evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->core.threads == NULL && evlist->core.cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = evsel__open(evsel, evsel->core.cpus, evsel->core.threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	evlist__close(evlist);
	errno = -err;
	return err;
}
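
/*
 * The workload startup handshake below, in short:
 *
 *	parent					child (after fork())
 *	read(child_ready_pipe[0]) blocks	close(child_ready_pipe[1])
 *	  ...returns on that EOF		read(go_pipe[0]) blocks
 *	keeps go_pipe[1] as workload.cork_fd
 *	write(cork_fd, one byte)		read() returns 1, execvp()
 */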
int perf_evlist__prepare_workload(struct evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->core.threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		perf_thread_map__set_pid(evlist->core.threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
					union perf_event *event,
					u64 *timestamp)
{
	struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
}

size_t perf_evlist__fprintf(struct evlist *evlist, FILE *fp)
{
	struct evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}
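
/*
 * kernel.perf_event_paranoid, which the hints below refer to:
 *	-1: allow (almost) all events for all users
 *	>= 0: disallow raw tracepoint and ftrace function access
 *	>= 1: also disallow CPU event access
 *	>= 2: also disallow kernel profiling
 * (all for users without CAP_SYS_ADMIN)
 */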
", " : "", 1531 perf_evsel__name(evsel)); 1532 } 1533 1534 return printed + fprintf(fp, "\n"); 1535 } 1536 1537 int perf_evlist__strerror_open(struct evlist *evlist, 1538 int err, char *buf, size_t size) 1539 { 1540 int printed, value; 1541 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1542 1543 switch (err) { 1544 case EACCES: 1545 case EPERM: 1546 printed = scnprintf(buf, size, 1547 "Error:\t%s.\n" 1548 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1549 1550 value = perf_event_paranoid(); 1551 1552 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1553 1554 if (value >= 2) { 1555 printed += scnprintf(buf + printed, size - printed, 1556 "For your workloads it needs to be <= 1\nHint:\t"); 1557 } 1558 printed += scnprintf(buf + printed, size - printed, 1559 "For system wide tracing it needs to be set to -1.\n"); 1560 1561 printed += scnprintf(buf + printed, size - printed, 1562 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1563 "Hint:\tThe current value is %d.", value); 1564 break; 1565 case EINVAL: { 1566 struct evsel *first = perf_evlist__first(evlist); 1567 int max_freq; 1568 1569 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1570 goto out_default; 1571 1572 if (first->core.attr.sample_freq < (u64)max_freq) 1573 goto out_default; 1574 1575 printed = scnprintf(buf, size, 1576 "Error:\t%s.\n" 1577 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1578 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1579 emsg, max_freq, first->core.attr.sample_freq); 1580 break; 1581 } 1582 default: 1583 out_default: 1584 scnprintf(buf, size, "%s", emsg); 1585 break; 1586 } 1587 1588 return 0; 1589 } 1590 1591 int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size) 1592 { 1593 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1594 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1595 1596 switch (err) { 1597 case EPERM: 1598 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1599 printed += scnprintf(buf + printed, size - printed, 1600 "Error:\t%s.\n" 1601 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1602 "Hint:\tTried using %zd kB.\n", 1603 emsg, pages_max_per_user, pages_attempted); 1604 1605 if (pages_attempted >= pages_max_per_user) { 1606 printed += scnprintf(buf + printed, size - printed, 1607 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1608 pages_max_per_user + pages_attempted); 1609 } 1610 1611 printed += scnprintf(buf + printed, size - printed, 1612 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1613 break; 1614 default: 1615 scnprintf(buf, size, "%s", emsg); 1616 break; 1617 } 1618 1619 return 0; 1620 } 1621 1622 void perf_evlist__to_front(struct evlist *evlist, 1623 struct evsel *move_evsel) 1624 { 1625 struct evsel *evsel, *n; 1626 LIST_HEAD(move); 1627 1628 if (move_evsel == perf_evlist__first(evlist)) 1629 return; 1630 1631 evlist__for_each_entry_safe(evlist, n, evsel) { 1632 if (evsel->leader == move_evsel->leader) 1633 list_move_tail(&evsel->core.node, &move); 1634 } 1635 1636 list_splice(&move, &evlist->core.entries); 1637 } 1638 1639 void perf_evlist__set_tracking_event(struct evlist *evlist, 1640 struct evsel *tracking_evsel) 1641 { 1642 struct evsel *evsel; 1643 1644 if (tracking_evsel->tracking) 1645 return; 1646 1647 evlist__for_each_entry(evlist, evsel) 
void perf_evlist__set_tracking_event(struct evlist *evlist,
				     struct evsel *tracking_evsel)
{
	struct evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist,
			       const char *str)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}

void perf_evlist__toggle_bkw_mmap(struct evlist *evlist,
				  enum bkw_mmap_state state)
{
	enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
	enum action {
		NONE,
		PAUSE,
		RESUME,
	} action = NONE;

	if (!evlist->overwrite_mmap)
		return;

	switch (old_state) {
	case BKW_MMAP_NOTREADY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		break;
	}
	case BKW_MMAP_RUNNING: {
		if (state != BKW_MMAP_DATA_PENDING)
			goto state_err;
		action = PAUSE;
		break;
	}
	case BKW_MMAP_DATA_PENDING: {
		if (state != BKW_MMAP_EMPTY)
			goto state_err;
		break;
	}
	case BKW_MMAP_EMPTY: {
		if (state != BKW_MMAP_RUNNING)
			goto state_err;
		action = RESUME;
		break;
	}
	default:
		WARN_ONCE(1, "Shouldn't get here\n");
	}

	evlist->bkw_mmap_state = state;

	switch (action) {
	case PAUSE:
		perf_evlist__pause(evlist);
		break;
	case RESUME:
		perf_evlist__resume(evlist);
		break;
	case NONE:
	default:
		break;
	}

state_err:
	return;
}

bool perf_evlist__exclude_kernel(struct evlist *evlist)
{
	struct evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->core.attr.exclude_kernel)
			return false;
	}

	return true;
}

/*
 * Events in data file are not collected in groups, but we still want
 * the group display. Set the artificial group and set the leader's
 * forced_leader flag to notify the display code.
 */
void perf_evlist__force_leader(struct evlist *evlist)
{
	if (!evlist->nr_groups) {
		struct evsel *leader = perf_evlist__first(evlist);

		perf_evlist__set_leader(evlist);
		leader->forced_leader = true;
	}
}
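
/*
 * Weak groups (the ":W" event modifier) fall back to opening the members
 * as separate events when the group as a whole fails to open: the fds
 * opened so far are closed and every member becomes its own leader.
 */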
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
					    struct evsel *evsel)
{
	struct evsel *c2, *leader;
	bool is_open = true;

	leader = evsel->leader;
	pr_debug("Weak group for %s/%d failed\n",
		 leader->name, leader->core.nr_members);

	/*
	 * for_each_group_member doesn't work here because it doesn't
	 * include the first entry.
	 */
	evlist__for_each_entry(evsel_list, c2) {
		if (c2 == evsel)
			is_open = false;
		if (c2->leader == leader) {
			if (is_open)
				perf_evsel__close(c2);
			c2->leader = c2;
			c2->core.nr_members = 0;
		}
	}
	return leader;
}

int perf_evlist__add_sb_event(struct evlist **evlist,
			      struct perf_event_attr *attr,
			      perf_evsel__sb_cb_t cb,
			      void *data)
{
	struct evsel *evsel;
	bool new_evlist = (*evlist) == NULL;

	if (*evlist == NULL)
		*evlist = evlist__new();
	if (*evlist == NULL)
		return -1;

	if (!attr->sample_id_all) {
		pr_warning("enabling sample_id_all for all side band events\n");
		attr->sample_id_all = 1;
	}

	evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
	if (!evsel)
		goto out_err;

	evsel->side_band.cb = cb;
	evsel->side_band.data = data;
	evlist__add(*evlist, evsel);
	return 0;

out_err:
	if (new_evlist) {
		evlist__delete(*evlist);
		*evlist = NULL;
	}
	return -1;
}

static void *perf_evlist__poll_thread(void *arg)
{
	struct evlist *evlist = arg;
	bool draining = false;
	int i, done = 0;

	while (!done) {
		bool got_data = false;

		if (evlist->thread.done)
			draining = true;

		if (!draining)
			perf_evlist__poll(evlist, 1000);

		for (i = 0; i < evlist->nr_mmaps; i++) {
			struct perf_mmap *map = &evlist->mmap[i];
			union perf_event *event;

			if (perf_mmap__read_init(map))
				continue;
			while ((event = perf_mmap__read_event(map)) != NULL) {
				struct evsel *evsel = perf_evlist__event2evsel(evlist, event);

				if (evsel && evsel->side_band.cb)
					evsel->side_band.cb(event, evsel->side_band.data);
				else
					pr_warning("cannot locate proper evsel for the side band event\n");

				perf_mmap__consume(map);
				got_data = true;
			}
			perf_mmap__read_done(map);
		}

		if (draining && !got_data)
			break;
	}
	return NULL;
}

int perf_evlist__start_sb_thread(struct evlist *evlist,
				 struct target *target)
{
	struct evsel *counter;

	if (!evlist)
		return 0;

	if (perf_evlist__create_maps(evlist, target))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__open(counter, evlist->core.cpus,
				evlist->core.threads) < 0)
			goto out_delete_evlist;
	}

	if (perf_evlist__mmap(evlist, UINT_MAX))
		goto out_delete_evlist;

	evlist__for_each_entry(evlist, counter) {
		if (evsel__enable(counter))
			goto out_delete_evlist;
	}

	evlist->thread.done = 0;
	if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
		goto out_delete_evlist;

	return 0;

out_delete_evlist:
	evlist__delete(evlist);
	evlist = NULL;
	return -1;
}

void perf_evlist__stop_sb_thread(struct evlist *evlist)
{
	if (!evlist)
		return;
	evlist->thread.done = 1;
	pthread_join(evlist->thread.th, NULL);
	evlist__delete(evlist);
}
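
/*
 * Side-band usage, in short: build a separate evlist with
 * perf_evlist__add_sb_event() and bracket the session with
 * perf_evlist__start_sb_thread()/perf_evlist__stop_sb_thread(); the poll
 * thread above then routes each event to its evsel's callback.
 */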