1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <errno.h> 12 #include <inttypes.h> 13 #include <poll.h> 14 #include "cpumap.h" 15 #include "thread_map.h" 16 #include "target.h" 17 #include "evlist.h" 18 #include "evsel.h" 19 #include "debug.h" 20 #include "units.h" 21 #include "asm/bug.h" 22 #include <signal.h> 23 #include <unistd.h> 24 25 #include "parse-events.h" 26 #include <subcmd/parse-options.h> 27 28 #include <sys/ioctl.h> 29 #include <sys/mman.h> 30 31 #include <linux/bitops.h> 32 #include <linux/hash.h> 33 #include <linux/log2.h> 34 #include <linux/err.h> 35 36 static void perf_mmap__munmap(struct perf_mmap *map); 37 static void perf_mmap__put(struct perf_mmap *map); 38 39 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 40 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 41 42 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 43 struct thread_map *threads) 44 { 45 int i; 46 47 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 48 INIT_HLIST_HEAD(&evlist->heads[i]); 49 INIT_LIST_HEAD(&evlist->entries); 50 perf_evlist__set_maps(evlist, cpus, threads); 51 fdarray__init(&evlist->pollfd, 64); 52 evlist->workload.pid = -1; 53 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 54 } 55 56 struct perf_evlist *perf_evlist__new(void) 57 { 58 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 59 60 if (evlist != NULL) 61 perf_evlist__init(evlist, NULL, NULL); 62 63 return evlist; 64 } 65 66 struct perf_evlist *perf_evlist__new_default(void) 67 { 68 struct perf_evlist *evlist = perf_evlist__new(); 69 70 if (evlist && perf_evlist__add_default(evlist)) { 71 perf_evlist__delete(evlist); 72 evlist = NULL; 73 } 74 75 return evlist; 76 } 77 78 struct perf_evlist 
*perf_evlist__new_dummy(void) 79 { 80 struct perf_evlist *evlist = perf_evlist__new(); 81 82 if (evlist && perf_evlist__add_dummy(evlist)) { 83 perf_evlist__delete(evlist); 84 evlist = NULL; 85 } 86 87 return evlist; 88 } 89 90 /** 91 * perf_evlist__set_id_pos - set the positions of event ids. 92 * @evlist: selected event list 93 * 94 * Events with compatible sample types all have the same id_pos 95 * and is_pos. For convenience, put a copy on evlist. 96 */ 97 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 98 { 99 struct perf_evsel *first = perf_evlist__first(evlist); 100 101 evlist->id_pos = first->id_pos; 102 evlist->is_pos = first->is_pos; 103 } 104 105 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 106 { 107 struct perf_evsel *evsel; 108 109 evlist__for_each_entry(evlist, evsel) 110 perf_evsel__calc_id_pos(evsel); 111 112 perf_evlist__set_id_pos(evlist); 113 } 114 115 static void perf_evlist__purge(struct perf_evlist *evlist) 116 { 117 struct perf_evsel *pos, *n; 118 119 evlist__for_each_entry_safe(evlist, n, pos) { 120 list_del_init(&pos->node); 121 pos->evlist = NULL; 122 perf_evsel__delete(pos); 123 } 124 125 evlist->nr_entries = 0; 126 } 127 128 void perf_evlist__exit(struct perf_evlist *evlist) 129 { 130 zfree(&evlist->mmap); 131 zfree(&evlist->backward_mmap); 132 fdarray__exit(&evlist->pollfd); 133 } 134 135 void perf_evlist__delete(struct perf_evlist *evlist) 136 { 137 if (evlist == NULL) 138 return; 139 140 perf_evlist__munmap(evlist); 141 perf_evlist__close(evlist); 142 cpu_map__put(evlist->cpus); 143 thread_map__put(evlist->threads); 144 evlist->cpus = NULL; 145 evlist->threads = NULL; 146 perf_evlist__purge(evlist); 147 perf_evlist__exit(evlist); 148 free(evlist); 149 } 150 151 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 152 struct perf_evsel *evsel) 153 { 154 /* 155 * We already have cpus for evsel (via PMU sysfs) so 156 * keep it, if there's no target cpu list defined. 
157 */ 158 if (!evsel->own_cpus || evlist->has_user_cpus) { 159 cpu_map__put(evsel->cpus); 160 evsel->cpus = cpu_map__get(evlist->cpus); 161 } else if (evsel->cpus != evsel->own_cpus) { 162 cpu_map__put(evsel->cpus); 163 evsel->cpus = cpu_map__get(evsel->own_cpus); 164 } 165 166 thread_map__put(evsel->threads); 167 evsel->threads = thread_map__get(evlist->threads); 168 } 169 170 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 171 { 172 struct perf_evsel *evsel; 173 174 evlist__for_each_entry(evlist, evsel) 175 __perf_evlist__propagate_maps(evlist, evsel); 176 } 177 178 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 179 { 180 entry->evlist = evlist; 181 list_add_tail(&entry->node, &evlist->entries); 182 entry->idx = evlist->nr_entries; 183 entry->tracking = !entry->idx; 184 185 if (!evlist->nr_entries++) 186 perf_evlist__set_id_pos(evlist); 187 188 __perf_evlist__propagate_maps(evlist, entry); 189 } 190 191 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 192 { 193 evsel->evlist = NULL; 194 list_del_init(&evsel->node); 195 evlist->nr_entries -= 1; 196 } 197 198 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 199 struct list_head *list) 200 { 201 struct perf_evsel *evsel, *temp; 202 203 __evlist__for_each_entry_safe(list, temp, evsel) { 204 list_del_init(&evsel->node); 205 perf_evlist__add(evlist, evsel); 206 } 207 } 208 209 void __perf_evlist__set_leader(struct list_head *list) 210 { 211 struct perf_evsel *evsel, *leader; 212 213 leader = list_entry(list->next, struct perf_evsel, node); 214 evsel = list_entry(list->prev, struct perf_evsel, node); 215 216 leader->nr_members = evsel->idx - leader->idx + 1; 217 218 __evlist__for_each_entry(list, evsel) { 219 evsel->leader = leader; 220 } 221 } 222 223 void perf_evlist__set_leader(struct perf_evlist *evlist) 224 { 225 if (evlist->nr_entries) { 226 evlist->nr_groups = evlist->nr_entries > 1 ? 
1 : 0; 227 __perf_evlist__set_leader(&evlist->entries); 228 } 229 } 230 231 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) 232 { 233 attr->precise_ip = 3; 234 235 while (attr->precise_ip != 0) { 236 int fd = sys_perf_event_open(attr, 0, -1, -1, 0); 237 if (fd != -1) { 238 close(fd); 239 break; 240 } 241 --attr->precise_ip; 242 } 243 } 244 245 int perf_evlist__add_default(struct perf_evlist *evlist) 246 { 247 struct perf_evsel *evsel = perf_evsel__new_cycles(); 248 249 if (evsel == NULL) 250 return -ENOMEM; 251 252 perf_evlist__add(evlist, evsel); 253 return 0; 254 } 255 256 int perf_evlist__add_dummy(struct perf_evlist *evlist) 257 { 258 struct perf_event_attr attr = { 259 .type = PERF_TYPE_SOFTWARE, 260 .config = PERF_COUNT_SW_DUMMY, 261 .size = sizeof(attr), /* to capture ABI version */ 262 }; 263 struct perf_evsel *evsel = perf_evsel__new(&attr); 264 265 if (evsel == NULL) 266 return -ENOMEM; 267 268 perf_evlist__add(evlist, evsel); 269 return 0; 270 } 271 272 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 273 struct perf_event_attr *attrs, size_t nr_attrs) 274 { 275 struct perf_evsel *evsel, *n; 276 LIST_HEAD(head); 277 size_t i; 278 279 for (i = 0; i < nr_attrs; i++) { 280 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 281 if (evsel == NULL) 282 goto out_delete_partial_list; 283 list_add_tail(&evsel->node, &head); 284 } 285 286 perf_evlist__splice_list_tail(evlist, &head); 287 288 return 0; 289 290 out_delete_partial_list: 291 __evlist__for_each_entry_safe(&head, n, evsel) 292 perf_evsel__delete(evsel); 293 return -1; 294 } 295 296 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 297 struct perf_event_attr *attrs, size_t nr_attrs) 298 { 299 size_t i; 300 301 for (i = 0; i < nr_attrs; i++) 302 event_attr_init(attrs + i); 303 304 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 305 } 306 307 struct perf_evsel * 308 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 
309 { 310 struct perf_evsel *evsel; 311 312 evlist__for_each_entry(evlist, evsel) { 313 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 314 (int)evsel->attr.config == id) 315 return evsel; 316 } 317 318 return NULL; 319 } 320 321 struct perf_evsel * 322 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 323 const char *name) 324 { 325 struct perf_evsel *evsel; 326 327 evlist__for_each_entry(evlist, evsel) { 328 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 329 (strcmp(evsel->name, name) == 0)) 330 return evsel; 331 } 332 333 return NULL; 334 } 335 336 int perf_evlist__add_newtp(struct perf_evlist *evlist, 337 const char *sys, const char *name, void *handler) 338 { 339 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 340 341 if (IS_ERR(evsel)) 342 return -1; 343 344 evsel->handler = handler; 345 perf_evlist__add(evlist, evsel); 346 return 0; 347 } 348 349 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 350 struct perf_evsel *evsel) 351 { 352 if (evsel->system_wide) 353 return 1; 354 else 355 return thread_map__nr(evlist->threads); 356 } 357 358 void perf_evlist__disable(struct perf_evlist *evlist) 359 { 360 struct perf_evsel *pos; 361 362 evlist__for_each_entry(evlist, pos) { 363 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 364 continue; 365 perf_evsel__disable(pos); 366 } 367 368 evlist->enabled = false; 369 } 370 371 void perf_evlist__enable(struct perf_evlist *evlist) 372 { 373 struct perf_evsel *pos; 374 375 evlist__for_each_entry(evlist, pos) { 376 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 377 continue; 378 perf_evsel__enable(pos); 379 } 380 381 evlist->enabled = true; 382 } 383 384 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 385 { 386 (evlist->enabled ? 
perf_evlist__disable : perf_evlist__enable)(evlist); 387 } 388 389 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 390 struct perf_evsel *evsel, int cpu) 391 { 392 int thread; 393 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 394 395 if (!evsel->fd) 396 return -EINVAL; 397 398 for (thread = 0; thread < nr_threads; thread++) { 399 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 400 if (err) 401 return err; 402 } 403 return 0; 404 } 405 406 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 407 struct perf_evsel *evsel, 408 int thread) 409 { 410 int cpu; 411 int nr_cpus = cpu_map__nr(evlist->cpus); 412 413 if (!evsel->fd) 414 return -EINVAL; 415 416 for (cpu = 0; cpu < nr_cpus; cpu++) { 417 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 418 if (err) 419 return err; 420 } 421 return 0; 422 } 423 424 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 425 struct perf_evsel *evsel, int idx) 426 { 427 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 428 429 if (per_cpu_mmaps) 430 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 431 else 432 return perf_evlist__enable_event_thread(evlist, evsel, idx); 433 } 434 435 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 436 { 437 int nr_cpus = cpu_map__nr(evlist->cpus); 438 int nr_threads = thread_map__nr(evlist->threads); 439 int nfds = 0; 440 struct perf_evsel *evsel; 441 442 evlist__for_each_entry(evlist, evsel) { 443 if (evsel->system_wide) 444 nfds += nr_cpus; 445 else 446 nfds += nr_cpus * nr_threads; 447 } 448 449 if (fdarray__available_entries(&evlist->pollfd) < nfds && 450 fdarray__grow(&evlist->pollfd, nfds) < 0) 451 return -ENOMEM; 452 453 return 0; 454 } 455 456 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 457 struct perf_mmap *map, short revent) 458 { 459 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 460 /* 461 * Save the idx so that when we 
filter out fds POLLHUP'ed we can 462 * close the associated evlist->mmap[] entry. 463 */ 464 if (pos >= 0) { 465 evlist->pollfd.priv[pos].ptr = map; 466 467 fcntl(fd, F_SETFL, O_NONBLOCK); 468 } 469 470 return pos; 471 } 472 473 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 474 { 475 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 476 } 477 478 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 479 void *arg __maybe_unused) 480 { 481 struct perf_mmap *map = fda->priv[fd].ptr; 482 483 if (map) 484 perf_mmap__put(map); 485 } 486 487 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 488 { 489 return fdarray__filter(&evlist->pollfd, revents_and_mask, 490 perf_evlist__munmap_filtered, NULL); 491 } 492 493 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 494 { 495 return fdarray__poll(&evlist->pollfd, timeout); 496 } 497 498 static void perf_evlist__id_hash(struct perf_evlist *evlist, 499 struct perf_evsel *evsel, 500 int cpu, int thread, u64 id) 501 { 502 int hash; 503 struct perf_sample_id *sid = SID(evsel, cpu, thread); 504 505 sid->id = id; 506 sid->evsel = evsel; 507 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 508 hlist_add_head(&sid->node, &evlist->heads[hash]); 509 } 510 511 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 512 int cpu, int thread, u64 id) 513 { 514 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 515 evsel->id[evsel->ids++] = id; 516 } 517 518 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 519 struct perf_evsel *evsel, 520 int cpu, int thread, int fd) 521 { 522 u64 read_data[4] = { 0, }; 523 int id_idx = 1; /* The first entry is the counter value */ 524 u64 id; 525 int ret; 526 527 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 528 if (!ret) 529 goto add; 530 531 if (errno != ENOTTY) 532 return -1; 533 534 /* Legacy way to get event id.. All hail to old kernels! 
*/ 535 536 /* 537 * This way does not work with group format read, so bail 538 * out in that case. 539 */ 540 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 541 return -1; 542 543 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 544 read(fd, &read_data, sizeof(read_data)) == -1) 545 return -1; 546 547 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 548 ++id_idx; 549 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 550 ++id_idx; 551 552 id = read_data[id_idx]; 553 554 add: 555 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 556 return 0; 557 } 558 559 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 560 struct perf_evsel *evsel, int idx, int cpu, 561 int thread) 562 { 563 struct perf_sample_id *sid = SID(evsel, cpu, thread); 564 sid->idx = idx; 565 if (evlist->cpus && cpu >= 0) 566 sid->cpu = evlist->cpus->map[cpu]; 567 else 568 sid->cpu = -1; 569 if (!evsel->system_wide && evlist->threads && thread >= 0) 570 sid->tid = thread_map__pid(evlist->threads, thread); 571 else 572 sid->tid = -1; 573 } 574 575 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 576 { 577 struct hlist_head *head; 578 struct perf_sample_id *sid; 579 int hash; 580 581 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 582 head = &evlist->heads[hash]; 583 584 hlist_for_each_entry(sid, head, node) 585 if (sid->id == id) 586 return sid; 587 588 return NULL; 589 } 590 591 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 592 { 593 struct perf_sample_id *sid; 594 595 if (evlist->nr_entries == 1 || !id) 596 return perf_evlist__first(evlist); 597 598 sid = perf_evlist__id2sid(evlist, id); 599 if (sid) 600 return sid->evsel; 601 602 if (!perf_evlist__sample_id_all(evlist)) 603 return perf_evlist__first(evlist); 604 605 return NULL; 606 } 607 608 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 609 u64 id) 610 { 611 struct perf_sample_id *sid; 612 613 if (!id) 614 
return NULL; 615 616 sid = perf_evlist__id2sid(evlist, id); 617 if (sid) 618 return sid->evsel; 619 620 return NULL; 621 } 622 623 static int perf_evlist__event2id(struct perf_evlist *evlist, 624 union perf_event *event, u64 *id) 625 { 626 const u64 *array = event->sample.array; 627 ssize_t n; 628 629 n = (event->header.size - sizeof(event->header)) >> 3; 630 631 if (event->header.type == PERF_RECORD_SAMPLE) { 632 if (evlist->id_pos >= n) 633 return -1; 634 *id = array[evlist->id_pos]; 635 } else { 636 if (evlist->is_pos > n) 637 return -1; 638 n -= evlist->is_pos; 639 *id = array[n]; 640 } 641 return 0; 642 } 643 644 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 645 union perf_event *event) 646 { 647 struct perf_evsel *first = perf_evlist__first(evlist); 648 struct hlist_head *head; 649 struct perf_sample_id *sid; 650 int hash; 651 u64 id; 652 653 if (evlist->nr_entries == 1) 654 return first; 655 656 if (!first->attr.sample_id_all && 657 event->header.type != PERF_RECORD_SAMPLE) 658 return first; 659 660 if (perf_evlist__event2id(evlist, event, &id)) 661 return NULL; 662 663 /* Synthesized events have an id of zero */ 664 if (!id) 665 return first; 666 667 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 668 head = &evlist->heads[hash]; 669 670 hlist_for_each_entry(sid, head, node) { 671 if (sid->id == id) 672 return sid->evsel; 673 } 674 return NULL; 675 } 676 677 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 678 { 679 int i; 680 681 if (!evlist->backward_mmap) 682 return 0; 683 684 for (i = 0; i < evlist->nr_mmaps; i++) { 685 int fd = evlist->backward_mmap[i].fd; 686 int err; 687 688 if (fd < 0) 689 continue; 690 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 
1 : 0); 691 if (err) 692 return err; 693 } 694 return 0; 695 } 696 697 static int perf_evlist__pause(struct perf_evlist *evlist) 698 { 699 return perf_evlist__set_paused(evlist, true); 700 } 701 702 static int perf_evlist__resume(struct perf_evlist *evlist) 703 { 704 return perf_evlist__set_paused(evlist, false); 705 } 706 707 /* When check_messup is true, 'end' must points to a good entry */ 708 static union perf_event * 709 perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, 710 u64 end, u64 *prev) 711 { 712 unsigned char *data = md->base + page_size; 713 union perf_event *event = NULL; 714 int diff = end - start; 715 716 if (check_messup) { 717 /* 718 * If we're further behind than half the buffer, there's a chance 719 * the writer will bite our tail and mess up the samples under us. 720 * 721 * If we somehow ended up ahead of the 'end', we got messed up. 722 * 723 * In either case, truncate and restart at 'end'. 724 */ 725 if (diff > md->mask / 2 || diff < 0) { 726 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 727 728 /* 729 * 'end' points to a known good entry, start there. 730 */ 731 start = end; 732 diff = 0; 733 } 734 } 735 736 if (diff >= (int)sizeof(event->header)) { 737 size_t size; 738 739 event = (union perf_event *)&data[start & md->mask]; 740 size = event->header.size; 741 742 if (size < sizeof(event->header) || diff < (int)size) { 743 event = NULL; 744 goto broken_event; 745 } 746 747 /* 748 * Event straddles the mmap boundary -- header should always 749 * be inside due to u64 alignment of output. 
750 */ 751 if ((start & md->mask) + size != ((start + size) & md->mask)) { 752 unsigned int offset = start; 753 unsigned int len = min(sizeof(*event), size), cpy; 754 void *dst = md->event_copy; 755 756 do { 757 cpy = min(md->mask + 1 - (offset & md->mask), len); 758 memcpy(dst, &data[offset & md->mask], cpy); 759 offset += cpy; 760 dst += cpy; 761 len -= cpy; 762 } while (len); 763 764 event = (union perf_event *) md->event_copy; 765 } 766 767 start += size; 768 } 769 770 broken_event: 771 if (prev) 772 *prev = start; 773 774 return event; 775 } 776 777 union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) 778 { 779 u64 head; 780 u64 old = md->prev; 781 782 /* 783 * Check if event was unmapped due to a POLLHUP/POLLERR. 784 */ 785 if (!refcount_read(&md->refcnt)) 786 return NULL; 787 788 head = perf_mmap__read_head(md); 789 790 return perf_mmap__read(md, check_messup, old, head, &md->prev); 791 } 792 793 union perf_event * 794 perf_mmap__read_backward(struct perf_mmap *md) 795 { 796 u64 head, end; 797 u64 start = md->prev; 798 799 /* 800 * Check if event was unmapped due to a POLLHUP/POLLERR. 801 */ 802 if (!refcount_read(&md->refcnt)) 803 return NULL; 804 805 head = perf_mmap__read_head(md); 806 if (!head) 807 return NULL; 808 809 /* 810 * 'head' pointer starts from 0. Kernel minus sizeof(record) form 811 * it each time when kernel writes to it, so in fact 'head' is 812 * negative. 'end' pointer is made manually by adding the size of 813 * the ring buffer to 'head' pointer, means the validate data can 814 * read is the whole ring buffer. If 'end' is positive, the ring 815 * buffer has not fully filled, so we must adjust 'end' to 0. 816 * 817 * However, since both 'head' and 'end' is unsigned, we can't 818 * simply compare 'end' against 0. Here we compare '-head' and 819 * the size of the ring buffer, where -head is the number of bytes 820 * kernel write to the ring buffer. 
821 */ 822 if (-head < (u64)(md->mask + 1)) 823 end = 0; 824 else 825 end = head + md->mask + 1; 826 827 return perf_mmap__read(md, false, start, end, &md->prev); 828 } 829 830 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) 831 { 832 struct perf_mmap *md = &evlist->mmap[idx]; 833 834 /* 835 * Check messup is required for forward overwritable ring buffer: 836 * memory pointed by md->prev can be overwritten in this case. 837 * No need for read-write ring buffer: kernel stop outputting when 838 * it hit md->prev (perf_mmap__consume()). 839 */ 840 return perf_mmap__read_forward(md, evlist->overwrite); 841 } 842 843 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) 844 { 845 struct perf_mmap *md = &evlist->mmap[idx]; 846 847 /* 848 * No need to check messup for backward ring buffer: 849 * We can always read arbitrary long data from a backward 850 * ring buffer unless we forget to pause it before reading. 851 */ 852 return perf_mmap__read_backward(md); 853 } 854 855 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 856 { 857 return perf_evlist__mmap_read_forward(evlist, idx); 858 } 859 860 void perf_mmap__read_catchup(struct perf_mmap *md) 861 { 862 u64 head; 863 864 if (!refcount_read(&md->refcnt)) 865 return; 866 867 head = perf_mmap__read_head(md); 868 md->prev = head; 869 } 870 871 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) 872 { 873 perf_mmap__read_catchup(&evlist->mmap[idx]); 874 } 875 876 static bool perf_mmap__empty(struct perf_mmap *md) 877 { 878 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; 879 } 880 881 static void perf_mmap__get(struct perf_mmap *map) 882 { 883 refcount_inc(&map->refcnt); 884 } 885 886 static void perf_mmap__put(struct perf_mmap *md) 887 { 888 BUG_ON(md->base && refcount_read(&md->refcnt) == 0); 889 890 if (refcount_dec_and_test(&md->refcnt)) 891 perf_mmap__munmap(md); 892 } 893 894 
void perf_mmap__consume(struct perf_mmap *md, bool overwrite) 895 { 896 if (!overwrite) { 897 u64 old = md->prev; 898 899 perf_mmap__write_tail(md, old); 900 } 901 902 if (refcount_read(&md->refcnt) == 1 && perf_mmap__empty(md)) 903 perf_mmap__put(md); 904 } 905 906 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 907 { 908 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); 909 } 910 911 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, 912 struct auxtrace_mmap_params *mp __maybe_unused, 913 void *userpg __maybe_unused, 914 int fd __maybe_unused) 915 { 916 return 0; 917 } 918 919 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) 920 { 921 } 922 923 void __weak auxtrace_mmap_params__init( 924 struct auxtrace_mmap_params *mp __maybe_unused, 925 off_t auxtrace_offset __maybe_unused, 926 unsigned int auxtrace_pages __maybe_unused, 927 bool auxtrace_overwrite __maybe_unused) 928 { 929 } 930 931 void __weak auxtrace_mmap_params__set_idx( 932 struct auxtrace_mmap_params *mp __maybe_unused, 933 struct perf_evlist *evlist __maybe_unused, 934 int idx __maybe_unused, 935 bool per_cpu __maybe_unused) 936 { 937 } 938 939 static void perf_mmap__munmap(struct perf_mmap *map) 940 { 941 if (map->base != NULL) { 942 munmap(map->base, perf_mmap__mmap_len(map)); 943 map->base = NULL; 944 map->fd = -1; 945 refcount_set(&map->refcnt, 0); 946 } 947 auxtrace_mmap__munmap(&map->auxtrace_mmap); 948 } 949 950 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 951 { 952 int i; 953 954 if (evlist->mmap) 955 for (i = 0; i < evlist->nr_mmaps; i++) 956 perf_mmap__munmap(&evlist->mmap[i]); 957 958 if (evlist->backward_mmap) 959 for (i = 0; i < evlist->nr_mmaps; i++) 960 perf_mmap__munmap(&evlist->backward_mmap[i]); 961 } 962 963 void perf_evlist__munmap(struct perf_evlist *evlist) 964 { 965 perf_evlist__munmap_nofree(evlist); 966 zfree(&evlist->mmap); 967 zfree(&evlist->backward_mmap); 968 } 969 970 static 
struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) 971 { 972 int i; 973 struct perf_mmap *map; 974 975 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 976 if (cpu_map__empty(evlist->cpus)) 977 evlist->nr_mmaps = thread_map__nr(evlist->threads); 978 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 979 if (!map) 980 return NULL; 981 982 for (i = 0; i < evlist->nr_mmaps; i++) { 983 map[i].fd = -1; 984 /* 985 * When the perf_mmap() call is made we grab one refcount, plus 986 * one extra to let perf_evlist__mmap_consume() get the last 987 * events after all real references (perf_mmap__get()) are 988 * dropped. 989 * 990 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 991 * thus does perf_mmap__get() on it. 992 */ 993 refcount_set(&map[i].refcnt, 0); 994 } 995 return map; 996 } 997 998 struct mmap_params { 999 int prot; 1000 int mask; 1001 struct auxtrace_mmap_params auxtrace_mp; 1002 }; 1003 1004 static int perf_mmap__mmap(struct perf_mmap *map, 1005 struct mmap_params *mp, int fd) 1006 { 1007 /* 1008 * The last one will be done at perf_evlist__mmap_consume(), so that we 1009 * make sure we don't prevent tools from consuming every last event in 1010 * the ring buffer. 1011 * 1012 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 1013 * anymore, but the last events for it are still in the ring buffer, 1014 * waiting to be consumed. 1015 * 1016 * Tools can chose to ignore this at their own discretion, but the 1017 * evlist layer can't just drop it when filtering events in 1018 * perf_evlist__filter_pollfd(). 
1019 */ 1020 refcount_set(&map->refcnt, 2); 1021 map->prev = 0; 1022 map->mask = mp->mask; 1023 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, 1024 MAP_SHARED, fd, 0); 1025 if (map->base == MAP_FAILED) { 1026 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 1027 errno); 1028 map->base = NULL; 1029 return -1; 1030 } 1031 map->fd = fd; 1032 1033 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 1034 &mp->auxtrace_mp, map->base, fd)) 1035 return -1; 1036 1037 return 0; 1038 } 1039 1040 static bool 1041 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 1042 struct perf_evsel *evsel) 1043 { 1044 if (evsel->attr.write_backward) 1045 return false; 1046 return true; 1047 } 1048 1049 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 1050 struct mmap_params *mp, int cpu_idx, 1051 int thread, int *_output, int *_output_backward) 1052 { 1053 struct perf_evsel *evsel; 1054 int revent; 1055 int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); 1056 1057 evlist__for_each_entry(evlist, evsel) { 1058 struct perf_mmap *maps = evlist->mmap; 1059 int *output = _output; 1060 int fd; 1061 int cpu; 1062 1063 if (evsel->attr.write_backward) { 1064 output = _output_backward; 1065 maps = evlist->backward_mmap; 1066 1067 if (!maps) { 1068 maps = perf_evlist__alloc_mmap(evlist); 1069 if (!maps) 1070 return -1; 1071 evlist->backward_mmap = maps; 1072 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 1073 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 1074 } 1075 } 1076 1077 if (evsel->system_wide && thread) 1078 continue; 1079 1080 cpu = cpu_map__idx(evsel->cpus, evlist_cpu); 1081 if (cpu == -1) 1082 continue; 1083 1084 fd = FD(evsel, cpu, thread); 1085 1086 if (*output == -1) { 1087 *output = fd; 1088 1089 if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) 1090 return -1; 1091 } else { 1092 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 1093 return -1; 1094 1095 perf_mmap__get(&maps[idx]); 1096 } 1097 1098 revent 
= perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 1099 1100 /* 1101 * The system_wide flag causes a selected event to be opened 1102 * always without a pid. Consequently it will never get a 1103 * POLLHUP, but it is used for tracking in combination with 1104 * other events, so it should not need to be polled anyway. 1105 * Therefore don't add it for polling. 1106 */ 1107 if (!evsel->system_wide && 1108 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 1109 perf_mmap__put(&maps[idx]); 1110 return -1; 1111 } 1112 1113 if (evsel->attr.read_format & PERF_FORMAT_ID) { 1114 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 1115 fd) < 0) 1116 return -1; 1117 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 1118 thread); 1119 } 1120 } 1121 1122 return 0; 1123 } 1124 1125 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 1126 struct mmap_params *mp) 1127 { 1128 int cpu, thread; 1129 int nr_cpus = cpu_map__nr(evlist->cpus); 1130 int nr_threads = thread_map__nr(evlist->threads); 1131 1132 pr_debug2("perf event ring buffer mmapped per cpu\n"); 1133 for (cpu = 0; cpu < nr_cpus; cpu++) { 1134 int output = -1; 1135 int output_backward = -1; 1136 1137 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 1138 true); 1139 1140 for (thread = 0; thread < nr_threads; thread++) { 1141 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 1142 thread, &output, &output_backward)) 1143 goto out_unmap; 1144 } 1145 } 1146 1147 return 0; 1148 1149 out_unmap: 1150 perf_evlist__munmap_nofree(evlist); 1151 return -1; 1152 } 1153 1154 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 1155 struct mmap_params *mp) 1156 { 1157 int thread; 1158 int nr_threads = thread_map__nr(evlist->threads); 1159 1160 pr_debug2("perf event ring buffer mmapped per thread\n"); 1161 for (thread = 0; thread < nr_threads; thread++) { 1162 int output = -1; 1163 int output_backward = -1; 1164 1165 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, 
thread, 1166 false); 1167 1168 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 1169 &output, &output_backward)) 1170 goto out_unmap; 1171 } 1172 1173 return 0; 1174 1175 out_unmap: 1176 perf_evlist__munmap_nofree(evlist); 1177 return -1; 1178 } 1179 1180 unsigned long perf_event_mlock_kb_in_pages(void) 1181 { 1182 unsigned long pages; 1183 int max; 1184 1185 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 1186 /* 1187 * Pick a once upon a time good value, i.e. things look 1188 * strange since we can't read a sysctl value, but lets not 1189 * die yet... 1190 */ 1191 max = 512; 1192 } else { 1193 max -= (page_size / 1024); 1194 } 1195 1196 pages = (max * 1024) / page_size; 1197 if (!is_power_of_2(pages)) 1198 pages = rounddown_pow_of_two(pages); 1199 1200 return pages; 1201 } 1202 1203 size_t perf_evlist__mmap_size(unsigned long pages) 1204 { 1205 if (pages == UINT_MAX) 1206 pages = perf_event_mlock_kb_in_pages(); 1207 else if (!is_power_of_2(pages)) 1208 return 0; 1209 1210 return (pages + 1) * page_size; 1211 } 1212 1213 static long parse_pages_arg(const char *str, unsigned long min, 1214 unsigned long max) 1215 { 1216 unsigned long pages, val; 1217 static struct parse_tag tags[] = { 1218 { .tag = 'B', .mult = 1 }, 1219 { .tag = 'K', .mult = 1 << 10 }, 1220 { .tag = 'M', .mult = 1 << 20 }, 1221 { .tag = 'G', .mult = 1 << 30 }, 1222 { .tag = 0 }, 1223 }; 1224 1225 if (str == NULL) 1226 return -EINVAL; 1227 1228 val = parse_tag_value(str, tags); 1229 if (val != (unsigned long) -1) { 1230 /* we got file size value */ 1231 pages = PERF_ALIGN(val, page_size) / page_size; 1232 } else { 1233 /* we got pages count value */ 1234 char *eptr; 1235 pages = strtoul(str, &eptr, 10); 1236 if (*eptr != '\0') 1237 return -EINVAL; 1238 } 1239 1240 if (pages == 0 && min == 0) { 1241 /* leave number of pages at 0 */ 1242 } else if (!is_power_of_2(pages)) { 1243 char buf[100]; 1244 1245 /* round pages up to next power of 2 */ 1246 pages = 
roundup_pow_of_two(pages); 1247 if (!pages) 1248 return -EINVAL; 1249 1250 unit_number__scnprintf(buf, sizeof(buf), pages * page_size); 1251 pr_info("rounding mmap pages size to %s (%lu pages)\n", 1252 buf, pages); 1253 } 1254 1255 if (pages > max) 1256 return -EINVAL; 1257 1258 return pages; 1259 } 1260 1261 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 1262 { 1263 unsigned long max = UINT_MAX; 1264 long pages; 1265 1266 if (max > SIZE_MAX / page_size) 1267 max = SIZE_MAX / page_size; 1268 1269 pages = parse_pages_arg(str, 1, max); 1270 if (pages < 0) { 1271 pr_err("Invalid argument for --mmap_pages/-m\n"); 1272 return -1; 1273 } 1274 1275 *mmap_pages = pages; 1276 return 0; 1277 } 1278 1279 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1280 int unset __maybe_unused) 1281 { 1282 return __perf_evlist__parse_mmap_pages(opt->value, str); 1283 } 1284 1285 /** 1286 * perf_evlist__mmap_ex - Create mmaps to receive events. 1287 * @evlist: list of events 1288 * @pages: map length in pages 1289 * @overwrite: overwrite older events? 1290 * @auxtrace_pages - auxtrace map length in pages 1291 * @auxtrace_overwrite - overwrite older auxtrace data? 1292 * 1293 * If @overwrite is %false the user needs to signal event consumption using 1294 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1295 * automatically. 1296 * 1297 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1298 * consumption using auxtrace_mmap__write_tail(). 1299 * 1300 * Return: %0 on success, negative error code otherwise. 1301 */ 1302 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1303 bool overwrite, unsigned int auxtrace_pages, 1304 bool auxtrace_overwrite) 1305 { 1306 struct perf_evsel *evsel; 1307 const struct cpu_map *cpus = evlist->cpus; 1308 const struct thread_map *threads = evlist->threads; 1309 struct mmap_params mp = { 1310 .prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE), 1311 }; 1312 1313 if (!evlist->mmap) 1314 evlist->mmap = perf_evlist__alloc_mmap(evlist); 1315 if (!evlist->mmap) 1316 return -ENOMEM; 1317 1318 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1319 return -ENOMEM; 1320 1321 evlist->overwrite = overwrite; 1322 evlist->mmap_len = perf_evlist__mmap_size(pages); 1323 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1324 mp.mask = evlist->mmap_len - page_size - 1; 1325 1326 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1327 auxtrace_pages, auxtrace_overwrite); 1328 1329 evlist__for_each_entry(evlist, evsel) { 1330 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1331 evsel->sample_id == NULL && 1332 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1333 return -ENOMEM; 1334 } 1335 1336 if (cpu_map__empty(cpus)) 1337 return perf_evlist__mmap_per_thread(evlist, &mp); 1338 1339 return perf_evlist__mmap_per_cpu(evlist, &mp); 1340 } 1341 1342 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1343 bool overwrite) 1344 { 1345 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); 1346 } 1347 1348 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1349 { 1350 struct cpu_map *cpus; 1351 struct thread_map *threads; 1352 1353 threads = thread_map__new_str(target->pid, target->tid, target->uid); 1354 1355 if (!threads) 1356 return -1; 1357 1358 if (target__uses_dummy_map(target)) 1359 cpus = cpu_map__dummy_new(); 1360 else 1361 cpus = cpu_map__new(target->cpu_list); 1362 1363 if (!cpus) 1364 goto out_delete_threads; 1365 1366 evlist->has_user_cpus = !!target->cpu_list; 1367 1368 perf_evlist__set_maps(evlist, cpus, threads); 1369 1370 return 0; 1371 1372 out_delete_threads: 1373 thread_map__put(threads); 1374 return -1; 1375 } 1376 1377 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1378 struct thread_map *threads) 1379 { 1380 /* 1381 * Allow for the possibility that 
one or another of the maps isn't being 1382 * changed i.e. don't put it. Note we are assuming the maps that are 1383 * being applied are brand new and evlist is taking ownership of the 1384 * original reference count of 1. If that is not the case it is up to 1385 * the caller to increase the reference count. 1386 */ 1387 if (cpus != evlist->cpus) { 1388 cpu_map__put(evlist->cpus); 1389 evlist->cpus = cpu_map__get(cpus); 1390 } 1391 1392 if (threads != evlist->threads) { 1393 thread_map__put(evlist->threads); 1394 evlist->threads = thread_map__get(threads); 1395 } 1396 1397 perf_evlist__propagate_maps(evlist); 1398 } 1399 1400 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1401 enum perf_event_sample_format bit) 1402 { 1403 struct perf_evsel *evsel; 1404 1405 evlist__for_each_entry(evlist, evsel) 1406 __perf_evsel__set_sample_bit(evsel, bit); 1407 } 1408 1409 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1410 enum perf_event_sample_format bit) 1411 { 1412 struct perf_evsel *evsel; 1413 1414 evlist__for_each_entry(evlist, evsel) 1415 __perf_evsel__reset_sample_bit(evsel, bit); 1416 } 1417 1418 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1419 { 1420 struct perf_evsel *evsel; 1421 int err = 0; 1422 const int ncpus = cpu_map__nr(evlist->cpus), 1423 nthreads = thread_map__nr(evlist->threads); 1424 1425 evlist__for_each_entry(evlist, evsel) { 1426 if (evsel->filter == NULL) 1427 continue; 1428 1429 /* 1430 * filters only work for tracepoint event, which doesn't have cpu limit. 1431 * So evlist and evsel should always be same. 
/**
 * perf_evlist__set_filter_pids - filter out events coming from a set of pids
 * @evlist: event list the filter is applied to via perf_evlist__set_filter()
 * @npids: number of entries in @pids
 * @pids: pids to exclude
 *
 * Builds the filter expression
 *   "common_pid != A && common_pid != B && ..."
 * and hands it to perf_evlist__set_filter().
 *
 * An empty @pids array is a no-op: there is nothing to exclude, so no filter
 * string is built and the existing filters are left untouched.  (This used to
 * pass an uninitialized pointer to perf_evlist__set_filter() and free().)
 *
 * Return: 0 on success (or when @npids is 0), -1 if the filter string could
 * not be allocated, otherwise whatever perf_evlist__set_filter() returns.
 */
int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;
	int ret = -1;
	size_t i;

	if (npids == 0)
		return 0;

	for (i = 0; i < npids; ++i) {
		char *tmp;

		/*
		 * The first term stands alone, later ones are chained with
		 * " && ".  asprintf() leaves its output pointer undefined on
		 * failure, hence the temporary.
		 */
		if (asprintf(&tmp, "%s%scommon_pid != %d",
			     filter ? filter : "",
			     filter ? " && " : "",
			     pids[i]) < 0)
			goto out_free;

		free(filter);
		filter = tmp;
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}
evlist->combined_sample_type |= evsel->attr.sample_type; 1520 1521 return evlist->combined_sample_type; 1522 } 1523 1524 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1525 { 1526 evlist->combined_sample_type = 0; 1527 return __perf_evlist__combined_sample_type(evlist); 1528 } 1529 1530 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1531 { 1532 struct perf_evsel *evsel; 1533 u64 branch_type = 0; 1534 1535 evlist__for_each_entry(evlist, evsel) 1536 branch_type |= evsel->attr.branch_sample_type; 1537 return branch_type; 1538 } 1539 1540 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1541 { 1542 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1543 u64 read_format = first->attr.read_format; 1544 u64 sample_type = first->attr.sample_type; 1545 1546 evlist__for_each_entry(evlist, pos) { 1547 if (read_format != pos->attr.read_format) 1548 return false; 1549 } 1550 1551 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1552 if ((sample_type & PERF_SAMPLE_READ) && 1553 !(read_format & PERF_FORMAT_ID)) { 1554 return false; 1555 } 1556 1557 return true; 1558 } 1559 1560 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1561 { 1562 struct perf_evsel *first = perf_evlist__first(evlist); 1563 return first->attr.read_format; 1564 } 1565 1566 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1567 { 1568 struct perf_evsel *first = perf_evlist__first(evlist); 1569 struct perf_sample *data; 1570 u64 sample_type; 1571 u16 size = 0; 1572 1573 if (!first->attr.sample_id_all) 1574 goto out; 1575 1576 sample_type = first->attr.sample_type; 1577 1578 if (sample_type & PERF_SAMPLE_TID) 1579 size += sizeof(data->tid) * 2; 1580 1581 if (sample_type & PERF_SAMPLE_TIME) 1582 size += sizeof(data->time); 1583 1584 if (sample_type & PERF_SAMPLE_ID) 1585 size += sizeof(data->id); 1586 1587 if (sample_type & PERF_SAMPLE_STREAM_ID) 1588 size += sizeof(data->stream_id); 1589 1590 if (sample_type & PERF_SAMPLE_CPU) 
1591 size += sizeof(data->cpu) * 2; 1592 1593 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1594 size += sizeof(data->id); 1595 out: 1596 return size; 1597 } 1598 1599 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1600 { 1601 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1602 1603 evlist__for_each_entry_continue(evlist, pos) { 1604 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1605 return false; 1606 } 1607 1608 return true; 1609 } 1610 1611 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1612 { 1613 struct perf_evsel *first = perf_evlist__first(evlist); 1614 return first->attr.sample_id_all; 1615 } 1616 1617 void perf_evlist__set_selected(struct perf_evlist *evlist, 1618 struct perf_evsel *evsel) 1619 { 1620 evlist->selected = evsel; 1621 } 1622 1623 void perf_evlist__close(struct perf_evlist *evlist) 1624 { 1625 struct perf_evsel *evsel; 1626 int ncpus = cpu_map__nr(evlist->cpus); 1627 int nthreads = thread_map__nr(evlist->threads); 1628 1629 evlist__for_each_entry_reverse(evlist, evsel) { 1630 int n = evsel->cpus ? evsel->cpus->nr : ncpus; 1631 perf_evsel__close(evsel, n, nthreads); 1632 } 1633 } 1634 1635 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1636 { 1637 struct cpu_map *cpus; 1638 struct thread_map *threads; 1639 int err = -ENOMEM; 1640 1641 /* 1642 * Try reading /sys/devices/system/cpu/online to get 1643 * an all cpus map. 
1644 * 1645 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1646 * code needs an overhaul to properly forward the 1647 * error, and we may not want to do that fallback to a 1648 * default cpu identity map :-\ 1649 */ 1650 cpus = cpu_map__new(NULL); 1651 if (!cpus) 1652 goto out; 1653 1654 threads = thread_map__new_dummy(); 1655 if (!threads) 1656 goto out_put; 1657 1658 perf_evlist__set_maps(evlist, cpus, threads); 1659 out: 1660 return err; 1661 out_put: 1662 cpu_map__put(cpus); 1663 goto out; 1664 } 1665 1666 int perf_evlist__open(struct perf_evlist *evlist) 1667 { 1668 struct perf_evsel *evsel; 1669 int err; 1670 1671 /* 1672 * Default: one fd per CPU, all threads, aka systemwide 1673 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1674 */ 1675 if (evlist->threads == NULL && evlist->cpus == NULL) { 1676 err = perf_evlist__create_syswide_maps(evlist); 1677 if (err < 0) 1678 goto out_err; 1679 } 1680 1681 perf_evlist__update_id_pos(evlist); 1682 1683 evlist__for_each_entry(evlist, evsel) { 1684 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1685 if (err < 0) 1686 goto out_err; 1687 } 1688 1689 return 0; 1690 out_err: 1691 perf_evlist__close(evlist); 1692 errno = -err; 1693 return err; 1694 } 1695 1696 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1697 const char *argv[], bool pipe_output, 1698 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1699 { 1700 int child_ready_pipe[2], go_pipe[2]; 1701 char bf; 1702 1703 if (pipe(child_ready_pipe) < 0) { 1704 perror("failed to create 'ready' pipe"); 1705 return -1; 1706 } 1707 1708 if (pipe(go_pipe) < 0) { 1709 perror("failed to create 'go' pipe"); 1710 goto out_close_ready_pipe; 1711 } 1712 1713 evlist->workload.pid = fork(); 1714 if (evlist->workload.pid < 0) { 1715 perror("failed to fork"); 1716 goto out_close_pipes; 1717 } 1718 1719 if (!evlist->workload.pid) { 1720 int ret; 1721 1722 if (pipe_output) 1723 dup2(2, 1); 1724 1725 
signal(SIGTERM, SIG_DFL); 1726 1727 close(child_ready_pipe[0]); 1728 close(go_pipe[1]); 1729 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1730 1731 /* 1732 * Tell the parent we're ready to go 1733 */ 1734 close(child_ready_pipe[1]); 1735 1736 /* 1737 * Wait until the parent tells us to go. 1738 */ 1739 ret = read(go_pipe[0], &bf, 1); 1740 /* 1741 * The parent will ask for the execvp() to be performed by 1742 * writing exactly one byte, in workload.cork_fd, usually via 1743 * perf_evlist__start_workload(). 1744 * 1745 * For cancelling the workload without actually running it, 1746 * the parent will just close workload.cork_fd, without writing 1747 * anything, i.e. read will return zero and we just exit() 1748 * here. 1749 */ 1750 if (ret != 1) { 1751 if (ret == -1) 1752 perror("unable to read pipe"); 1753 exit(ret); 1754 } 1755 1756 execvp(argv[0], (char **)argv); 1757 1758 if (exec_error) { 1759 union sigval val; 1760 1761 val.sival_int = errno; 1762 if (sigqueue(getppid(), SIGUSR1, val)) 1763 perror(argv[0]); 1764 } else 1765 perror(argv[0]); 1766 exit(-1); 1767 } 1768 1769 if (exec_error) { 1770 struct sigaction act = { 1771 .sa_flags = SA_SIGINFO, 1772 .sa_sigaction = exec_error, 1773 }; 1774 sigaction(SIGUSR1, &act, NULL); 1775 } 1776 1777 if (target__none(target)) { 1778 if (evlist->threads == NULL) { 1779 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1780 __func__, __LINE__); 1781 goto out_close_pipes; 1782 } 1783 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1784 } 1785 1786 close(child_ready_pipe[1]); 1787 close(go_pipe[0]); 1788 /* 1789 * wait for child to settle 1790 */ 1791 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1792 perror("unable to read pipe"); 1793 goto out_close_pipes; 1794 } 1795 1796 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1797 evlist->workload.cork_fd = go_pipe[1]; 1798 close(child_ready_pipe[0]); 1799 return 0; 1800 1801 out_close_pipes: 1802 close(go_pipe[0]); 1803 close(go_pipe[1]); 1804 
out_close_ready_pipe: 1805 close(child_ready_pipe[0]); 1806 close(child_ready_pipe[1]); 1807 return -1; 1808 } 1809 1810 int perf_evlist__start_workload(struct perf_evlist *evlist) 1811 { 1812 if (evlist->workload.cork_fd > 0) { 1813 char bf = 0; 1814 int ret; 1815 /* 1816 * Remove the cork, let it rip! 1817 */ 1818 ret = write(evlist->workload.cork_fd, &bf, 1); 1819 if (ret < 0) 1820 perror("unable to write to pipe"); 1821 1822 close(evlist->workload.cork_fd); 1823 return ret; 1824 } 1825 1826 return 0; 1827 } 1828 1829 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1830 struct perf_sample *sample) 1831 { 1832 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1833 1834 if (!evsel) 1835 return -EFAULT; 1836 return perf_evsel__parse_sample(evsel, event, sample); 1837 } 1838 1839 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1840 { 1841 struct perf_evsel *evsel; 1842 size_t printed = 0; 1843 1844 evlist__for_each_entry(evlist, evsel) { 1845 printed += fprintf(fp, "%s%s", evsel->idx ? 
", " : "", 1846 perf_evsel__name(evsel)); 1847 } 1848 1849 return printed + fprintf(fp, "\n"); 1850 } 1851 1852 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1853 int err, char *buf, size_t size) 1854 { 1855 int printed, value; 1856 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1857 1858 switch (err) { 1859 case EACCES: 1860 case EPERM: 1861 printed = scnprintf(buf, size, 1862 "Error:\t%s.\n" 1863 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1864 1865 value = perf_event_paranoid(); 1866 1867 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1868 1869 if (value >= 2) { 1870 printed += scnprintf(buf + printed, size - printed, 1871 "For your workloads it needs to be <= 1\nHint:\t"); 1872 } 1873 printed += scnprintf(buf + printed, size - printed, 1874 "For system wide tracing it needs to be set to -1.\n"); 1875 1876 printed += scnprintf(buf + printed, size - printed, 1877 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1878 "Hint:\tThe current value is %d.", value); 1879 break; 1880 case EINVAL: { 1881 struct perf_evsel *first = perf_evlist__first(evlist); 1882 int max_freq; 1883 1884 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1885 goto out_default; 1886 1887 if (first->attr.sample_freq < (u64)max_freq) 1888 goto out_default; 1889 1890 printed = scnprintf(buf, size, 1891 "Error:\t%s.\n" 1892 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1893 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1894 emsg, max_freq, first->attr.sample_freq); 1895 break; 1896 } 1897 default: 1898 out_default: 1899 scnprintf(buf, size, "%s", emsg); 1900 break; 1901 } 1902 1903 return 0; 1904 } 1905 1906 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1907 { 1908 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1909 int pages_attempted = 
evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1910 1911 switch (err) { 1912 case EPERM: 1913 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1914 printed += scnprintf(buf + printed, size - printed, 1915 "Error:\t%s.\n" 1916 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1917 "Hint:\tTried using %zd kB.\n", 1918 emsg, pages_max_per_user, pages_attempted); 1919 1920 if (pages_attempted >= pages_max_per_user) { 1921 printed += scnprintf(buf + printed, size - printed, 1922 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1923 pages_max_per_user + pages_attempted); 1924 } 1925 1926 printed += scnprintf(buf + printed, size - printed, 1927 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1928 break; 1929 default: 1930 scnprintf(buf, size, "%s", emsg); 1931 break; 1932 } 1933 1934 return 0; 1935 } 1936 1937 void perf_evlist__to_front(struct perf_evlist *evlist, 1938 struct perf_evsel *move_evsel) 1939 { 1940 struct perf_evsel *evsel, *n; 1941 LIST_HEAD(move); 1942 1943 if (move_evsel == perf_evlist__first(evlist)) 1944 return; 1945 1946 evlist__for_each_entry_safe(evlist, n, evsel) { 1947 if (evsel->leader == move_evsel->leader) 1948 list_move_tail(&evsel->node, &move); 1949 } 1950 1951 list_splice(&move, &evlist->entries); 1952 } 1953 1954 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1955 struct perf_evsel *tracking_evsel) 1956 { 1957 struct perf_evsel *evsel; 1958 1959 if (tracking_evsel->tracking) 1960 return; 1961 1962 evlist__for_each_entry(evlist, evsel) { 1963 if (evsel != tracking_evsel) 1964 evsel->tracking = false; 1965 } 1966 1967 tracking_evsel->tracking = true; 1968 } 1969 1970 struct perf_evsel * 1971 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1972 const char *str) 1973 { 1974 struct perf_evsel *evsel; 1975 1976 evlist__for_each_entry(evlist, evsel) { 1977 if (!evsel->name) 1978 continue; 1979 if (strcmp(str, evsel->name) == 0) 
1980 return evsel; 1981 } 1982 1983 return NULL; 1984 } 1985 1986 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1987 enum bkw_mmap_state state) 1988 { 1989 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1990 enum action { 1991 NONE, 1992 PAUSE, 1993 RESUME, 1994 } action = NONE; 1995 1996 if (!evlist->backward_mmap) 1997 return; 1998 1999 switch (old_state) { 2000 case BKW_MMAP_NOTREADY: { 2001 if (state != BKW_MMAP_RUNNING) 2002 goto state_err;; 2003 break; 2004 } 2005 case BKW_MMAP_RUNNING: { 2006 if (state != BKW_MMAP_DATA_PENDING) 2007 goto state_err; 2008 action = PAUSE; 2009 break; 2010 } 2011 case BKW_MMAP_DATA_PENDING: { 2012 if (state != BKW_MMAP_EMPTY) 2013 goto state_err; 2014 break; 2015 } 2016 case BKW_MMAP_EMPTY: { 2017 if (state != BKW_MMAP_RUNNING) 2018 goto state_err; 2019 action = RESUME; 2020 break; 2021 } 2022 default: 2023 WARN_ONCE(1, "Shouldn't get there\n"); 2024 } 2025 2026 evlist->bkw_mmap_state = state; 2027 2028 switch (action) { 2029 case PAUSE: 2030 perf_evlist__pause(evlist); 2031 break; 2032 case RESUME: 2033 perf_evlist__resume(evlist); 2034 break; 2035 case NONE: 2036 default: 2037 break; 2038 } 2039 2040 state_err: 2041 return; 2042 } 2043