1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <poll.h> 12 #include "cpumap.h" 13 #include "thread_map.h" 14 #include "target.h" 15 #include "evlist.h" 16 #include "evsel.h" 17 #include "debug.h" 18 #include "asm/bug.h" 19 #include <unistd.h> 20 21 #include "parse-events.h" 22 #include <subcmd/parse-options.h> 23 24 #include <sys/mman.h> 25 26 #include <linux/bitops.h> 27 #include <linux/hash.h> 28 #include <linux/log2.h> 29 #include <linux/err.h> 30 31 static void perf_mmap__munmap(struct perf_mmap *map); 32 static void perf_mmap__put(struct perf_mmap *map); 33 34 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 35 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 36 37 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 38 struct thread_map *threads) 39 { 40 int i; 41 42 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 43 INIT_HLIST_HEAD(&evlist->heads[i]); 44 INIT_LIST_HEAD(&evlist->entries); 45 perf_evlist__set_maps(evlist, cpus, threads); 46 fdarray__init(&evlist->pollfd, 64); 47 evlist->workload.pid = -1; 48 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 49 } 50 51 struct perf_evlist *perf_evlist__new(void) 52 { 53 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 54 55 if (evlist != NULL) 56 perf_evlist__init(evlist, NULL, NULL); 57 58 return evlist; 59 } 60 61 struct perf_evlist *perf_evlist__new_default(void) 62 { 63 struct perf_evlist *evlist = perf_evlist__new(); 64 65 if (evlist && perf_evlist__add_default(evlist)) { 66 perf_evlist__delete(evlist); 67 evlist = NULL; 68 } 69 70 return evlist; 71 } 72 73 struct perf_evlist *perf_evlist__new_dummy(void) 74 { 75 struct perf_evlist *evlist = perf_evlist__new(); 76 77 if (evlist && perf_evlist__add_dummy(evlist)) { 78 perf_evlist__delete(evlist); 79 evlist = NULL; 80 } 81 82 return evlist; 83 } 84 85 /** 86 * perf_evlist__set_id_pos - set the positions of event ids. 87 * @evlist: selected event list 88 * 89 * Events with compatible sample types all have the same id_pos 90 * and is_pos. For convenience, put a copy on evlist. 91 */ 92 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 93 { 94 struct perf_evsel *first = perf_evlist__first(evlist); 95 96 evlist->id_pos = first->id_pos; 97 evlist->is_pos = first->is_pos; 98 } 99 100 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 101 { 102 struct perf_evsel *evsel; 103 104 evlist__for_each_entry(evlist, evsel) 105 perf_evsel__calc_id_pos(evsel); 106 107 perf_evlist__set_id_pos(evlist); 108 } 109 110 static void perf_evlist__purge(struct perf_evlist *evlist) 111 { 112 struct perf_evsel *pos, *n; 113 114 evlist__for_each_entry_safe(evlist, n, pos) { 115 list_del_init(&pos->node); 116 pos->evlist = NULL; 117 perf_evsel__delete(pos); 118 } 119 120 evlist->nr_entries = 0; 121 } 122 123 void perf_evlist__exit(struct perf_evlist *evlist) 124 { 125 zfree(&evlist->mmap); 126 zfree(&evlist->backward_mmap); 127 fdarray__exit(&evlist->pollfd); 128 } 129 130 void perf_evlist__delete(struct perf_evlist *evlist) 131 { 132 if (evlist == NULL) 133 return; 134 135 perf_evlist__munmap(evlist); 136 perf_evlist__close(evlist); 137 cpu_map__put(evlist->cpus); 138 thread_map__put(evlist->threads); 139 evlist->cpus = NULL; 140 evlist->threads = NULL; 141 perf_evlist__purge(evlist); 142 perf_evlist__exit(evlist); 143 free(evlist); 144 } 145 146 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 147 struct perf_evsel *evsel) 148 { 149 /* 150 * We already have cpus for evsel (via PMU sysfs) so 151 * keep it, if there's no target cpu list defined. 152 */ 153 if (!evsel->own_cpus || evlist->has_user_cpus) { 154 cpu_map__put(evsel->cpus); 155 evsel->cpus = cpu_map__get(evlist->cpus); 156 } else if (evsel->cpus != evsel->own_cpus) { 157 cpu_map__put(evsel->cpus); 158 evsel->cpus = cpu_map__get(evsel->own_cpus); 159 } 160 161 thread_map__put(evsel->threads); 162 evsel->threads = thread_map__get(evlist->threads); 163 } 164 165 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 166 { 167 struct perf_evsel *evsel; 168 169 evlist__for_each_entry(evlist, evsel) 170 __perf_evlist__propagate_maps(evlist, evsel); 171 } 172 173 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 174 { 175 entry->evlist = evlist; 176 list_add_tail(&entry->node, &evlist->entries); 177 entry->idx = evlist->nr_entries; 178 entry->tracking = !entry->idx; 179 180 if (!evlist->nr_entries++) 181 perf_evlist__set_id_pos(evlist); 182 183 __perf_evlist__propagate_maps(evlist, entry); 184 } 185 186 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 187 { 188 evsel->evlist = NULL; 189 list_del_init(&evsel->node); 190 evlist->nr_entries -= 1; 191 } 192 193 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 194 struct list_head *list) 195 { 196 struct perf_evsel *evsel, *temp; 197 198 __evlist__for_each_entry_safe(list, temp, evsel) { 199 list_del_init(&evsel->node); 200 perf_evlist__add(evlist, evsel); 201 } 202 } 203 204 void __perf_evlist__set_leader(struct list_head *list) 205 { 206 struct perf_evsel *evsel, *leader; 207 208 leader = list_entry(list->next, struct perf_evsel, node); 209 evsel = list_entry(list->prev, struct perf_evsel, node); 210 211 leader->nr_members = evsel->idx - leader->idx + 1; 212 213 __evlist__for_each_entry(list, evsel) { 214 evsel->leader = leader; 215 } 216 } 217 218 void perf_evlist__set_leader(struct perf_evlist *evlist) 219 { 220 if (evlist->nr_entries) { 221 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 222 __perf_evlist__set_leader(&evlist->entries); 223 } 224 } 225 226 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) 227 { 228 attr->precise_ip = 3; 229 230 while (attr->precise_ip != 0) { 231 int fd = sys_perf_event_open(attr, 0, -1, -1, 0); 232 if (fd != -1) { 233 close(fd); 234 break; 235 } 236 --attr->precise_ip; 237 } 238 } 239 240 int perf_evlist__add_default(struct perf_evlist *evlist) 241 { 242 struct perf_event_attr attr = { 243 .type = PERF_TYPE_HARDWARE, 244 .config = PERF_COUNT_HW_CPU_CYCLES, 245 }; 246 struct perf_evsel *evsel; 247 248 event_attr_init(&attr); 249 250 perf_event_attr__set_max_precise_ip(&attr); 251 252 evsel = perf_evsel__new(&attr); 253 if (evsel == NULL) 254 goto error; 255 256 /* use asprintf() because free(evsel) assumes name is allocated */ 257 if (asprintf(&evsel->name, "cycles%.*s", 258 attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0) 259 goto error_free; 260 261 perf_evlist__add(evlist, evsel); 262 return 0; 263 error_free: 264 perf_evsel__delete(evsel); 265 error: 266 return -ENOMEM; 267 } 268 269 int perf_evlist__add_dummy(struct perf_evlist *evlist) 270 { 271 struct perf_event_attr attr = { 272 .type = PERF_TYPE_SOFTWARE, 273 .config = PERF_COUNT_SW_DUMMY, 274 .size = sizeof(attr), /* to capture ABI version */ 275 }; 276 struct perf_evsel *evsel = perf_evsel__new(&attr); 277 278 if (evsel == NULL) 279 return -ENOMEM; 280 281 perf_evlist__add(evlist, evsel); 282 return 0; 283 } 284 285 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 286 struct perf_event_attr *attrs, size_t nr_attrs) 287 { 288 struct perf_evsel *evsel, *n; 289 LIST_HEAD(head); 290 size_t i; 291 292 for (i = 0; i < nr_attrs; i++) { 293 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 294 if (evsel == NULL) 295 goto out_delete_partial_list; 296 list_add_tail(&evsel->node, &head); 297 } 298 299 perf_evlist__splice_list_tail(evlist, &head); 300 301 return 0; 302 303 out_delete_partial_list: 304 __evlist__for_each_entry_safe(&head, n, evsel) 305 perf_evsel__delete(evsel); 306 return -1; 307 } 308 309 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 310 struct perf_event_attr *attrs, size_t nr_attrs) 311 { 312 size_t i; 313 314 for (i = 0; i < nr_attrs; i++) 315 event_attr_init(attrs + i); 316 317 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 318 } 319 320 struct perf_evsel * 321 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 322 { 323 struct perf_evsel *evsel; 324 325 evlist__for_each_entry(evlist, evsel) { 326 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 327 (int)evsel->attr.config == id) 328 return evsel; 329 } 330 331 return NULL; 332 } 333 334 struct perf_evsel * 335 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 336 const char *name) 337 { 338 struct perf_evsel *evsel; 339 340 evlist__for_each_entry(evlist, evsel) { 341 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 342 (strcmp(evsel->name, name) == 0)) 343 return evsel; 344 } 345 346 return NULL; 347 } 348 349 int perf_evlist__add_newtp(struct perf_evlist *evlist, 350 const char *sys, const char *name, void *handler) 351 { 352 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 353 354 if (IS_ERR(evsel)) 355 return -1; 356 357 evsel->handler = handler; 358 perf_evlist__add(evlist, evsel); 359 return 0; 360 } 361 362 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 363 struct perf_evsel *evsel) 364 { 365 if (evsel->system_wide) 366 return 1; 367 else 368 return thread_map__nr(evlist->threads); 369 } 370 371 void perf_evlist__disable(struct perf_evlist *evlist) 372 { 373 struct perf_evsel *pos; 374 375 evlist__for_each_entry(evlist, pos) { 376 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 377 continue; 378 perf_evsel__disable(pos); 379 } 380 381 evlist->enabled = false; 382 } 383 384 void perf_evlist__enable(struct perf_evlist *evlist) 385 { 386 struct perf_evsel *pos; 387 388 evlist__for_each_entry(evlist, pos) { 389 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 390 continue; 391 perf_evsel__enable(pos); 392 } 393 394 evlist->enabled = true; 395 } 396 397 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 398 { 399 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 400 } 401 402 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 403 struct perf_evsel *evsel, int cpu) 404 { 405 int thread, err; 406 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 407 408 if (!evsel->fd) 409 return -EINVAL; 410 411 for (thread = 0; thread < nr_threads; thread++) { 412 err = ioctl(FD(evsel, cpu, thread), 413 PERF_EVENT_IOC_ENABLE, 0); 414 if (err) 415 return err; 416 } 417 return 0; 418 } 419 420 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 421 struct perf_evsel *evsel, 422 int thread) 423 { 424 int cpu, err; 425 int nr_cpus = cpu_map__nr(evlist->cpus); 426 427 if (!evsel->fd) 428 return -EINVAL; 429 430 for (cpu = 0; cpu < nr_cpus; cpu++) { 431 err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 432 if (err) 433 return err; 434 } 435 return 0; 436 } 437 438 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 439 struct perf_evsel *evsel, int idx) 440 { 441 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 442 443 if (per_cpu_mmaps) 444 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 445 else 446 return perf_evlist__enable_event_thread(evlist, evsel, idx); 447 } 448 449 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 450 { 451 int nr_cpus = cpu_map__nr(evlist->cpus); 452 int nr_threads = thread_map__nr(evlist->threads); 453 int nfds = 0; 454 struct perf_evsel *evsel; 455 456 evlist__for_each_entry(evlist, evsel) { 457 if (evsel->system_wide) 458 nfds += nr_cpus; 459 else 460 nfds += nr_cpus * nr_threads; 461 } 462 463 if (fdarray__available_entries(&evlist->pollfd) < nfds && 464 fdarray__grow(&evlist->pollfd, nfds) < 0) 465 return -ENOMEM; 466 467 return 0; 468 } 469 470 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 471 struct perf_mmap *map, short revent) 472 { 473 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 474 /* 475 * Save the idx so that when we filter out fds POLLHUP'ed we can 476 * close the associated evlist->mmap[] entry. 477 */ 478 if (pos >= 0) { 479 evlist->pollfd.priv[pos].ptr = map; 480 481 fcntl(fd, F_SETFL, O_NONBLOCK); 482 } 483 484 return pos; 485 } 486 487 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 488 { 489 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 490 } 491 492 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 493 void *arg __maybe_unused) 494 { 495 struct perf_mmap *map = fda->priv[fd].ptr; 496 497 if (map) 498 perf_mmap__put(map); 499 } 500 501 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 502 { 503 return fdarray__filter(&evlist->pollfd, revents_and_mask, 504 perf_evlist__munmap_filtered, NULL); 505 } 506 507 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 508 { 509 return fdarray__poll(&evlist->pollfd, timeout); 510 } 511 512 static void perf_evlist__id_hash(struct perf_evlist *evlist, 513 struct perf_evsel *evsel, 514 int cpu, int thread, u64 id) 515 { 516 int hash; 517 struct perf_sample_id *sid = SID(evsel, cpu, thread); 518 519 sid->id = id; 520 sid->evsel = evsel; 521 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 522 hlist_add_head(&sid->node, &evlist->heads[hash]); 523 } 524 525 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 526 int cpu, int thread, u64 id) 527 { 528 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 529 evsel->id[evsel->ids++] = id; 530 } 531 532 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 533 struct perf_evsel *evsel, 534 int cpu, int thread, int fd) 535 { 536 u64 read_data[4] = { 0, }; 537 int id_idx = 1; /* The first entry is the counter value */ 538 u64 id; 539 int ret; 540 541 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 542 if (!ret) 543 goto add; 544 545 if (errno != ENOTTY) 546 return -1; 547 548 /* Legacy way to get event id.. All hail to old kernels! */ 549 550 /* 551 * This way does not work with group format read, so bail 552 * out in that case. 553 */ 554 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 555 return -1; 556 557 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 558 read(fd, &read_data, sizeof(read_data)) == -1) 559 return -1; 560 561 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 562 ++id_idx; 563 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 564 ++id_idx; 565 566 id = read_data[id_idx]; 567 568 add: 569 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 570 return 0; 571 } 572 573 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 574 struct perf_evsel *evsel, int idx, int cpu, 575 int thread) 576 { 577 struct perf_sample_id *sid = SID(evsel, cpu, thread); 578 sid->idx = idx; 579 if (evlist->cpus && cpu >= 0) 580 sid->cpu = evlist->cpus->map[cpu]; 581 else 582 sid->cpu = -1; 583 if (!evsel->system_wide && evlist->threads && thread >= 0) 584 sid->tid = thread_map__pid(evlist->threads, thread); 585 else 586 sid->tid = -1; 587 } 588 589 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 590 { 591 struct hlist_head *head; 592 struct perf_sample_id *sid; 593 int hash; 594 595 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 596 head = &evlist->heads[hash]; 597 598 hlist_for_each_entry(sid, head, node) 599 if (sid->id == id) 600 return sid; 601 602 return NULL; 603 } 604 605 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 606 { 607 struct perf_sample_id *sid; 608 609 if (evlist->nr_entries == 1 || !id) 610 return perf_evlist__first(evlist); 611 612 sid = perf_evlist__id2sid(evlist, id); 613 if (sid) 614 return sid->evsel; 615 616 if (!perf_evlist__sample_id_all(evlist)) 617 return perf_evlist__first(evlist); 618 619 return NULL; 620 } 621 622 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 623 u64 id) 624 { 625 struct perf_sample_id *sid; 626 627 if (!id) 628 return NULL; 629 630 sid = perf_evlist__id2sid(evlist, id); 631 if (sid) 632 return sid->evsel; 633 634 return NULL; 635 } 636 637 static int perf_evlist__event2id(struct perf_evlist *evlist, 638 union perf_event *event, u64 *id) 639 { 640 const u64 *array = event->sample.array; 641 ssize_t n; 642 643 n = (event->header.size - sizeof(event->header)) >> 3; 644 645 if (event->header.type == PERF_RECORD_SAMPLE) { 646 if (evlist->id_pos >= n) 647 return -1; 648 *id = array[evlist->id_pos]; 649 } else { 650 if (evlist->is_pos > n) 651 return -1; 652 n -= evlist->is_pos; 653 *id = array[n]; 654 } 655 return 0; 656 } 657 658 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 659 union perf_event *event) 660 { 661 struct perf_evsel *first = perf_evlist__first(evlist); 662 struct hlist_head *head; 663 struct perf_sample_id *sid; 664 int hash; 665 u64 id; 666 667 if (evlist->nr_entries == 1) 668 return first; 669 670 if (!first->attr.sample_id_all && 671 event->header.type != PERF_RECORD_SAMPLE) 672 return first; 673 674 if (perf_evlist__event2id(evlist, event, &id)) 675 return NULL; 676 677 /* Synthesized events have an id of zero */ 678 if (!id) 679 return first; 680 681 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 682 head = &evlist->heads[hash]; 683 684 hlist_for_each_entry(sid, head, node) { 685 if (sid->id == id) 686 return sid->evsel; 687 } 688 return NULL; 689 } 690 691 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 692 { 693 int i; 694 695 if (!evlist->backward_mmap) 696 return 0; 697 698 for (i = 0; i < evlist->nr_mmaps; i++) { 699 int fd = evlist->backward_mmap[i].fd; 700 int err; 701 702 if (fd < 0) 703 continue; 704 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); 705 if (err) 706 return err; 707 } 708 return 0; 709 } 710 711 static int perf_evlist__pause(struct perf_evlist *evlist) 712 { 713 return perf_evlist__set_paused(evlist, true); 714 } 715 716 static int perf_evlist__resume(struct perf_evlist *evlist) 717 { 718 return perf_evlist__set_paused(evlist, false); 719 } 720 721 /* When check_messup is true, 'end' must points to a good entry */ 722 static union perf_event * 723 perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start, 724 u64 end, u64 *prev) 725 { 726 unsigned char *data = md->base + page_size; 727 union perf_event *event = NULL; 728 int diff = end - start; 729 730 if (check_messup) { 731 /* 732 * If we're further behind than half the buffer, there's a chance 733 * the writer will bite our tail and mess up the samples under us. 734 * 735 * If we somehow ended up ahead of the 'end', we got messed up. 736 * 737 * In either case, truncate and restart at 'end'. 738 */ 739 if (diff > md->mask / 2 || diff < 0) { 740 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 741 742 /* 743 * 'end' points to a known good entry, start there. 744 */ 745 start = end; 746 diff = 0; 747 } 748 } 749 750 if (diff >= (int)sizeof(event->header)) { 751 size_t size; 752 753 event = (union perf_event *)&data[start & md->mask]; 754 size = event->header.size; 755 756 if (size < sizeof(event->header) || diff < (int)size) { 757 event = NULL; 758 goto broken_event; 759 } 760 761 /* 762 * Event straddles the mmap boundary -- header should always 763 * be inside due to u64 alignment of output. 764 */ 765 if ((start & md->mask) + size != ((start + size) & md->mask)) { 766 unsigned int offset = start; 767 unsigned int len = min(sizeof(*event), size), cpy; 768 void *dst = md->event_copy; 769 770 do { 771 cpy = min(md->mask + 1 - (offset & md->mask), len); 772 memcpy(dst, &data[offset & md->mask], cpy); 773 offset += cpy; 774 dst += cpy; 775 len -= cpy; 776 } while (len); 777 778 event = (union perf_event *) md->event_copy; 779 } 780 781 start += size; 782 } 783 784 broken_event: 785 if (prev) 786 *prev = start; 787 788 return event; 789 } 790 791 union perf_event *perf_mmap__read_forward(struct perf_mmap *md, bool check_messup) 792 { 793 u64 head; 794 u64 old = md->prev; 795 796 /* 797 * Check if event was unmapped due to a POLLHUP/POLLERR. 798 */ 799 if (!atomic_read(&md->refcnt)) 800 return NULL; 801 802 head = perf_mmap__read_head(md); 803 804 return perf_mmap__read(md, check_messup, old, head, &md->prev); 805 } 806 807 union perf_event * 808 perf_mmap__read_backward(struct perf_mmap *md) 809 { 810 u64 head, end; 811 u64 start = md->prev; 812 813 /* 814 * Check if event was unmapped due to a POLLHUP/POLLERR. 815 */ 816 if (!atomic_read(&md->refcnt)) 817 return NULL; 818 819 head = perf_mmap__read_head(md); 820 if (!head) 821 return NULL; 822 823 /* 824 * 'head' pointer starts from 0. Kernel minus sizeof(record) form 825 * it each time when kernel writes to it, so in fact 'head' is 826 * negative. 'end' pointer is made manually by adding the size of 827 * the ring buffer to 'head' pointer, means the validate data can 828 * read is the whole ring buffer. If 'end' is positive, the ring 829 * buffer has not fully filled, so we must adjust 'end' to 0. 830 * 831 * However, since both 'head' and 'end' is unsigned, we can't 832 * simply compare 'end' against 0. Here we compare '-head' and 833 * the size of the ring buffer, where -head is the number of bytes 834 * kernel write to the ring buffer. 835 */ 836 if (-head < (u64)(md->mask + 1)) 837 end = 0; 838 else 839 end = head + md->mask + 1; 840 841 return perf_mmap__read(md, false, start, end, &md->prev); 842 } 843 844 union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx) 845 { 846 struct perf_mmap *md = &evlist->mmap[idx]; 847 848 /* 849 * Check messup is required for forward overwritable ring buffer: 850 * memory pointed by md->prev can be overwritten in this case. 851 * No need for read-write ring buffer: kernel stop outputting when 852 * it hit md->prev (perf_mmap__consume()). 853 */ 854 return perf_mmap__read_forward(md, evlist->overwrite); 855 } 856 857 union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) 858 { 859 struct perf_mmap *md = &evlist->mmap[idx]; 860 861 /* 862 * No need to check messup for backward ring buffer: 863 * We can always read arbitrary long data from a backward 864 * ring buffer unless we forget to pause it before reading. 865 */ 866 return perf_mmap__read_backward(md); 867 } 868 869 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 870 { 871 return perf_evlist__mmap_read_forward(evlist, idx); 872 } 873 874 void perf_mmap__read_catchup(struct perf_mmap *md) 875 { 876 u64 head; 877 878 if (!atomic_read(&md->refcnt)) 879 return; 880 881 head = perf_mmap__read_head(md); 882 md->prev = head; 883 } 884 885 void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) 886 { 887 perf_mmap__read_catchup(&evlist->mmap[idx]); 888 } 889 890 static bool perf_mmap__empty(struct perf_mmap *md) 891 { 892 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; 893 } 894 895 static void perf_mmap__get(struct perf_mmap *map) 896 { 897 atomic_inc(&map->refcnt); 898 } 899 900 static void perf_mmap__put(struct perf_mmap *md) 901 { 902 BUG_ON(md->base && atomic_read(&md->refcnt) == 0); 903 904 if (atomic_dec_and_test(&md->refcnt)) 905 perf_mmap__munmap(md); 906 } 907 908 void perf_mmap__consume(struct perf_mmap *md, bool overwrite) 909 { 910 if (!overwrite) { 911 u64 old = md->prev; 912 913 perf_mmap__write_tail(md, old); 914 } 915 916 if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) 917 perf_mmap__put(md); 918 } 919 920 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 921 { 922 perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); 923 } 924 925 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, 926 struct auxtrace_mmap_params *mp __maybe_unused, 927 void *userpg __maybe_unused, 928 int fd __maybe_unused) 929 { 930 return 0; 931 } 932 933 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) 934 { 935 } 936 937 void __weak auxtrace_mmap_params__init( 938 struct auxtrace_mmap_params *mp __maybe_unused, 939 off_t auxtrace_offset __maybe_unused, 940 unsigned int auxtrace_pages __maybe_unused, 941 bool auxtrace_overwrite __maybe_unused) 942 { 943 } 944 945 void __weak auxtrace_mmap_params__set_idx( 946 struct auxtrace_mmap_params *mp __maybe_unused, 947 struct perf_evlist *evlist __maybe_unused, 948 int idx __maybe_unused, 949 bool per_cpu __maybe_unused) 950 { 951 } 952 953 static void perf_mmap__munmap(struct perf_mmap *map) 954 { 955 if (map->base != NULL) { 956 munmap(map->base, perf_mmap__mmap_len(map)); 957 map->base = NULL; 958 map->fd = -1; 959 atomic_set(&map->refcnt, 0); 960 } 961 auxtrace_mmap__munmap(&map->auxtrace_mmap); 962 } 963 964 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 965 { 966 int i; 967 968 if (evlist->mmap) 969 for (i = 0; i < evlist->nr_mmaps; i++) 970 perf_mmap__munmap(&evlist->mmap[i]); 971 972 if (evlist->backward_mmap) 973 for (i = 0; i < evlist->nr_mmaps; i++) 974 perf_mmap__munmap(&evlist->backward_mmap[i]); 975 } 976 977 void perf_evlist__munmap(struct perf_evlist *evlist) 978 { 979 perf_evlist__munmap_nofree(evlist); 980 zfree(&evlist->mmap); 981 zfree(&evlist->backward_mmap); 982 } 983 984 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) 985 { 986 int i; 987 struct perf_mmap *map; 988 989 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 990 if (cpu_map__empty(evlist->cpus)) 991 evlist->nr_mmaps = thread_map__nr(evlist->threads); 992 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 993 if (!map) 994 return NULL; 995 996 for (i = 0; i < evlist->nr_mmaps; i++) 997 map[i].fd = -1; 998 return map; 999 } 1000 1001 struct mmap_params { 1002 int prot; 1003 int mask; 1004 struct auxtrace_mmap_params auxtrace_mp; 1005 }; 1006 1007 static int perf_mmap__mmap(struct perf_mmap *map, 1008 struct mmap_params *mp, int fd) 1009 { 1010 /* 1011 * The last one will be done at perf_evlist__mmap_consume(), so that we 1012 * make sure we don't prevent tools from consuming every last event in 1013 * the ring buffer. 1014 * 1015 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 1016 * anymore, but the last events for it are still in the ring buffer, 1017 * waiting to be consumed. 1018 * 1019 * Tools can chose to ignore this at their own discretion, but the 1020 * evlist layer can't just drop it when filtering events in 1021 * perf_evlist__filter_pollfd(). 1022 */ 1023 atomic_set(&map->refcnt, 2); 1024 map->prev = 0; 1025 map->mask = mp->mask; 1026 map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot, 1027 MAP_SHARED, fd, 0); 1028 if (map->base == MAP_FAILED) { 1029 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 1030 errno); 1031 map->base = NULL; 1032 return -1; 1033 } 1034 map->fd = fd; 1035 1036 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 1037 &mp->auxtrace_mp, map->base, fd)) 1038 return -1; 1039 1040 return 0; 1041 } 1042 1043 static bool 1044 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 1045 struct perf_evsel *evsel) 1046 { 1047 if (evsel->attr.write_backward) 1048 return false; 1049 return true; 1050 } 1051 1052 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 1053 struct mmap_params *mp, int cpu, 1054 int thread, int *_output, int *_output_backward) 1055 { 1056 struct perf_evsel *evsel; 1057 int revent; 1058 1059 evlist__for_each_entry(evlist, evsel) { 1060 struct perf_mmap *maps = evlist->mmap; 1061 int *output = _output; 1062 int fd; 1063 1064 if (evsel->attr.write_backward) { 1065 output = _output_backward; 1066 maps = evlist->backward_mmap; 1067 1068 if (!maps) { 1069 maps = perf_evlist__alloc_mmap(evlist); 1070 if (!maps) 1071 return -1; 1072 evlist->backward_mmap = maps; 1073 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 1074 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 1075 } 1076 } 1077 1078 if (evsel->system_wide && thread) 1079 continue; 1080 1081 fd = FD(evsel, cpu, thread); 1082 1083 if (*output == -1) { 1084 *output = fd; 1085 1086 if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) 1087 return -1; 1088 } else { 1089 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 1090 return -1; 1091 1092 perf_mmap__get(&maps[idx]); 1093 } 1094 1095 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 1096 1097 /* 1098 * The system_wide flag causes a selected event to be opened 1099 * always without a pid. Consequently it will never get a 1100 * POLLHUP, but it is used for tracking in combination with 1101 * other events, so it should not need to be polled anyway. 1102 * Therefore don't add it for polling. 1103 */ 1104 if (!evsel->system_wide && 1105 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 1106 perf_mmap__put(&maps[idx]); 1107 return -1; 1108 } 1109 1110 if (evsel->attr.read_format & PERF_FORMAT_ID) { 1111 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 1112 fd) < 0) 1113 return -1; 1114 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 1115 thread); 1116 } 1117 } 1118 1119 return 0; 1120 } 1121 1122 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 1123 struct mmap_params *mp) 1124 { 1125 int cpu, thread; 1126 int nr_cpus = cpu_map__nr(evlist->cpus); 1127 int nr_threads = thread_map__nr(evlist->threads); 1128 1129 pr_debug2("perf event ring buffer mmapped per cpu\n"); 1130 for (cpu = 0; cpu < nr_cpus; cpu++) { 1131 int output = -1; 1132 int output_backward = -1; 1133 1134 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 1135 true); 1136 1137 for (thread = 0; thread < nr_threads; thread++) { 1138 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 1139 thread, &output, &output_backward)) 1140 goto out_unmap; 1141 } 1142 } 1143 1144 return 0; 1145 1146 out_unmap: 1147 perf_evlist__munmap_nofree(evlist); 1148 return -1; 1149 } 1150 1151 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 1152 struct mmap_params *mp) 1153 { 1154 int thread; 1155 int nr_threads = thread_map__nr(evlist->threads); 1156 1157 pr_debug2("perf event ring buffer mmapped per thread\n"); 1158 for (thread = 0; thread < nr_threads; thread++) { 1159 int output = -1; 1160 int output_backward = -1; 1161 1162 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 1163 false); 1164 1165 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 1166 &output, &output_backward)) 1167 goto out_unmap; 1168 } 1169 1170 return 0; 1171 1172 out_unmap: 1173 perf_evlist__munmap_nofree(evlist); 1174 return -1; 1175 } 1176 1177 unsigned long perf_event_mlock_kb_in_pages(void) 1178 { 1179 unsigned long pages; 1180 int max; 1181 1182 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 1183 /* 1184 * Pick a once upon a time good value, i.e. things look 1185 * strange since we can't read a sysctl value, but lets not 1186 * die yet... 1187 */ 1188 max = 512; 1189 } else { 1190 max -= (page_size / 1024); 1191 } 1192 1193 pages = (max * 1024) / page_size; 1194 if (!is_power_of_2(pages)) 1195 pages = rounddown_pow_of_two(pages); 1196 1197 return pages; 1198 } 1199 1200 static size_t perf_evlist__mmap_size(unsigned long pages) 1201 { 1202 if (pages == UINT_MAX) 1203 pages = perf_event_mlock_kb_in_pages(); 1204 else if (!is_power_of_2(pages)) 1205 return 0; 1206 1207 return (pages + 1) * page_size; 1208 } 1209 1210 static long parse_pages_arg(const char *str, unsigned long min, 1211 unsigned long max) 1212 { 1213 unsigned long pages, val; 1214 static struct parse_tag tags[] = { 1215 { .tag = 'B', .mult = 1 }, 1216 { .tag = 'K', .mult = 1 << 10 }, 1217 { .tag = 'M', .mult = 1 << 20 }, 1218 { .tag = 'G', .mult = 1 << 30 }, 1219 { .tag = 0 }, 1220 }; 1221 1222 if (str == NULL) 1223 return -EINVAL; 1224 1225 val = parse_tag_value(str, tags); 1226 if (val != (unsigned long) -1) { 1227 /* we got file size value */ 1228 pages = PERF_ALIGN(val, page_size) / page_size; 1229 } else { 1230 /* we got pages count value */ 1231 char *eptr; 1232 pages = strtoul(str, &eptr, 10); 1233 if (*eptr != '\0') 1234 return -EINVAL; 1235 } 1236 1237 if (pages == 0 && min == 0) { 1238 /* leave number of pages at 0 */ 1239 } else if (!is_power_of_2(pages)) { 1240 /* round pages up to next power of 2 */ 1241 pages = roundup_pow_of_two(pages); 1242 if (!pages) 1243 return -EINVAL; 1244 pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", 1245 pages * page_size, pages); 1246 } 1247 1248 if (pages > max) 1249 return -EINVAL; 1250 1251 return pages; 1252 } 1253 1254 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 1255 { 1256 unsigned long max = UINT_MAX; 1257 long pages; 1258 1259 if (max > SIZE_MAX / page_size) 1260 max = SIZE_MAX / page_size; 1261 1262 pages = parse_pages_arg(str, 1, max); 1263 if (pages < 0) { 1264 pr_err("Invalid argument for --mmap_pages/-m\n"); 1265 return -1; 1266 } 1267 1268 *mmap_pages = pages; 1269 return 0; 1270 } 1271 1272 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1273 int unset __maybe_unused) 1274 { 1275 return __perf_evlist__parse_mmap_pages(opt->value, str); 1276 } 1277 1278 /** 1279 * perf_evlist__mmap_ex - Create mmaps to receive events. 1280 * @evlist: list of events 1281 * @pages: map length in pages 1282 * @overwrite: overwrite older events? 1283 * @auxtrace_pages - auxtrace map length in pages 1284 * @auxtrace_overwrite - overwrite older auxtrace data? 1285 * 1286 * If @overwrite is %false the user needs to signal event consumption using 1287 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1288 * automatically. 1289 * 1290 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1291 * consumption using auxtrace_mmap__write_tail(). 1292 * 1293 * Return: %0 on success, negative error code otherwise. 1294 */ 1295 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1296 bool overwrite, unsigned int auxtrace_pages, 1297 bool auxtrace_overwrite) 1298 { 1299 struct perf_evsel *evsel; 1300 const struct cpu_map *cpus = evlist->cpus; 1301 const struct thread_map *threads = evlist->threads; 1302 struct mmap_params mp = { 1303 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), 1304 }; 1305 1306 if (!evlist->mmap) 1307 evlist->mmap = perf_evlist__alloc_mmap(evlist); 1308 if (!evlist->mmap) 1309 return -ENOMEM; 1310 1311 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1312 return -ENOMEM; 1313 1314 evlist->overwrite = overwrite; 1315 evlist->mmap_len = perf_evlist__mmap_size(pages); 1316 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1317 mp.mask = evlist->mmap_len - page_size - 1; 1318 1319 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1320 auxtrace_pages, auxtrace_overwrite); 1321 1322 evlist__for_each_entry(evlist, evsel) { 1323 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1324 evsel->sample_id == NULL && 1325 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1326 return -ENOMEM; 1327 } 1328 1329 if (cpu_map__empty(cpus)) 1330 return perf_evlist__mmap_per_thread(evlist, &mp); 1331 1332 return perf_evlist__mmap_per_cpu(evlist, &mp); 1333 } 1334 1335 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1336 bool overwrite) 1337 { 1338 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); 1339 } 1340 1341 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1342 { 1343 struct cpu_map *cpus; 1344 struct thread_map *threads; 1345 1346 threads = thread_map__new_str(target->pid, target->tid, target->uid); 1347 1348 if (!threads) 1349 return -1; 1350 1351 if (target__uses_dummy_map(target)) 1352 cpus = cpu_map__dummy_new(); 1353 else 1354 cpus = cpu_map__new(target->cpu_list); 1355 1356 if (!cpus) 1357 goto out_delete_threads; 1358 1359 evlist->has_user_cpus = !!target->cpu_list; 1360 1361 perf_evlist__set_maps(evlist, cpus, threads); 1362 1363 return 0; 1364 1365 out_delete_threads: 1366 thread_map__put(threads); 1367 return -1; 1368 } 1369 1370 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1371 struct thread_map *threads) 1372 { 1373 /* 1374 * Allow for the possibility that one or another of the maps isn't being 1375 * changed i.e. don't put it. Note we are assuming the maps that are 1376 * being applied are brand new and evlist is taking ownership of the 1377 * original reference count of 1. If that is not the case it is up to 1378 * the caller to increase the reference count. 1379 */ 1380 if (cpus != evlist->cpus) { 1381 cpu_map__put(evlist->cpus); 1382 evlist->cpus = cpu_map__get(cpus); 1383 } 1384 1385 if (threads != evlist->threads) { 1386 thread_map__put(evlist->threads); 1387 evlist->threads = thread_map__get(threads); 1388 } 1389 1390 perf_evlist__propagate_maps(evlist); 1391 } 1392 1393 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1394 enum perf_event_sample_format bit) 1395 { 1396 struct perf_evsel *evsel; 1397 1398 evlist__for_each_entry(evlist, evsel) 1399 __perf_evsel__set_sample_bit(evsel, bit); 1400 } 1401 1402 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1403 enum perf_event_sample_format bit) 1404 { 1405 struct perf_evsel *evsel; 1406 1407 evlist__for_each_entry(evlist, evsel) 1408 __perf_evsel__reset_sample_bit(evsel, bit); 1409 } 1410 1411 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1412 { 1413 struct perf_evsel *evsel; 1414 int err = 0; 1415 const int ncpus = cpu_map__nr(evlist->cpus), 1416 nthreads = thread_map__nr(evlist->threads); 1417 1418 evlist__for_each_entry(evlist, evsel) { 1419 if (evsel->filter == NULL) 1420 continue; 1421 1422 /* 1423 * filters only work for tracepoint event, which doesn't have cpu limit. 1424 * So evlist and evsel should always be same. 1425 */ 1426 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); 1427 if (err) { 1428 *err_evsel = evsel; 1429 break; 1430 } 1431 } 1432 1433 return err; 1434 } 1435 1436 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) 1437 { 1438 struct perf_evsel *evsel; 1439 int err = 0; 1440 1441 evlist__for_each_entry(evlist, evsel) { 1442 if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 1443 continue; 1444 1445 err = perf_evsel__set_filter(evsel, filter); 1446 if (err) 1447 break; 1448 } 1449 1450 return err; 1451 } 1452 1453 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1454 { 1455 char *filter; 1456 int ret = -1; 1457 size_t i; 1458 1459 for (i = 0; i < npids; ++i) { 1460 if (i == 0) { 1461 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1462 return -1; 1463 } else { 1464 char *tmp; 1465 1466 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1467 goto out_free; 1468 1469 free(filter); 1470 filter = tmp; 1471 } 1472 } 1473 1474 ret = perf_evlist__set_filter(evlist, filter); 1475 out_free: 1476 free(filter); 1477 return ret; 1478 } 1479 1480 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) 1481 { 1482 return perf_evlist__set_filter_pids(evlist, 1, &pid); 1483 } 1484 1485 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1486 { 1487 struct perf_evsel *pos; 1488 1489 if (evlist->nr_entries == 1) 1490 return true; 1491 1492 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1493 return false; 1494 1495 evlist__for_each_entry(evlist, pos) { 1496 if (pos->id_pos != evlist->id_pos || 1497 pos->is_pos != evlist->is_pos) 1498 return false; 1499 } 1500 1501 return true; 1502 } 1503 1504 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1505 { 1506 struct perf_evsel *evsel; 1507 1508 if (evlist->combined_sample_type) 1509 return evlist->combined_sample_type; 1510 1511 evlist__for_each_entry(evlist, evsel) 1512 evlist->combined_sample_type |= evsel->attr.sample_type; 1513 1514 return evlist->combined_sample_type; 1515 } 1516 1517 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1518 { 1519 evlist->combined_sample_type = 0; 1520 return __perf_evlist__combined_sample_type(evlist); 1521 } 1522 1523 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1524 { 1525 struct perf_evsel *evsel; 1526 u64 branch_type = 0; 1527 1528 evlist__for_each_entry(evlist, evsel) 1529 branch_type |= evsel->attr.branch_sample_type; 1530 return branch_type; 1531 } 1532 1533 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1534 { 1535 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1536 u64 read_format = first->attr.read_format; 1537 u64 sample_type = first->attr.sample_type; 1538 1539 evlist__for_each_entry(evlist, pos) { 1540 if (read_format != pos->attr.read_format) 1541 return false; 1542 } 1543 1544 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1545 if ((sample_type & PERF_SAMPLE_READ) && 1546 !(read_format & PERF_FORMAT_ID)) { 1547 return false; 1548 } 1549 1550 return true; 1551 } 1552 1553 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1554 { 1555 struct perf_evsel *first = perf_evlist__first(evlist); 1556 return first->attr.read_format; 1557 } 1558 1559 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1560 { 1561 struct perf_evsel *first = perf_evlist__first(evlist); 1562 struct perf_sample *data; 1563 u64 sample_type; 1564 u16 size = 0; 1565 1566 if (!first->attr.sample_id_all) 1567 goto out; 1568 1569 sample_type = first->attr.sample_type; 1570 1571 if (sample_type & PERF_SAMPLE_TID) 1572 size += sizeof(data->tid) * 2; 1573 1574 if (sample_type & PERF_SAMPLE_TIME) 1575 size += sizeof(data->time); 1576 1577 if (sample_type & PERF_SAMPLE_ID) 1578 size += sizeof(data->id); 1579 1580 if (sample_type & PERF_SAMPLE_STREAM_ID) 1581 size += sizeof(data->stream_id); 1582 1583 if (sample_type & PERF_SAMPLE_CPU) 1584 size += sizeof(data->cpu) * 2; 1585 1586 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1587 size += sizeof(data->id); 1588 out: 1589 return size; 1590 } 1591 1592 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1593 { 1594 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1595 1596 evlist__for_each_entry_continue(evlist, pos) { 1597 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1598 return false; 1599 } 1600 1601 return true; 1602 } 1603 1604 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1605 { 1606 struct perf_evsel *first = perf_evlist__first(evlist); 1607 return first->attr.sample_id_all; 1608 } 1609 1610 void perf_evlist__set_selected(struct perf_evlist *evlist, 1611 struct perf_evsel *evsel) 1612 { 1613 evlist->selected = evsel; 1614 } 1615 1616 void perf_evlist__close(struct perf_evlist *evlist) 1617 { 1618 struct perf_evsel *evsel; 1619 int ncpus = cpu_map__nr(evlist->cpus); 1620 int nthreads = thread_map__nr(evlist->threads); 1621 int n; 1622 1623 evlist__for_each_entry_reverse(evlist, evsel) { 1624 n = evsel->cpus ? evsel->cpus->nr : ncpus; 1625 perf_evsel__close(evsel, n, nthreads); 1626 } 1627 } 1628 1629 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1630 { 1631 struct cpu_map *cpus; 1632 struct thread_map *threads; 1633 int err = -ENOMEM; 1634 1635 /* 1636 * Try reading /sys/devices/system/cpu/online to get 1637 * an all cpus map. 1638 * 1639 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1640 * code needs an overhaul to properly forward the 1641 * error, and we may not want to do that fallback to a 1642 * default cpu identity map :-\ 1643 */ 1644 cpus = cpu_map__new(NULL); 1645 if (!cpus) 1646 goto out; 1647 1648 threads = thread_map__new_dummy(); 1649 if (!threads) 1650 goto out_put; 1651 1652 perf_evlist__set_maps(evlist, cpus, threads); 1653 out: 1654 return err; 1655 out_put: 1656 cpu_map__put(cpus); 1657 goto out; 1658 } 1659 1660 int perf_evlist__open(struct perf_evlist *evlist) 1661 { 1662 struct perf_evsel *evsel; 1663 int err; 1664 1665 /* 1666 * Default: one fd per CPU, all threads, aka systemwide 1667 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1668 */ 1669 if (evlist->threads == NULL && evlist->cpus == NULL) { 1670 err = perf_evlist__create_syswide_maps(evlist); 1671 if (err < 0) 1672 goto out_err; 1673 } 1674 1675 perf_evlist__update_id_pos(evlist); 1676 1677 evlist__for_each_entry(evlist, evsel) { 1678 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1679 if (err < 0) 1680 goto out_err; 1681 } 1682 1683 return 0; 1684 out_err: 1685 perf_evlist__close(evlist); 1686 errno = -err; 1687 return err; 1688 } 1689 1690 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1691 const char *argv[], bool pipe_output, 1692 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1693 { 1694 int child_ready_pipe[2], go_pipe[2]; 1695 char bf; 1696 1697 if (pipe(child_ready_pipe) < 0) { 1698 perror("failed to create 'ready' pipe"); 1699 return -1; 1700 } 1701 1702 if (pipe(go_pipe) < 0) { 1703 perror("failed to create 'go' pipe"); 1704 goto out_close_ready_pipe; 1705 } 1706 1707 evlist->workload.pid = fork(); 1708 if (evlist->workload.pid < 0) { 1709 perror("failed to fork"); 1710 goto out_close_pipes; 1711 } 1712 1713 if (!evlist->workload.pid) { 1714 int ret; 1715 1716 if (pipe_output) 1717 dup2(2, 1); 1718 1719 signal(SIGTERM, SIG_DFL); 1720 1721 close(child_ready_pipe[0]); 1722 close(go_pipe[1]); 1723 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1724 1725 /* 1726 * Tell the parent we're ready to go 1727 */ 1728 close(child_ready_pipe[1]); 1729 1730 /* 1731 * Wait until the parent tells us to go. 1732 */ 1733 ret = read(go_pipe[0], &bf, 1); 1734 /* 1735 * The parent will ask for the execvp() to be performed by 1736 * writing exactly one byte, in workload.cork_fd, usually via 1737 * perf_evlist__start_workload(). 1738 * 1739 * For cancelling the workload without actually running it, 1740 * the parent will just close workload.cork_fd, without writing 1741 * anything, i.e. read will return zero and we just exit() 1742 * here. 1743 */ 1744 if (ret != 1) { 1745 if (ret == -1) 1746 perror("unable to read pipe"); 1747 exit(ret); 1748 } 1749 1750 execvp(argv[0], (char **)argv); 1751 1752 if (exec_error) { 1753 union sigval val; 1754 1755 val.sival_int = errno; 1756 if (sigqueue(getppid(), SIGUSR1, val)) 1757 perror(argv[0]); 1758 } else 1759 perror(argv[0]); 1760 exit(-1); 1761 } 1762 1763 if (exec_error) { 1764 struct sigaction act = { 1765 .sa_flags = SA_SIGINFO, 1766 .sa_sigaction = exec_error, 1767 }; 1768 sigaction(SIGUSR1, &act, NULL); 1769 } 1770 1771 if (target__none(target)) { 1772 if (evlist->threads == NULL) { 1773 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1774 __func__, __LINE__); 1775 goto out_close_pipes; 1776 } 1777 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1778 } 1779 1780 close(child_ready_pipe[1]); 1781 close(go_pipe[0]); 1782 /* 1783 * wait for child to settle 1784 */ 1785 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1786 perror("unable to read pipe"); 1787 goto out_close_pipes; 1788 } 1789 1790 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1791 evlist->workload.cork_fd = go_pipe[1]; 1792 close(child_ready_pipe[0]); 1793 return 0; 1794 1795 out_close_pipes: 1796 close(go_pipe[0]); 1797 close(go_pipe[1]); 1798 out_close_ready_pipe: 1799 close(child_ready_pipe[0]); 1800 close(child_ready_pipe[1]); 1801 return -1; 1802 } 1803 1804 int perf_evlist__start_workload(struct perf_evlist *evlist) 1805 { 1806 if (evlist->workload.cork_fd > 0) { 1807 char bf = 0; 1808 int ret; 1809 /* 1810 * Remove the cork, let it rip! 1811 */ 1812 ret = write(evlist->workload.cork_fd, &bf, 1); 1813 if (ret < 0) 1814 perror("enable to write to pipe"); 1815 1816 close(evlist->workload.cork_fd); 1817 return ret; 1818 } 1819 1820 return 0; 1821 } 1822 1823 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1824 struct perf_sample *sample) 1825 { 1826 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1827 1828 if (!evsel) 1829 return -EFAULT; 1830 return perf_evsel__parse_sample(evsel, event, sample); 1831 } 1832 1833 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1834 { 1835 struct perf_evsel *evsel; 1836 size_t printed = 0; 1837 1838 evlist__for_each_entry(evlist, evsel) { 1839 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1840 perf_evsel__name(evsel)); 1841 } 1842 1843 return printed + fprintf(fp, "\n"); 1844 } 1845 1846 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1847 int err, char *buf, size_t size) 1848 { 1849 int printed, value; 1850 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1851 1852 switch (err) { 1853 case EACCES: 1854 case EPERM: 1855 printed = scnprintf(buf, size, 1856 "Error:\t%s.\n" 1857 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1858 1859 value = perf_event_paranoid(); 1860 1861 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1862 1863 if (value >= 2) { 1864 printed += scnprintf(buf + printed, size - printed, 1865 "For your workloads it needs to be <= 1\nHint:\t"); 1866 } 1867 printed += scnprintf(buf + printed, size - printed, 1868 "For system wide tracing it needs to be set to -1.\n"); 1869 1870 printed += scnprintf(buf + printed, size - printed, 1871 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1872 "Hint:\tThe current value is %d.", value); 1873 break; 1874 case EINVAL: { 1875 struct perf_evsel *first = perf_evlist__first(evlist); 1876 int max_freq; 1877 1878 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1879 goto out_default; 1880 1881 if (first->attr.sample_freq < (u64)max_freq) 1882 goto out_default; 1883 1884 printed = scnprintf(buf, size, 1885 "Error:\t%s.\n" 1886 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1887 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1888 emsg, max_freq, first->attr.sample_freq); 1889 break; 1890 } 1891 default: 1892 out_default: 1893 scnprintf(buf, size, "%s", emsg); 1894 break; 1895 } 1896 1897 return 0; 1898 } 1899 1900 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1901 { 1902 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1903 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1904 1905 switch (err) { 1906 case EPERM: 1907 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1908 printed += scnprintf(buf + printed, size - printed, 1909 "Error:\t%s.\n" 1910 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1911 "Hint:\tTried using %zd kB.\n", 1912 emsg, pages_max_per_user, pages_attempted); 1913 1914 if (pages_attempted >= pages_max_per_user) { 1915 printed += scnprintf(buf + printed, size - printed, 1916 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1917 pages_max_per_user + pages_attempted); 1918 } 1919 1920 printed += scnprintf(buf + printed, size - printed, 1921 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1922 break; 1923 default: 1924 scnprintf(buf, size, "%s", emsg); 1925 break; 1926 } 1927 1928 return 0; 1929 } 1930 1931 void perf_evlist__to_front(struct perf_evlist *evlist, 1932 struct perf_evsel *move_evsel) 1933 { 1934 struct perf_evsel *evsel, *n; 1935 LIST_HEAD(move); 1936 1937 if (move_evsel == perf_evlist__first(evlist)) 1938 return; 1939 1940 evlist__for_each_entry_safe(evlist, n, evsel) { 1941 if (evsel->leader == move_evsel->leader) 1942 list_move_tail(&evsel->node, &move); 1943 } 1944 1945 list_splice(&move, &evlist->entries); 1946 } 1947 1948 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1949 struct perf_evsel *tracking_evsel) 1950 { 1951 struct perf_evsel *evsel; 1952 1953 if (tracking_evsel->tracking) 1954 return; 1955 1956 evlist__for_each_entry(evlist, evsel) { 1957 if (evsel != tracking_evsel) 1958 evsel->tracking = false; 1959 } 1960 1961 tracking_evsel->tracking = true; 1962 } 1963 1964 struct perf_evsel * 1965 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1966 const char *str) 1967 { 1968 struct perf_evsel *evsel; 1969 1970 evlist__for_each_entry(evlist, evsel) { 1971 if (!evsel->name) 1972 continue; 1973 if (strcmp(str, evsel->name) == 0) 1974 return evsel; 1975 } 1976 1977 return NULL; 1978 } 1979 1980 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1981 enum bkw_mmap_state state) 1982 { 1983 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1984 enum action { 1985 NONE, 1986 PAUSE, 1987 RESUME, 1988 } action = NONE; 1989 1990 if (!evlist->backward_mmap) 1991 return; 1992 1993 switch (old_state) { 1994 case BKW_MMAP_NOTREADY: { 1995 if (state != BKW_MMAP_RUNNING) 1996 goto state_err;; 1997 break; 1998 } 1999 case BKW_MMAP_RUNNING: { 2000 if (state != BKW_MMAP_DATA_PENDING) 2001 goto state_err; 2002 action = PAUSE; 2003 break; 2004 } 2005 case BKW_MMAP_DATA_PENDING: { 2006 if (state != BKW_MMAP_EMPTY) 2007 goto state_err; 2008 break; 2009 } 2010 case BKW_MMAP_EMPTY: { 2011 if (state != BKW_MMAP_RUNNING) 2012 goto state_err; 2013 action = RESUME; 2014 break; 2015 } 2016 default: 2017 WARN_ONCE(1, "Shouldn't get there\n"); 2018 } 2019 2020 evlist->bkw_mmap_state = state; 2021 2022 switch (action) { 2023 case PAUSE: 2024 perf_evlist__pause(evlist); 2025 break; 2026 case RESUME: 2027 perf_evlist__resume(evlist); 2028 break; 2029 case NONE: 2030 default: 2031 break; 2032 } 2033 2034 state_err: 2035 return; 2036 } 2037