/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->backward = false;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}
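/*
 * Typical evlist lifecycle, as seen from the tools using this API
 * (illustrative sketch only, error handling and option parsing omitted):
 *
 *	struct perf_evlist *evlist = perf_evlist__new();
 *
 *	perf_evlist__add_default(evlist);            // or parse_events(evlist, "...", NULL)
 *	perf_evlist__create_maps(evlist, &target);   // cpu/thread maps from the target
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	// ...consume events, see perf_evlist__mmap_read() below...
 *	perf_evlist__delete(evlist);                 // munmaps, closes and frees everything
 */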
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}
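/*
 * Example (illustrative): for a group parsed from "{cycles,instructions,branches}"
 * the evsels get idx 0, 1 and 2 in list order, so __perf_evlist__set_leader()
 * above ends up with leader == the "cycles" evsel and
 * leader->nr_members == 2 - 0 + 1 == 3.
 */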
void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	perf_event_attr__set_max_precise_ip(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%.*s",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size = sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new(&attr);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}
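/*
 * Example (illustrative): with 4 CPUs and a target that has 3 threads, a
 * regular evsel gets a 4 x 3 fd matrix, while a system_wide evsel is opened
 * once per CPU only, so perf_evlist__nr_threads() reports 1 for it and its
 * matrix is 4 x 1.
 */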
void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}
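/*
 * Worked example for the nfds estimate above (illustrative): two evsels on
 * 4 CPUs and 3 threads, one of them system_wide, need at most
 * 4 * 3 + 4 = 16 pollfd slots.
 */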
static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}
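/*
 * Note on the legacy path in perf_evlist__id_add_fd() below: without
 * PERF_EVENT_IOC_ID the id is fished out of a read() of the counter, whose
 * layout with PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING |
 * PERF_FORMAT_ID is { value, time_enabled, time_running, id }, which is why
 * id_idx can end up as high as 3 and read_data[] has 4 entries.
 */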
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}
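/*
 * Example (illustrative): with PERF_SAMPLE_IDENTIFIER in sample_type the id
 * is the first u64 of a PERF_RECORD_SAMPLE (id_pos == 0) and the last u64 of
 * every other record that carries sample_id_all data (is_pos == 1), which is
 * exactly what perf_evlist__event2id() above indexes with id_pos and is_pos.
 */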
static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__pause(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

int perf_evlist__resume(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *
perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
		u64 end, u64 *prev)
{
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;
	int diff = end - start;

	if (check_messup) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the 'end', we got messed up.
		 *
		 * In either case, truncate and restart at 'end'.
		 */
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * 'end' points to a known good entry, start there.
			 */
			start = end;
			diff = 0;
		}
	}

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[start & md->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size) {
			event = NULL;
			goto broken_event;
		}

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((start & md->mask) + size != ((start + size) & md->mask)) {
			unsigned int offset = start;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		start += size;
	}

broken_event:
	if (prev)
		*prev = start;

	return event;
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);

	return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
}
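/*
 * Typical consumption loop for one mmap, as used by the tools (illustrative
 * sketch only):
 *
 *	union perf_event *event;
 *
 *	while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
 *		// ...deliver the event, e.g. via perf_evlist__parse_sample()...
 *		perf_evlist__mmap_consume(evlist, idx);
 *	}
 */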
union perf_event *
perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head, end;
	u64 start = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (!head)
		return NULL;

	/*
	 * The 'head' pointer starts at 0 and the kernel subtracts
	 * sizeof(record) from it each time it writes a record, so 'head' is
	 * in fact negative. The 'end' pointer is built manually by adding
	 * the size of the ring buffer to 'head', meaning the valid data we
	 * can read is the whole ring buffer. If 'end' is positive, the ring
	 * buffer has not been completely filled yet, so we must adjust 'end'
	 * to 0.
	 *
	 * However, since both 'head' and 'end' are unsigned, we can't simply
	 * compare 'end' against 0. Instead we compare '-head' with the size
	 * of the ring buffer, where '-head' is the number of bytes the
	 * kernel has written into the ring buffer.
	 */
	if (-head < (u64)(md->mask + 1))
		end = 0;
	else
		end = head + md->mask + 1;

	return perf_mmap__read(md, false, start, end, &md->prev);
}

void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;

	if (!atomic_read(&md->refcnt))
		return;

	head = perf_mmap__read_head(md);
	md->prev = head;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		evlist->mmap[idx].fd = -1;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}
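/*
 * Example (illustrative): perf_evlist__alloc_mmap() below sizes the array at
 * one ring buffer per CPU (e.g. 8 mmaps for an 8 CPU map), falling back to
 * one per thread (e.g. 3 mmaps for 3 threads) when the cpu map is the dummy
 * "empty" one used for per-thread targets.
 */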
static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	int i;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	for (i = 0; i < evlist->nr_mmaps; i++)
		evlist->mmap[i].fd = -1;
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last refcount put is done at perf_evlist__mmap_consume(), so
	 * that we make sure we don't prevent tools from consuming every last
	 * event in the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}
	evlist->mmap[idx].fd = fd;

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}
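/*
 * In perf_evlist__mmap_per_evsel() below, all evsels mapped to the same idx
 * share a single ring buffer: the first fd is mmapped by __perf_evlist__mmap()
 * and every other fd is redirected to it with PERF_EVENT_IOC_SET_OUTPUT,
 * taking an extra reference on the mmap.
 */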
static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		int fd;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid. Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}
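/*
 * Worked example for perf_event_mlock_kb_in_pages() below (illustrative):
 * with kernel.perf_event_mlock_kb = 516 and 4 kB pages, max becomes
 * 516 - 4 = 512 kB, i.e. 128 pages, already a power of two, so 128 is
 * returned and perf_evlist__mmap_size() turns it into (128 + 1) pages to
 * account for the control page.
 */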
unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}
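/*
 * Examples for parse_pages_arg()/--mmap-pages above (illustrative):
 * "-m 129" is rounded up to 256 pages, while "-m 512K" with 4 kB pages maps
 * to exactly 128 pages and needs no rounding.
 */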
/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}
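/*
 * Callers normally use the perf_evlist__set_sample_bit() /
 * perf_evlist__reset_sample_bit() wrapper macros (defined in evlist.h, from
 * what I can tell), which paste the PERF_SAMPLE_ prefix onto the bit name
 * before calling the __ variants above.
 */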
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel maps should always match.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter = NULL;	/* initialized so the out_free path never frees garbage */
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}
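/*
 * Example (illustrative): with one evsel sampling IP | TID and another
 * sampling IP | TID | TIME, __perf_evlist__combined_sample_type() above
 * reports IP | TID | TIME, i.e. the union of all sample_type bits.
 */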
bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}
static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;	/* don't leak -ENOMEM into the success path */
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}
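/*
 * Workload handling, as driven by tools like 'perf record'/'perf stat'
 * (illustrative sketch only):
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	// ...open, mmap and enable the events...
 *	perf_evlist__start_workload(evlist);	// uncorks the forked child
 *	// ...child runs execvp(argv[0], argv), events flow...
 */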
int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
				   perf_evsel__name(evsel));
	}

	return printed + fprintf(fp, "\n");
}

int perf_evlist__strerror_open(struct perf_evlist *evlist,
			       int err, char *buf, size_t size)
{
	int printed, value;
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));

	switch (err) {
	case EACCES:
	case EPERM:
		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);

		value = perf_event_paranoid();

		printed += scnprintf(buf + printed, size - printed, "\nHint:\t");

		if (value >= 2) {
			printed += scnprintf(buf + printed, size - printed,
					     "For your workloads it needs to be <= 1\nHint:\t");
		}
		printed += scnprintf(buf + printed, size - printed,
				     "For system wide tracing it needs to be set to -1.\n");

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
				     "Hint:\tThe current value is %d.", value);
		break;
	case EINVAL: {
		struct perf_evsel *first = perf_evlist__first(evlist);
		int max_freq;

		if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
			goto out_default;

		if (first->attr.sample_freq < (u64)max_freq)
			goto out_default;

		printed = scnprintf(buf, size,
				    "Error:\t%s.\n"
				    "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
				    "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
				    emsg, max_freq, first->attr.sample_freq);
		break;
	}
	default:
out_default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
{
	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;

	switch (err) {
	case EPERM:
		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
		printed += scnprintf(buf + printed, size - printed,
				     "Error:\t%s.\n"
				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
				     "Hint:\tTried using %zd kB.\n",
				     emsg, pages_max_per_user, pages_attempted);

		if (pages_attempted >= pages_max_per_user) {
			printed += scnprintf(buf + printed, size - printed,
					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
					     pages_max_per_user + pages_attempted);
		}

		printed += scnprintf(buf + printed, size - printed,
				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
		break;
	default:
		scnprintf(buf, size, "%s", emsg);
		break;
	}

	return 0;
}

void perf_evlist__to_front(struct perf_evlist *evlist,
			   struct perf_evsel *move_evsel)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(move);

	if (move_evsel == perf_evlist__first(evlist))
		return;

	evlist__for_each_safe(evlist, n, evsel) {
		if (evsel->leader == move_evsel->leader)
			list_move_tail(&evsel->node, &move);
	}

	list_splice(&move, &evlist->entries);
}
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}