/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
	evlist->backward = false;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos.  For convenience, put a copy on evlist.
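 *
 * id_pos is the index of the event id within a sample's u64 payload, while
 * is_pos is its index counted back from the end of a non-sample record
 * (see perf_evlist__event2id()).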
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_entry_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	if (evlist == NULL)
		return;

	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_entry_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each_entry(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ?
				    1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	perf_event_attr__set_max_precise_ip(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%.*s",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_SOFTWARE,
		.config	= PERF_COUNT_SW_DUMMY,
		.size	= sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new(&attr);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_entry_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return
			thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each_entry(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
{
	int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out fds POLLHUP'ed we can
	 * close the associated evlist->mmap[] entry.
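	 *
	 * An idx of -1 is used by perf_evlist__add_pollfd() for fds that are
	 * not tied to an mmap[] entry.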
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
					 void *arg __maybe_unused)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get event id.. All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
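	 *
	 * Otherwise read() the counter: the first u64 is the value, optionally
	 * followed by TIME_ENABLED and TIME_RUNNING, and finally the id.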
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

 add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
	int i;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		int fd = evlist->mmap[i].fd;
		int err;

		if (fd < 0)
			continue;
		err =
		      ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__pause(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, true);
}

int perf_evlist__resume(struct perf_evlist *evlist)
{
	return perf_evlist__set_paused(evlist, false);
}

/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *
perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
		u64 end, u64 *prev)
{
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;
	int diff = end - start;

	if (check_messup) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the 'end', we got messed up.
		 *
		 * In either case, truncate and restart at 'end'.
		 */
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * 'end' points to a known good entry, start there.
			 */
			start = end;
			diff = 0;
		}
	}

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[start & md->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size) {
			event = NULL;
			goto broken_event;
		}

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((start & md->mask) + size != ((start + size) & md->mask)) {
			unsigned int offset = start;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		start += size;
	}

broken_event:
	if (prev)
		*prev = start;

	return event;
}

union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);

	return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
}

union perf_event *
perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head, end;
	u64 start = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (!head)
		return NULL;

	/*
	 * The 'head' pointer starts at 0 and the kernel subtracts
	 * sizeof(record) from it on every write, so 'head' is in fact a
	 * negative offset.  'end' is built manually by adding the size of
	 * the ring buffer to 'head', which makes the whole ring buffer
	 * valid to read.  If 'end' comes out positive, the ring buffer has
	 * not been fully filled yet, so 'end' must be adjusted to 0.
	 *
	 * However, since both 'head' and 'end' are unsigned, we can't
	 * simply compare 'end' against 0.  Instead we compare '-head' (the
	 * number of bytes the kernel has written into the ring buffer) with
	 * the size of the ring buffer.
	 */
	if (-head < (u64)(md->mask + 1))
		end = 0;
	else
		end = head + md->mask + 1;

	return perf_mmap__read(md, false, start, end, &md->prev);
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	if (!evlist->backward)
		return perf_evlist__mmap_read_forward(evlist, idx);
	return perf_evlist__mmap_read_backward(evlist, idx);
}

void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;

	if (!atomic_read(&md->refcnt))
		return;

	head = perf_mmap__read_head(md);
	md->prev = head;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	BUG_ON(md->base && atomic_read(&md->refcnt) == 0);

	if (atomic_dec_and_test(&md->refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		evlist->mmap[idx].fd = -1;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	int i;

	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	if (!evlist->mmap)
		return -ENOMEM;

	for (i = 0; i < evlist->nr_mmaps; i++)
		evlist->mmap[i].fd = -1;
	return 0;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}
	evlist->mmap[idx].fd = fd;

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
			 struct perf_evsel *evsel)
{
	if (evsel->overwrite)
		return false;
	return true;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;
	int revent;

	evlist__for_each_entry(evlist, evsel) {
		int fd;

		if (evsel->overwrite != (evlist->overwrite && evlist->backward))
			continue;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

		/*
		 * The system_wide flag causes a selected event to be opened
		 * always without a pid.  Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
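		 *
		 * When PERF_FORMAT_ID is set, the event still gets an id added
		 * below so that its samples can be resolved back to this evsel.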
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a once upon a time good value, i.e. things look
		 * strange since we can't read a sysctl value, but let's not
		 * die yet...
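		 *
		 * The value is in KiB, like the sysctl itself; it is turned
		 * into a power-of-two number of pages further below.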
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1       },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ?
				     0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each_entry(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it.  Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1.  If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus    = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which don't have a
		 * cpu limit, so the evlist and evsel cpu/thread maps
		 * should always be the same.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each_entry(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each_entry(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each_entry(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each_entry(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each_entry(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID.
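	 * A configuration where samples carry read values but no event id
	 * is therefore rejected here.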
	 */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_entry_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_entry_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each_entry(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags     = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
	struct perf_evsel *evsel;
	size_t printed = 0;

	evlist__for_each_entry(evlist, evsel) {
		printed += fprintf(fp, "%s%s", evsel->idx ?
", " : "", 1783 perf_evsel__name(evsel)); 1784 } 1785 1786 return printed + fprintf(fp, "\n"); 1787 } 1788 1789 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1790 int err, char *buf, size_t size) 1791 { 1792 int printed, value; 1793 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1794 1795 switch (err) { 1796 case EACCES: 1797 case EPERM: 1798 printed = scnprintf(buf, size, 1799 "Error:\t%s.\n" 1800 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1801 1802 value = perf_event_paranoid(); 1803 1804 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1805 1806 if (value >= 2) { 1807 printed += scnprintf(buf + printed, size - printed, 1808 "For your workloads it needs to be <= 1\nHint:\t"); 1809 } 1810 printed += scnprintf(buf + printed, size - printed, 1811 "For system wide tracing it needs to be set to -1.\n"); 1812 1813 printed += scnprintf(buf + printed, size - printed, 1814 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1815 "Hint:\tThe current value is %d.", value); 1816 break; 1817 case EINVAL: { 1818 struct perf_evsel *first = perf_evlist__first(evlist); 1819 int max_freq; 1820 1821 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1822 goto out_default; 1823 1824 if (first->attr.sample_freq < (u64)max_freq) 1825 goto out_default; 1826 1827 printed = scnprintf(buf, size, 1828 "Error:\t%s.\n" 1829 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1830 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1831 emsg, max_freq, first->attr.sample_freq); 1832 break; 1833 } 1834 default: 1835 out_default: 1836 scnprintf(buf, size, "%s", emsg); 1837 break; 1838 } 1839 1840 return 0; 1841 } 1842 1843 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1844 { 1845 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1846 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1847 1848 switch (err) { 1849 case EPERM: 1850 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1851 printed += scnprintf(buf + printed, size - printed, 1852 "Error:\t%s.\n" 1853 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1854 "Hint:\tTried using %zd kB.\n", 1855 emsg, pages_max_per_user, pages_attempted); 1856 1857 if (pages_attempted >= pages_max_per_user) { 1858 printed += scnprintf(buf + printed, size - printed, 1859 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1860 pages_max_per_user + pages_attempted); 1861 } 1862 1863 printed += scnprintf(buf + printed, size - printed, 1864 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1865 break; 1866 default: 1867 scnprintf(buf, size, "%s", emsg); 1868 break; 1869 } 1870 1871 return 0; 1872 } 1873 1874 void perf_evlist__to_front(struct perf_evlist *evlist, 1875 struct perf_evsel *move_evsel) 1876 { 1877 struct perf_evsel *evsel, *n; 1878 LIST_HEAD(move); 1879 1880 if (move_evsel == perf_evlist__first(evlist)) 1881 return; 1882 1883 evlist__for_each_entry_safe(evlist, n, evsel) { 1884 if (evsel->leader == move_evsel->leader) 1885 list_move_tail(&evsel->node, &move); 1886 } 1887 1888 list_splice(&move, &evlist->entries); 1889 } 1890 1891 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1892 struct perf_evsel *tracking_evsel) 1893 { 1894 struct perf_evsel *evsel; 1895 1896 if (tracking_evsel->tracking) 1897 return; 1898 1899 
	evlist__for_each_entry(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each_entry(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}