/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
		       struct thread_map *threads)
{
	int i;

	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
		INIT_HLIST_HEAD(&evlist->heads[i]);
	INIT_LIST_HEAD(&evlist->entries);
	perf_evlist__set_maps(evlist, cpus, threads);
	fdarray__init(&evlist->pollfd, 64);
	evlist->workload.pid = -1;
}

struct perf_evlist *perf_evlist__new(void)
{
	struct perf_evlist *evlist = zalloc(sizeof(*evlist));

	if (evlist != NULL)
		perf_evlist__init(evlist, NULL, NULL);

	return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_default(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
	struct perf_evlist *evlist = perf_evlist__new();

	if (evlist && perf_evlist__add_dummy(evlist)) {
		perf_evlist__delete(evlist);
		evlist = NULL;
	}

	return evlist;
}

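/*
 * Example (editor's sketch, not part of the original file): the typical
 * lifecycle of an evlist in a perf tool. The struct target setup and error
 * handling are elided; UINT_MAX asks for the default mmap size.
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *
 *	perf_evlist__create_maps(evlist, &target);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	perf_evlist__enable(evlist);
 *	... read events, see perf_evlist__mmap_read() below ...
 *	perf_evlist__disable(evlist);
 *	perf_evlist__delete(evlist);
 */
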
/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);

	evlist->id_pos = first->id_pos;
	evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		perf_evsel__calc_id_pos(evsel);

	perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
	struct perf_evsel *pos, *n;

	evlist__for_each_safe(evlist, n, pos) {
		list_del_init(&pos->node);
		pos->evlist = NULL;
		perf_evsel__delete(pos);
	}

	evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
	zfree(&evlist->mmap);
	fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
	perf_evlist__munmap(evlist);
	perf_evlist__close(evlist);
	cpu_map__put(evlist->cpus);
	thread_map__put(evlist->threads);
	evlist->cpus = NULL;
	evlist->threads = NULL;
	perf_evlist__purge(evlist);
	perf_evlist__exit(evlist);
	free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
					  struct perf_evsel *evsel)
{
	/*
	 * We already have cpus for evsel (via PMU sysfs) so
	 * keep it, if there's no target cpu list defined.
	 */
	if (!evsel->own_cpus || evlist->has_user_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evlist->cpus);
	} else if (evsel->cpus != evsel->own_cpus) {
		cpu_map__put(evsel->cpus);
		evsel->cpus = cpu_map__get(evsel->own_cpus);
	}

	thread_map__put(evsel->threads);
	evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
	entry->evlist = evlist;
	list_add_tail(&entry->node, &evlist->entries);
	entry->idx = evlist->nr_entries;
	entry->tracking = !entry->idx;

	if (!evlist->nr_entries++)
		perf_evlist__set_id_pos(evlist);

	__perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
	evsel->evlist = NULL;
	list_del_init(&evsel->node);
	evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
				   struct list_head *list)
{
	struct perf_evsel *evsel, *temp;

	__evlist__for_each_safe(list, temp, evsel) {
		list_del_init(&evsel->node);
		perf_evlist__add(evlist, evsel);
	}
}

void __perf_evlist__set_leader(struct list_head *list)
{
	struct perf_evsel *evsel, *leader;

	leader = list_entry(list->next, struct perf_evsel, node);
	evsel = list_entry(list->prev, struct perf_evsel, node);

	leader->nr_members = evsel->idx - leader->idx + 1;

	__evlist__for_each(list, evsel) {
		evsel->leader = leader;
	}
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
	if (evlist->nr_entries) {
		evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
		__perf_evlist__set_leader(&evlist->entries);
	}
}

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
{
	attr->precise_ip = 3;

	while (attr->precise_ip != 0) {
		int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
		if (fd != -1) {
			close(fd);
			break;
		}
		--attr->precise_ip;
	}
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	perf_event_attr__set_max_precise_ip(&attr);

	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto error;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%.*s",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
		goto error_free;

	perf_evlist__add(evlist, evsel);
	return 0;
error_free:
	perf_evsel__delete(evsel);
error:
	return -ENOMEM;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_SOFTWARE,
		.config = PERF_COUNT_SW_DUMMY,
		.size = sizeof(attr), /* to capture ABI version */
	};
	struct perf_evsel *evsel = perf_evsel__new(&attr);

	if (evsel == NULL)
		return -ENOMEM;

	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
				  struct perf_event_attr *attrs, size_t nr_attrs)
{
	struct perf_evsel *evsel, *n;
	LIST_HEAD(head);
	size_t i;

	for (i = 0; i < nr_attrs; i++) {
		evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
		if (evsel == NULL)
			goto out_delete_partial_list;
		list_add_tail(&evsel->node, &head);
	}

	perf_evlist__splice_list_tail(evlist, &head);

	return 0;

out_delete_partial_list:
	__evlist__for_each_safe(&head, n, evsel)
		perf_evsel__delete(evsel);
	return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
				     struct perf_event_attr *attrs, size_t nr_attrs)
{
	size_t i;

	for (i = 0; i < nr_attrs; i++)
		event_attr_init(attrs + i);

	return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
		    (int)evsel->attr.config == id)
			return evsel;
	}

	return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
				     const char *name)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
		    (strcmp(evsel->name, name) == 0))
			return evsel;
	}

	return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
			   const char *sys, const char *name, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

	if (IS_ERR(evsel))
		return -1;

	evsel->handler = handler;
	perf_evlist__add(evlist, evsel);
	return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
				   struct perf_evsel *evsel)
{
	if (evsel->system_wide)
		return 1;
	else
		return thread_map__nr(evlist->threads);
}

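/*
 * Example (editor's sketch, not in the original file): adding a tracepoint
 * event together with a handler, the way the builtin tools usually do it.
 * The process_sched_switch() callback name is illustrative only.
 *
 *	if (perf_evlist__add_newtp(evlist, "sched", "sched_switch",
 *				   process_sched_switch))
 *		pr_err("could not add the sched:sched_switch tracepoint\n");
 */
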
void perf_evlist__disable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__disable(pos);
	}

	evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	evlist__for_each(evlist, pos) {
		if (!perf_evsel__is_group_leader(pos) || !pos->fd)
			continue;
		perf_evsel__enable(pos);
	}

	evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
					 struct perf_evsel *evsel, int cpu)
{
	int thread, err;
	int nr_threads = perf_evlist__nr_threads(evlist, evsel);

	if (!evsel->fd)
		return -EINVAL;

	for (thread = 0; thread < nr_threads; thread++) {
		err = ioctl(FD(evsel, cpu, thread),
			    PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
					    struct perf_evsel *evsel,
					    int thread)
{
	int cpu, err;
	int nr_cpus = cpu_map__nr(evlist->cpus);

	if (!evsel->fd)
		return -EINVAL;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
		if (err)
			return err;
	}
	return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
				  struct perf_evsel *evsel, int idx)
{
	bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

	if (per_cpu_mmaps)
		return perf_evlist__enable_event_cpu(evlist, evsel, idx);
	else
		return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);
	int nfds = 0;
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->system_wide)
			nfds += nr_cpus;
		else
			nfds += nr_cpus * nr_threads;
	}

	if (fdarray__available_entries(&evlist->pollfd) < nfds &&
	    fdarray__grow(&evlist->pollfd, nfds) < 0)
		return -ENOMEM;

	return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx)
{
	int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP);
	/*
	 * Save the idx so that when we filter out POLLHUP'ed fds we can
	 * close the associated evlist->mmap[] entry.
	 */
	if (pos >= 0) {
		evlist->pollfd.priv[pos].idx = idx;

		fcntl(fd, F_SETFL, O_NONBLOCK);
	}

	return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
	return __perf_evlist__add_pollfd(evlist, fd, -1);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd)
{
	struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

	perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
	return fdarray__filter(&evlist->pollfd, revents_and_mask,
			       perf_evlist__munmap_filtered);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
	return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
				 struct perf_evsel *evsel,
				 int cpu, int thread, u64 id)
{
	int hash;
	struct perf_sample_id *sid = SID(evsel, cpu, thread);

	sid->id = id;
	sid->evsel = evsel;
	hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
	hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
			 int cpu, int thread, u64 id)
{
	perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
	evsel->id[evsel->ids++] = id;
}

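/*
 * Example (editor's sketch, not in the original file): once the ids have
 * been hashed as above, an event carrying PERF_SAMPLE_ID/IDENTIFIER can be
 * demultiplexed back to its evsel without walking the whole list, using
 * perf_evlist__id2evsel() below:
 *
 *	struct perf_evsel *evsel = perf_evlist__id2evsel(evlist, sample.id);
 *
 *	if (evsel == NULL)
 *		pr_debug("no evsel found for id %" PRIu64 "\n", sample.id);
 */
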
int perf_evlist__id_add_fd(struct perf_evlist *evlist,
			   struct perf_evsel *evsel,
			   int cpu, int thread, int fd)
{
	u64 read_data[4] = { 0, };
	int id_idx = 1; /* The first entry is the counter value */
	u64 id;
	int ret;

	ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
	if (!ret)
		goto add;

	if (errno != ENOTTY)
		return -1;

	/* Legacy way to get the event id... All hail to old kernels! */

	/*
	 * This way does not work with group format read, so bail
	 * out in that case.
	 */
	if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
		return -1;

	if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
	    read(fd, &read_data, sizeof(read_data)) == -1)
		return -1;

	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		++id_idx;
	if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		++id_idx;

	id = read_data[id_idx];

add:
	perf_evlist__id_add(evlist, evsel, cpu, thread, id);
	return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
				     struct perf_evsel *evsel, int idx, int cpu,
				     int thread)
{
	struct perf_sample_id *sid = SID(evsel, cpu, thread);
	sid->idx = idx;
	if (evlist->cpus && cpu >= 0)
		sid->cpu = evlist->cpus->map[cpu];
	else
		sid->cpu = -1;
	if (!evsel->system_wide && evlist->threads && thread >= 0)
		sid->tid = thread_map__pid(evlist->threads, thread);
	else
		sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node)
		if (sid->id == id)
			return sid;

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
	struct perf_sample_id *sid;

	if (evlist->nr_entries == 1 || !id)
		return perf_evlist__first(evlist);

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	if (!perf_evlist__sample_id_all(evlist))
		return perf_evlist__first(evlist);

	return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
						u64 id)
{
	struct perf_sample_id *sid;

	if (!id)
		return NULL;

	sid = perf_evlist__id2sid(evlist, id);
	if (sid)
		return sid->evsel;

	return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
				 union perf_event *event, u64 *id)
{
	const u64 *array = event->sample.array;
	ssize_t n;

	n = (event->header.size - sizeof(event->header)) >> 3;

	if (event->header.type == PERF_RECORD_SAMPLE) {
		if (evlist->id_pos >= n)
			return -1;
		*id = array[evlist->id_pos];
	} else {
		if (evlist->is_pos > n)
			return -1;
		n -= evlist->is_pos;
		*id = array[n];
	}
	return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
						   union perf_event *event)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct hlist_head *head;
	struct perf_sample_id *sid;
	int hash;
	u64 id;

	if (evlist->nr_entries == 1)
		return first;

	if (!first->attr.sample_id_all &&
	    event->header.type != PERF_RECORD_SAMPLE)
		return first;

	if (perf_evlist__event2id(evlist, event, &id))
		return NULL;

	/* Synthesized events have an id of zero */
	if (!id)
		return first;

	hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
	head = &evlist->heads[hash];

	hlist_for_each_entry(sid, head, node) {
		if (sid->id == id)
			return sid->evsel;
	}
	return NULL;
}

/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *
perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
		u64 end, u64 *prev)
{
	unsigned char *data = md->base + page_size;
	union perf_event *event = NULL;
	int diff = end - start;

	if (check_messup) {
		/*
		 * If we're further behind than half the buffer, there's a chance
		 * the writer will bite our tail and mess up the samples under us.
		 *
		 * If we somehow ended up ahead of the 'end', we got messed up.
		 *
		 * In either case, truncate and restart at 'end'.
		 */
		if (diff > md->mask / 2 || diff < 0) {
			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

			/*
			 * 'end' points to a known good entry, start there.
			 */
			start = end;
			diff = 0;
		}
	}

	if (diff >= (int)sizeof(event->header)) {
		size_t size;

		event = (union perf_event *)&data[start & md->mask];
		size = event->header.size;

		if (size < sizeof(event->header) || diff < (int)size) {
			event = NULL;
			goto broken_event;
		}

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((start & md->mask) + size != ((start + size) & md->mask)) {
			unsigned int offset = start;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = md->event_copy;

			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = (union perf_event *) md->event_copy;
		}

		start += size;
	}

broken_event:
	if (prev)
		*prev = start;

	return event;
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;
	u64 old = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);

	return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
}

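/*
 * Example (editor's sketch, not part of the original file): the usual
 * forward-reading consumer loop built on top of the helpers above. The
 * process_event() callback and error handling are illustrative only.
 *
 *	while (perf_evlist__poll(evlist, timeout) > 0) {
 *		int i;
 *
 *		for (i = 0; i < evlist->nr_mmaps; i++) {
 *			union perf_event *event;
 *
 *			while ((event = perf_evlist__mmap_read(evlist, i))) {
 *				process_event(event);
 *				perf_evlist__mmap_consume(evlist, i);
 *			}
 *		}
 *	}
 *
 * perf_evlist__mmap_consume() should be called for every event returned by
 * perf_evlist__mmap_read(); in non-overwrite mode it advances the tail
 * pointer, and without that the kernel will eventually see the ring buffer
 * as full and start dropping records.
 */
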
union perf_event *
perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head, end;
	u64 start = md->prev;

	/*
	 * Check if event was unmapped due to a POLLHUP/POLLERR.
	 */
	if (!atomic_read(&md->refcnt))
		return NULL;

	head = perf_mmap__read_head(md);
	if (!head)
		return NULL;

	/*
	 * The 'head' pointer starts at 0 and the kernel subtracts
	 * sizeof(record) from it on every write, so 'head' is in fact
	 * negative. The 'end' pointer is built manually by adding the size
	 * of the ring buffer to 'head', which means the valid data to read
	 * spans the whole ring buffer. If 'end' is positive, the ring
	 * buffer has not been completely filled yet, so 'end' must be
	 * adjusted to 0.
	 *
	 * However, since both 'head' and 'end' are unsigned, we can't
	 * simply compare 'end' against 0. Instead, compare '-head', i.e.
	 * the number of bytes the kernel has written, with the size of
	 * the ring buffer.
	 */
	if (-head < (u64)(md->mask + 1))
		end = 0;
	else
		end = head + md->mask + 1;

	return perf_mmap__read(md, false, start, end, &md->prev);
}

void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];
	u64 head;

	if (!atomic_read(&md->refcnt))
		return;

	head = perf_mmap__read_head(md);
	md->prev = head;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
	atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);

	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
		__perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
	struct perf_mmap *md = &evlist->mmap[idx];

	if (!evlist->overwrite) {
		u64 old = md->prev;

		perf_mmap__write_tail(md, old);
	}

	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
		perf_evlist__mmap_put(evlist, idx);
}

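/*
 * Example (editor's sketch, not part of the original file): draining an
 * overwritable ring buffer backwards, e.g. to dump the most recent events
 * after tracing has been stopped. The catchup call snapshots the current
 * 'head' as the starting point; since nothing is written back to the
 * kernel in this mode, there is no tail to update:
 *
 *	perf_evlist__mmap_read_catchup(evlist, i);
 *	while ((event = perf_evlist__mmap_read_backward(evlist, i)) != NULL)
 *		process_event(event);
 */
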
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
			struct auxtrace_mmap_params *mp __maybe_unused,
			off_t auxtrace_offset __maybe_unused,
			unsigned int auxtrace_pages __maybe_unused,
			bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
			struct auxtrace_mmap_params *mp __maybe_unused,
			struct perf_evlist *evlist __maybe_unused,
			int idx __maybe_unused,
			bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
	if (evlist->mmap[idx].base != NULL) {
		munmap(evlist->mmap[idx].base, evlist->mmap_len);
		evlist->mmap[idx].base = NULL;
		atomic_set(&evlist->mmap[idx].refcnt, 0);
	}
	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
	int i;

	if (evlist->mmap == NULL)
		return;

	for (i = 0; i < evlist->nr_mmaps; i++)
		__perf_evlist__munmap(evlist, i);

	zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
	evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
	if (cpu_map__empty(evlist->cpus))
		evlist->nr_mmaps = thread_map__nr(evlist->threads);
	evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
	return evlist->mmap != NULL ? 0 : -ENOMEM;
}

struct mmap_params {
	int prot;
	int mask;
	struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
			       struct mmap_params *mp, int fd)
{
	/*
	 * The last one will be done at perf_evlist__mmap_consume(), so that we
	 * make sure we don't prevent tools from consuming every last event in
	 * the ring buffer.
	 *
	 * I.e. we can get the POLLHUP meaning that the fd doesn't exist
	 * anymore, but the last events for it are still in the ring buffer,
	 * waiting to be consumed.
	 *
	 * Tools can choose to ignore this at their own discretion, but the
	 * evlist layer can't just drop it when filtering events in
	 * perf_evlist__filter_pollfd().
	 */
	atomic_set(&evlist->mmap[idx].refcnt, 2);
	evlist->mmap[idx].prev = 0;
	evlist->mmap[idx].mask = mp->mask;
	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
				      MAP_SHARED, fd, 0);
	if (evlist->mmap[idx].base == MAP_FAILED) {
		pr_debug2("failed to mmap perf event ring buffer, error %d\n",
			  errno);
		evlist->mmap[idx].base = NULL;
		return -1;
	}

	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
		return -1;

	return 0;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
				       struct mmap_params *mp, int cpu,
				       int thread, int *output)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		int fd;

		if (evsel->system_wide && thread)
			continue;

		fd = FD(evsel, cpu, thread);

		if (*output == -1) {
			*output = fd;
			if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
				return -1;
		} else {
			if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
				return -1;

			perf_evlist__mmap_get(evlist, idx);
		}

		/*
		 * The system_wide flag causes a selected event to always be
		 * opened without a pid. Consequently it will never get a
		 * POLLHUP, but it is used for tracking in combination with
		 * other events, so it should not need to be polled anyway.
		 * Therefore don't add it for polling.
		 */
		if (!evsel->system_wide &&
		    __perf_evlist__add_pollfd(evlist, fd, idx) < 0) {
			perf_evlist__mmap_put(evlist, idx);
			return -1;
		}

		if (evsel->attr.read_format & PERF_FORMAT_ID) {
			if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
						   fd) < 0)
				return -1;
			perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
						 thread);
		}
	}

	return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
				     struct mmap_params *mp)
{
	int cpu, thread;
	int nr_cpus = cpu_map__nr(evlist->cpus);
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per cpu\n");
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
					      true);

		for (thread = 0; thread < nr_threads; thread++) {
			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
							thread, &output))
				goto out_unmap;
		}
	}

	return 0;

out_unmap:
	for (cpu = 0; cpu < nr_cpus; cpu++)
		__perf_evlist__munmap(evlist, cpu);
	return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
					struct mmap_params *mp)
{
	int thread;
	int nr_threads = thread_map__nr(evlist->threads);

	pr_debug2("perf event ring buffer mmapped per thread\n");
	for (thread = 0; thread < nr_threads; thread++) {
		int output = -1;

		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
					      false);

		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
						&output))
			goto out_unmap;
	}

	return 0;

out_unmap:
	for (thread = 0; thread < nr_threads; thread++)
		__perf_evlist__munmap(evlist, thread);
	return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
	unsigned long pages;
	int max;

	if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
		/*
		 * Pick a value that was considered good once upon a time;
		 * things will look strange since we can't read the sysctl
		 * value, but let's not die just yet...
		 */
		max = 512;
	} else {
		max -= (page_size / 1024);
	}

	pages = (max * 1024) / page_size;
	if (!is_power_of_2(pages))
		pages = rounddown_pow_of_two(pages);

	return pages;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
	if (pages == UINT_MAX)
		pages = perf_event_mlock_kb_in_pages();
	else if (!is_power_of_2(pages))
		return 0;

	return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
			    unsigned long max)
{
	unsigned long pages, val;
	static struct parse_tag tags[] = {
		{ .tag = 'B', .mult = 1 },
		{ .tag = 'K', .mult = 1 << 10 },
		{ .tag = 'M', .mult = 1 << 20 },
		{ .tag = 'G', .mult = 1 << 30 },
		{ .tag = 0 },
	};

	if (str == NULL)
		return -EINVAL;

	val = parse_tag_value(str, tags);
	if (val != (unsigned long) -1) {
		/* we got file size value */
		pages = PERF_ALIGN(val, page_size) / page_size;
	} else {
		/* we got pages count value */
		char *eptr;
		pages = strtoul(str, &eptr, 10);
		if (*eptr != '\0')
			return -EINVAL;
	}

	if (pages == 0 && min == 0) {
		/* leave number of pages at 0 */
	} else if (!is_power_of_2(pages)) {
		/* round pages up to next power of 2 */
		pages = roundup_pow_of_two(pages);
		if (!pages)
			return -EINVAL;
		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
			pages * page_size, pages);
	}

	if (pages > max)
		return -EINVAL;

	return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
	unsigned long max = UINT_MAX;
	long pages;

	if (max > SIZE_MAX / page_size)
		max = SIZE_MAX / page_size;

	pages = parse_pages_arg(str, 1, max);
	if (pages < 0) {
		pr_err("Invalid argument for --mmap_pages/-m\n");
		return -1;
	}

	*mmap_pages = pages;
	return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
				  int unset __maybe_unused)
{
	return __perf_evlist__parse_mmap_pages(opt->value, str);
}

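/*
 * Worked example (editor's note, not in the original file), assuming 4 KiB
 * pages: "-m 512K" is parsed as a size, so PERF_ALIGN(512 KiB, 4 KiB) /
 * 4 KiB = 128 pages; "-m 100" is parsed as a page count and rounded up to
 * the next power of two, also 128 pages. Either way perf_evlist__mmap_size()
 * then maps (pages + 1) pages, the extra page being the kernel's control
 * (perf_event_mmap_page) page.
 */
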
/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail(). Using perf_evlist__mmap_consume() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 bool overwrite, unsigned int auxtrace_pages,
			 bool auxtrace_overwrite)
{
	struct perf_evsel *evsel;
	const struct cpu_map *cpus = evlist->cpus;
	const struct thread_map *threads = evlist->threads;
	struct mmap_params mp = {
		.prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
	};

	if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
		return -ENOMEM;

	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
		return -ENOMEM;

	evlist->overwrite = overwrite;
	evlist->mmap_len = perf_evlist__mmap_size(pages);
	pr_debug("mmap size %zuB\n", evlist->mmap_len);
	mp.mask = evlist->mmap_len - page_size - 1;

	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
				   auxtrace_pages, auxtrace_overwrite);

	evlist__for_each(evlist, evsel) {
		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
		    evsel->sample_id == NULL &&
		    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
			return -ENOMEM;
	}

	if (cpu_map__empty(cpus))
		return perf_evlist__mmap_per_thread(evlist, &mp);

	return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
		      bool overwrite)
{
	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
	struct cpu_map *cpus;
	struct thread_map *threads;

	threads = thread_map__new_str(target->pid, target->tid, target->uid);

	if (!threads)
		return -1;

	if (target__uses_dummy_map(target))
		cpus = cpu_map__dummy_new();
	else
		cpus = cpu_map__new(target->cpu_list);

	if (!cpus)
		goto out_delete_threads;

	evlist->has_user_cpus = !!target->cpu_list;

	perf_evlist__set_maps(evlist, cpus, threads);

	return 0;

out_delete_threads:
	thread_map__put(threads);
	return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
			   struct thread_map *threads)
{
	/*
	 * Allow for the possibility that one or another of the maps isn't being
	 * changed i.e. don't put it. Note we are assuming the maps that are
	 * being applied are brand new and evlist is taking ownership of the
	 * original reference count of 1. If that is not the case it is up to
	 * the caller to increase the reference count.
	 */
	if (cpus != evlist->cpus) {
		cpu_map__put(evlist->cpus);
		evlist->cpus = cpu_map__get(cpus);
	}

	if (threads != evlist->threads) {
		thread_map__put(evlist->threads);
		evlist->threads = thread_map__get(threads);
	}

	perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
				   enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
				     enum perf_event_sample_format bit)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel)
		__perf_evsel__reset_sample_bit(evsel, bit);
}

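/*
 * Example (editor's sketch, not in the original file): mapping the ring
 * buffers for a specific pid. The struct target initialization shown here
 * is hypothetical and error handling is omitted.
 *
 *	struct target target = {
 *		.pid = "1234",
 *		.uid = UINT_MAX,
 *	};
 *
 *	if (perf_evlist__create_maps(evlist, &target) == 0 &&
 *	    perf_evlist__open(evlist) == 0)
 *		perf_evlist__mmap(evlist, UINT_MAX, false);
 */
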
int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
	struct perf_evsel *evsel;
	int err = 0;
	const int ncpus = cpu_map__nr(evlist->cpus),
		  nthreads = thread_map__nr(evlist->threads);

	evlist__for_each(evlist, evsel) {
		if (evsel->filter == NULL)
			continue;

		/*
		 * Filters only work for tracepoint events, which have no cpu
		 * limit, so the evlist and evsel cpu maps should always match.
		 */
		err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
		if (err) {
			*err_evsel = evsel;
			break;
		}
	}

	return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
	struct perf_evsel *evsel;
	int err = 0;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
			continue;

		err = perf_evsel__set_filter(evsel, filter);
		if (err)
			break;
	}

	return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
	char *filter;
	int ret = -1;
	size_t i;

	for (i = 0; i < npids; ++i) {
		if (i == 0) {
			if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
				return -1;
		} else {
			char *tmp;

			if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
				goto out_free;

			free(filter);
			filter = tmp;
		}
	}

	ret = perf_evlist__set_filter(evlist, filter);
out_free:
	free(filter);
	return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
	return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

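/*
 * Example (editor's sketch, not in the original file): a tool doing
 * system-wide tracing commonly filters out its own pid so that it does not
 * trace itself, along the lines of:
 *
 *	if (perf_evlist__set_filter_pid(evlist, getpid()) < 0)
 *		pr_err("failed to set the tracepoint filter\n");
 *
 * which attaches a "common_pid != <pid>" filter to every tracepoint event
 * in the evlist.
 */
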
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *pos;

	if (evlist->nr_entries == 1)
		return true;

	if (evlist->id_pos < 0 || evlist->is_pos < 0)
		return false;

	evlist__for_each(evlist, pos) {
		if (pos->id_pos != evlist->id_pos ||
		    pos->is_pos != evlist->is_pos)
			return false;
	}

	return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	if (evlist->combined_sample_type)
		return evlist->combined_sample_type;

	evlist__for_each(evlist, evsel)
		evlist->combined_sample_type |= evsel->attr.sample_type;

	return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
	evlist->combined_sample_type = 0;
	return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	u64 branch_type = 0;

	evlist__for_each(evlist, evsel)
		branch_type |= evsel->attr.branch_sample_type;
	return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
	u64 read_format = first->attr.read_format;
	u64 sample_type = first->attr.sample_type;

	evlist__for_each(evlist, pos) {
		if (read_format != pos->attr.read_format)
			return false;
	}

	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
	if ((sample_type & PERF_SAMPLE_READ) &&
	    !(read_format & PERF_FORMAT_ID)) {
		return false;
	}

	return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	struct perf_sample *data;
	u64 sample_type;
	u16 size = 0;

	if (!first->attr.sample_id_all)
		goto out;

	sample_type = first->attr.sample_type;

	if (sample_type & PERF_SAMPLE_TID)
		size += sizeof(data->tid) * 2;

	if (sample_type & PERF_SAMPLE_TIME)
		size += sizeof(data->time);

	if (sample_type & PERF_SAMPLE_ID)
		size += sizeof(data->id);

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		size += sizeof(data->stream_id);

	if (sample_type & PERF_SAMPLE_CPU)
		size += sizeof(data->cpu) * 2;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		size += sizeof(data->id);
out:
	return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

	evlist__for_each_continue(evlist, pos) {
		if (first->attr.sample_id_all != pos->attr.sample_id_all)
			return false;
	}

	return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
	struct perf_evsel *first = perf_evlist__first(evlist);
	return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
			       struct perf_evsel *evsel)
{
	evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int ncpus = cpu_map__nr(evlist->cpus);
	int nthreads = thread_map__nr(evlist->threads);
	int n;

	evlist__for_each_reverse(evlist, evsel) {
		n = evsel->cpus ? evsel->cpus->nr : ncpus;
		perf_evsel__close(evsel, n, nthreads);
	}
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
	struct cpu_map *cpus;
	struct thread_map *threads;
	int err = -ENOMEM;

	/*
	 * Try reading /sys/devices/system/cpu/online to get
	 * an all cpus map.
	 *
	 * FIXME: -ENOMEM is the best we can do here, the cpu_map
	 * code needs an overhaul to properly forward the
	 * error, and we may not want to do that fallback to a
	 * default cpu identity map :-\
	 */
	cpus = cpu_map__new(NULL);
	if (!cpus)
		goto out;

	threads = thread_map__new_dummy();
	if (!threads)
		goto out_put;

	perf_evlist__set_maps(evlist, cpus, threads);
	err = 0;	/* don't report -ENOMEM on the success path */
out:
	return err;
out_put:
	cpu_map__put(cpus);
	goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;
	int err;

	/*
	 * Default: one fd per CPU, all threads, aka systemwide
	 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
	 */
	if (evlist->threads == NULL && evlist->cpus == NULL) {
		err = perf_evlist__create_syswide_maps(evlist);
		if (err < 0)
			goto out_err;
	}

	perf_evlist__update_id_pos(evlist);

	evlist__for_each(evlist, evsel) {
		err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
		if (err < 0)
			goto out_err;
	}

	return 0;
out_err:
	perf_evlist__close(evlist);
	errno = -err;
	return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
				  const char *argv[], bool pipe_output,
				  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
	int child_ready_pipe[2], go_pipe[2];
	char bf;

	if (pipe(child_ready_pipe) < 0) {
		perror("failed to create 'ready' pipe");
		return -1;
	}

	if (pipe(go_pipe) < 0) {
		perror("failed to create 'go' pipe");
		goto out_close_ready_pipe;
	}

	evlist->workload.pid = fork();
	if (evlist->workload.pid < 0) {
		perror("failed to fork");
		goto out_close_pipes;
	}

	if (!evlist->workload.pid) {
		int ret;

		if (pipe_output)
			dup2(2, 1);

		signal(SIGTERM, SIG_DFL);

		close(child_ready_pipe[0]);
		close(go_pipe[1]);
		fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

		/*
		 * Tell the parent we're ready to go
		 */
		close(child_ready_pipe[1]);

		/*
		 * Wait until the parent tells us to go.
		 */
		ret = read(go_pipe[0], &bf, 1);
		/*
		 * The parent will ask for the execvp() to be performed by
		 * writing exactly one byte, in workload.cork_fd, usually via
		 * perf_evlist__start_workload().
		 *
		 * For cancelling the workload without actually running it,
		 * the parent will just close workload.cork_fd, without writing
		 * anything, i.e. read will return zero and we just exit()
		 * here.
		 */
		if (ret != 1) {
			if (ret == -1)
				perror("unable to read pipe");
			exit(ret);
		}

		execvp(argv[0], (char **)argv);

		if (exec_error) {
			union sigval val;

			val.sival_int = errno;
			if (sigqueue(getppid(), SIGUSR1, val))
				perror(argv[0]);
		} else
			perror(argv[0]);
		exit(-1);
	}

	if (exec_error) {
		struct sigaction act = {
			.sa_flags = SA_SIGINFO,
			.sa_sigaction = exec_error,
		};
		sigaction(SIGUSR1, &act, NULL);
	}

	if (target__none(target)) {
		if (evlist->threads == NULL) {
			fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
				__func__, __LINE__);
			goto out_close_pipes;
		}
		thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
	}

	close(child_ready_pipe[1]);
	close(go_pipe[0]);
	/*
	 * wait for child to settle
	 */
	if (read(child_ready_pipe[0], &bf, 1) == -1) {
		perror("unable to read pipe");
		goto out_close_pipes;
	}

	fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
	evlist->workload.cork_fd = go_pipe[1];
	close(child_ready_pipe[0]);
	return 0;

out_close_pipes:
	close(go_pipe[0]);
	close(go_pipe[1]);
out_close_ready_pipe:
	close(child_ready_pipe[0]);
	close(child_ready_pipe[1]);
	return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
	if (evlist->workload.cork_fd > 0) {
		char bf = 0;
		int ret;
		/*
		 * Remove the cork, let it rip!
		 */
		ret = write(evlist->workload.cork_fd, &bf, 1);
		if (ret < 0)
			perror("unable to write to pipe");

		close(evlist->workload.cork_fd);
		return ret;
	}

	return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
			      struct perf_sample *sample)
{
	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

	if (!evsel)
		return -EFAULT;
	return perf_evsel__parse_sample(evsel, event, sample);
}

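/*
 * Example (editor's sketch, not in the original file): the usual
 * record-a-workload sequence. The forked child sits corked inside
 * perf_evlist__prepare_workload() until the counters are set up, and
 * perf_evlist__start_workload() writes the single byte that lets it exec
 * the command:
 *
 *	perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
 *	perf_evlist__open(evlist);
 *	perf_evlist__mmap(evlist, UINT_MAX, false);
 *	perf_evlist__enable(evlist);
 *	perf_evlist__start_workload(evlist);
 */
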
", " : "", 1718 perf_evsel__name(evsel)); 1719 } 1720 1721 return printed + fprintf(fp, "\n"); 1722 } 1723 1724 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1725 int err, char *buf, size_t size) 1726 { 1727 int printed, value; 1728 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1729 1730 switch (err) { 1731 case EACCES: 1732 case EPERM: 1733 printed = scnprintf(buf, size, 1734 "Error:\t%s.\n" 1735 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1736 1737 value = perf_event_paranoid(); 1738 1739 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1740 1741 if (value >= 2) { 1742 printed += scnprintf(buf + printed, size - printed, 1743 "For your workloads it needs to be <= 1\nHint:\t"); 1744 } 1745 printed += scnprintf(buf + printed, size - printed, 1746 "For system wide tracing it needs to be set to -1.\n"); 1747 1748 printed += scnprintf(buf + printed, size - printed, 1749 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1750 "Hint:\tThe current value is %d.", value); 1751 break; 1752 case EINVAL: { 1753 struct perf_evsel *first = perf_evlist__first(evlist); 1754 int max_freq; 1755 1756 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1757 goto out_default; 1758 1759 if (first->attr.sample_freq < (u64)max_freq) 1760 goto out_default; 1761 1762 printed = scnprintf(buf, size, 1763 "Error:\t%s.\n" 1764 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1765 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1766 emsg, max_freq, first->attr.sample_freq); 1767 break; 1768 } 1769 default: 1770 out_default: 1771 scnprintf(buf, size, "%s", emsg); 1772 break; 1773 } 1774 1775 return 0; 1776 } 1777 1778 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1779 { 1780 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1781 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1782 1783 switch (err) { 1784 case EPERM: 1785 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1786 printed += scnprintf(buf + printed, size - printed, 1787 "Error:\t%s.\n" 1788 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1789 "Hint:\tTried using %zd kB.\n", 1790 emsg, pages_max_per_user, pages_attempted); 1791 1792 if (pages_attempted >= pages_max_per_user) { 1793 printed += scnprintf(buf + printed, size - printed, 1794 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1795 pages_max_per_user + pages_attempted); 1796 } 1797 1798 printed += scnprintf(buf + printed, size - printed, 1799 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1800 break; 1801 default: 1802 scnprintf(buf, size, "%s", emsg); 1803 break; 1804 } 1805 1806 return 0; 1807 } 1808 1809 void perf_evlist__to_front(struct perf_evlist *evlist, 1810 struct perf_evsel *move_evsel) 1811 { 1812 struct perf_evsel *evsel, *n; 1813 LIST_HEAD(move); 1814 1815 if (move_evsel == perf_evlist__first(evlist)) 1816 return; 1817 1818 evlist__for_each_safe(evlist, n, evsel) { 1819 if (evsel->leader == move_evsel->leader) 1820 list_move_tail(&evsel->node, &move); 1821 } 1822 1823 list_splice(&move, &evlist->entries); 1824 } 1825 1826 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1827 struct perf_evsel *tracking_evsel) 1828 { 1829 struct perf_evsel *evsel; 1830 1831 if (tracking_evsel->tracking) 1832 return; 1833 1834 
void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
				     struct perf_evsel *tracking_evsel)
{
	struct perf_evsel *evsel;

	if (tracking_evsel->tracking)
		return;

	evlist__for_each(evlist, evsel) {
		if (evsel != tracking_evsel)
			evsel->tracking = false;
	}

	tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
			       const char *str)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (!evsel->name)
			continue;
		if (strcmp(str, evsel->name) == 0)
			return evsel;
	}

	return NULL;
}