/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "util.h"
#include <api/fs/fs.h>
#include <poll.h>
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "evlist.h"
#include "evsel.h"
#include "debug.h"
#include <unistd.h>

#include "parse-events.h"
#include <subcmd/parse-options.h>

#include <sys/mman.h>

#include <linux/bitops.h>
#include <linux/hash.h>
#include <linux/log2.h>
#include <linux/err.h>

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)

void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
                       struct thread_map *threads)
{
        int i;

        for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
                INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        perf_evlist__set_maps(evlist, cpus, threads);
        fdarray__init(&evlist->pollfd, 64);
        evlist->workload.pid = -1;
        evlist->backward = false;
}

struct perf_evlist *perf_evlist__new(void)
{
        struct perf_evlist *evlist = zalloc(sizeof(*evlist));

        if (evlist != NULL)
                perf_evlist__init(evlist, NULL, NULL);

        return evlist;
}

struct perf_evlist *perf_evlist__new_default(void)
{
        struct perf_evlist *evlist = perf_evlist__new();

        if (evlist && perf_evlist__add_default(evlist)) {
                perf_evlist__delete(evlist);
                evlist = NULL;
        }

        return evlist;
}

struct perf_evlist *perf_evlist__new_dummy(void)
{
        struct perf_evlist *evlist = perf_evlist__new();

        if (evlist && perf_evlist__add_dummy(evlist)) {
                perf_evlist__delete(evlist);
                evlist = NULL;
        }

        return evlist;
}

/**
 * perf_evlist__set_id_pos - set the positions of event ids.
 * @evlist: selected event list
 *
 * Events with compatible sample types all have the same id_pos
 * and is_pos. For convenience, put a copy on evlist.
 */
void perf_evlist__set_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);

        evlist->id_pos = first->id_pos;
        evlist->is_pos = first->is_pos;
}

static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                perf_evsel__calc_id_pos(evsel);

        perf_evlist__set_id_pos(evlist);
}

static void perf_evlist__purge(struct perf_evlist *evlist)
{
        struct perf_evsel *pos, *n;

        evlist__for_each_safe(evlist, n, pos) {
                list_del_init(&pos->node);
                pos->evlist = NULL;
                perf_evsel__delete(pos);
        }

        evlist->nr_entries = 0;
}

void perf_evlist__exit(struct perf_evlist *evlist)
{
        zfree(&evlist->mmap);
        fdarray__exit(&evlist->pollfd);
}

void perf_evlist__delete(struct perf_evlist *evlist)
{
        perf_evlist__munmap(evlist);
        perf_evlist__close(evlist);
        cpu_map__put(evlist->cpus);
        thread_map__put(evlist->threads);
        evlist->cpus = NULL;
        evlist->threads = NULL;
        perf_evlist__purge(evlist);
        perf_evlist__exit(evlist);
        free(evlist);
}

static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
                                          struct perf_evsel *evsel)
{
        /*
         * We already have cpus for evsel (via PMU sysfs) so
         * keep it, if there's no target cpu list defined.
         */
        if (!evsel->own_cpus || evlist->has_user_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evlist->cpus);
        } else if (evsel->cpus != evsel->own_cpus) {
                cpu_map__put(evsel->cpus);
                evsel->cpus = cpu_map__get(evsel->own_cpus);
        }

        thread_map__put(evsel->threads);
        evsel->threads = thread_map__get(evlist->threads);
}

static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                __perf_evlist__propagate_maps(evlist, evsel);
}

void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
{
        entry->evlist = evlist;
        list_add_tail(&entry->node, &evlist->entries);
        entry->idx = evlist->nr_entries;
        entry->tracking = !entry->idx;

        if (!evlist->nr_entries++)
                perf_evlist__set_id_pos(evlist);

        __perf_evlist__propagate_maps(evlist, entry);
}

void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
{
        evsel->evlist = NULL;
        list_del_init(&evsel->node);
        evlist->nr_entries -= 1;
}

void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
                                   struct list_head *list)
{
        struct perf_evsel *evsel, *temp;

        __evlist__for_each_safe(list, temp, evsel) {
                list_del_init(&evsel->node);
                perf_evlist__add(evlist, evsel);
        }
}

void __perf_evlist__set_leader(struct list_head *list)
{
        struct perf_evsel *evsel, *leader;

        leader = list_entry(list->next, struct perf_evsel, node);
        evsel = list_entry(list->prev, struct perf_evsel, node);

        leader->nr_members = evsel->idx - leader->idx + 1;

        __evlist__for_each(list, evsel) {
                evsel->leader = leader;
        }
}

void perf_evlist__set_leader(struct perf_evlist *evlist)
{
        if (evlist->nr_entries) {
                evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
                __perf_evlist__set_leader(&evlist->entries);
        }
}

void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
{
        attr->precise_ip = 3;

        while (attr->precise_ip != 0) {
                int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
                if (fd != -1) {
                        close(fd);
                        break;
                }
                --attr->precise_ip;
        }
}

int perf_evlist__add_default(struct perf_evlist *evlist)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_HARDWARE,
                .config = PERF_COUNT_HW_CPU_CYCLES,
        };
        struct perf_evsel *evsel;

        event_attr_init(&attr);

        perf_event_attr__set_max_precise_ip(&attr);

        evsel = perf_evsel__new(&attr);
        if (evsel == NULL)
                goto error;

        /* use asprintf() because free(evsel) assumes name is allocated */
        if (asprintf(&evsel->name, "cycles%.*s",
                     attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0)
                goto error_free;

        perf_evlist__add(evlist, evsel);
        return 0;
error_free:
        perf_evsel__delete(evsel);
error:
        return -ENOMEM;
}

int perf_evlist__add_dummy(struct perf_evlist *evlist)
{
        struct perf_event_attr attr = {
                .type   = PERF_TYPE_SOFTWARE,
                .config = PERF_COUNT_SW_DUMMY,
                .size   = sizeof(attr), /* to capture ABI version */
        };
        struct perf_evsel *evsel = perf_evsel__new(&attr);

        if (evsel == NULL)
                return -ENOMEM;

        perf_evlist__add(evlist, evsel);
        return 0;
}

static int perf_evlist__add_attrs(struct perf_evlist *evlist,
                                  struct perf_event_attr *attrs, size_t nr_attrs)
{
        struct perf_evsel *evsel, *n;
        LIST_HEAD(head);
        size_t i;

        for (i = 0; i < nr_attrs; i++) {
                evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
                if (evsel == NULL)
                        goto out_delete_partial_list;
                list_add_tail(&evsel->node, &head);
        }

        perf_evlist__splice_list_tail(evlist, &head);

        return 0;

out_delete_partial_list:
        __evlist__for_each_safe(&head, n, evsel)
                perf_evsel__delete(evsel);
        return -1;
}

int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
                                     struct perf_event_attr *attrs, size_t nr_attrs)
{
        size_t i;

        for (i = 0; i < nr_attrs; i++)
                event_attr_init(attrs + i);

        return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
                    (int)evsel->attr.config == id)
                        return evsel;
        }

        return NULL;
}

struct perf_evsel *
perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
                                     const char *name)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
                    (strcmp(evsel->name, name) == 0))
                        return evsel;
        }

        return NULL;
}

int perf_evlist__add_newtp(struct perf_evlist *evlist,
                           const char *sys, const char *name, void *handler)
{
        struct perf_evsel *evsel = perf_evsel__newtp(sys, name);

        if (IS_ERR(evsel))
                return -1;

        evsel->handler = handler;
        perf_evlist__add(evlist, evsel);
        return 0;
}

static int perf_evlist__nr_threads(struct perf_evlist *evlist,
                                   struct perf_evsel *evsel)
{
        if (evsel->system_wide)
                return 1;
        else
                return thread_map__nr(evlist->threads);
}

void perf_evlist__disable(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        evlist__for_each(evlist, pos) {
                if (!perf_evsel__is_group_leader(pos) || !pos->fd)
                        continue;
                perf_evsel__disable(pos);
        }

        evlist->enabled = false;
}

void perf_evlist__enable(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        evlist__for_each(evlist, pos) {
                if (!perf_evsel__is_group_leader(pos) || !pos->fd)
                        continue;
                perf_evsel__enable(pos);
        }

        evlist->enabled = true;
}

void perf_evlist__toggle_enable(struct perf_evlist *evlist)
{
        (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
}

static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
                                         struct perf_evsel *evsel, int cpu)
{
        int thread, err;
        int nr_threads = perf_evlist__nr_threads(evlist, evsel);

        if (!evsel->fd)
                return -EINVAL;

        for (thread = 0; thread < nr_threads; thread++) {
                err = ioctl(FD(evsel, cpu, thread),
                            PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
                                            struct perf_evsel *evsel,
                                            int thread)
{
        int cpu, err;
        int nr_cpus = cpu_map__nr(evlist->cpus);

        if (!evsel->fd)
                return -EINVAL;

        for (cpu = 0; cpu < nr_cpus; cpu++) {
                err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
                if (err)
                        return err;
        }
        return 0;
}

int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
                                  struct perf_evsel *evsel, int idx)
{
        bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);

        if (per_cpu_mmaps)
                return perf_evlist__enable_event_cpu(evlist, evsel, idx);
        else
                return perf_evlist__enable_event_thread(evlist, evsel, idx);
}

int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);
        int nfds = 0;
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (evsel->system_wide)
                        nfds += nr_cpus;
                else
                        nfds += nr_cpus * nr_threads;
        }

        if (fdarray__available_entries(&evlist->pollfd) < nfds &&
            fdarray__grow(&evlist->pollfd, nfds) < 0)
                return -ENOMEM;

        return 0;
}

static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx, short revent)
{
        int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
        /*
         * Save the idx so that when we filter out fds POLLHUP'ed we can
         * close the associated evlist->mmap[] entry.
         */
        if (pos >= 0) {
                evlist->pollfd.priv[pos].idx = idx;

                fcntl(fd, F_SETFL, O_NONBLOCK);
        }

        return pos;
}

int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
{
        return __perf_evlist__add_pollfd(evlist, fd, -1, POLLIN);
}

static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
                                         void *arg __maybe_unused)
{
        struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd);

        perf_evlist__mmap_put(evlist, fda->priv[fd].idx);
}

int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
{
        return fdarray__filter(&evlist->pollfd, revents_and_mask,
                               perf_evlist__munmap_filtered, NULL);
}

int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
{
        return fdarray__poll(&evlist->pollfd, timeout);
}

static void perf_evlist__id_hash(struct perf_evlist *evlist,
                                 struct perf_evsel *evsel,
                                 int cpu, int thread, u64 id)
{
        int hash;
        struct perf_sample_id *sid = SID(evsel, cpu, thread);

        sid->id = id;
        sid->evsel = evsel;
        hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
        hlist_add_head(&sid->node, &evlist->heads[hash]);
}

void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
                         int cpu, int thread, u64 id)
{
        perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
        evsel->id[evsel->ids++] = id;
}

int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                           struct perf_evsel *evsel,
                           int cpu, int thread, int fd)
{
        u64 read_data[4] = { 0, };
        int id_idx = 1; /* The first entry is the counter value */
        u64 id;
        int ret;

        ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
        if (!ret)
                goto add;

        if (errno != ENOTTY)
                return -1;

        /* Legacy way to get event id.. All hail to old kernels! */

        /*
         * This way does not work with group format read, so bail
         * out in that case.
         */
        if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
                return -1;

        if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
            read(fd, &read_data, sizeof(read_data)) == -1)
                return -1;

        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                ++id_idx;
        if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                ++id_idx;

        id = read_data[id_idx];

add:
        perf_evlist__id_add(evlist, evsel, cpu, thread, id);
        return 0;
}

static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
                                     struct perf_evsel *evsel, int idx, int cpu,
                                     int thread)
{
        struct perf_sample_id *sid = SID(evsel, cpu, thread);
        sid->idx = idx;
        if (evlist->cpus && cpu >= 0)
                sid->cpu = evlist->cpus->map[cpu];
        else
                sid->cpu = -1;
        if (!evsel->system_wide && evlist->threads && thread >= 0)
                sid->tid = thread_map__pid(evlist->threads, thread);
        else
                sid->tid = -1;
}

struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
{
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node)
                if (sid->id == id)
                        return sid;

        return NULL;
}

struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
{
        struct perf_sample_id *sid;

        if (evlist->nr_entries == 1 || !id)
                return perf_evlist__first(evlist);

        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
                return sid->evsel;

        if (!perf_evlist__sample_id_all(evlist))
                return perf_evlist__first(evlist);

        return NULL;
}

struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
                                                u64 id)
{
        struct perf_sample_id *sid;

        if (!id)
                return NULL;

        sid = perf_evlist__id2sid(evlist, id);
        if (sid)
                return sid->evsel;

        return NULL;
}

static int perf_evlist__event2id(struct perf_evlist *evlist,
                                 union perf_event *event, u64 *id)
{
        const u64 *array = event->sample.array;
        ssize_t n;

        n = (event->header.size - sizeof(event->header)) >> 3;

        if (event->header.type == PERF_RECORD_SAMPLE) {
                if (evlist->id_pos >= n)
                        return -1;
                *id = array[evlist->id_pos];
        } else {
                if (evlist->is_pos > n)
                        return -1;
                n -= evlist->is_pos;
                *id = array[n];
        }
        return 0;
}

static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
                                                   union perf_event *event)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
        u64 id;

        if (evlist->nr_entries == 1)
                return first;

        if (!first->attr.sample_id_all &&
            event->header.type != PERF_RECORD_SAMPLE)
                return first;

        if (perf_evlist__event2id(evlist, event, &id))
                return NULL;

        /* Synthesized events have an id of zero */
        if (!id)
                return first;

        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];

        hlist_for_each_entry(sid, head, node) {
                if (sid->id == id)
                        return sid->evsel;
        }
        return NULL;
}

static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
{
        int i;

        for (i = 0; i < evlist->nr_mmaps; i++) {
                int fd = evlist->mmap[i].fd;
                int err;

                if (fd < 0)
                        continue;
                err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
                if (err)
                        return err;
        }
        return 0;
}

int perf_evlist__pause(struct perf_evlist *evlist)
{
        return perf_evlist__set_paused(evlist, true);
}

int perf_evlist__resume(struct perf_evlist *evlist)
{
        return perf_evlist__set_paused(evlist, false);
}

/* When check_messup is true, 'end' must point to a good entry */
static union perf_event *
perf_mmap__read(struct perf_mmap *md, bool check_messup, u64 start,
                u64 end, u64 *prev)
{
        unsigned char *data = md->base + page_size;
        union perf_event *event = NULL;
        int diff = end - start;

        if (check_messup) {
                /*
                 * If we're further behind than half the buffer, there's a chance
                 * the writer will bite our tail and mess up the samples under us.
                 *
                 * If we somehow ended up ahead of the 'end', we got messed up.
                 *
                 * In either case, truncate and restart at 'end'.
                 */
                if (diff > md->mask / 2 || diff < 0) {
                        fprintf(stderr, "WARNING: failed to keep up with mmap data.\n");

                        /*
                         * 'end' points to a known good entry, start there.
                         */
                        start = end;
                        diff = 0;
                }
        }

        if (diff >= (int)sizeof(event->header)) {
                size_t size;

                event = (union perf_event *)&data[start & md->mask];
                size = event->header.size;

                if (size < sizeof(event->header) || diff < (int)size) {
                        event = NULL;
                        goto broken_event;
                }

                /*
                 * Event straddles the mmap boundary -- header should always
                 * be inside due to u64 alignment of output.
                 */
                if ((start & md->mask) + size != ((start + size) & md->mask)) {
                        unsigned int offset = start;
                        unsigned int len = min(sizeof(*event), size), cpy;
                        void *dst = md->event_copy;

                        do {
                                cpy = min(md->mask + 1 - (offset & md->mask), len);
                                memcpy(dst, &data[offset & md->mask], cpy);
                                offset += cpy;
                                dst += cpy;
                                len -= cpy;
                        } while (len);

                        event = (union perf_event *) md->event_copy;
                }

                start += size;
        }

broken_event:
        if (prev)
                *prev = start;

        return event;
}

union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];
        u64 head;
        u64 old = md->prev;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!atomic_read(&md->refcnt))
                return NULL;

        head = perf_mmap__read_head(md);

        return perf_mmap__read(md, evlist->overwrite, old, head, &md->prev);
}

union perf_event *
perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];
        u64 head, end;
        u64 start = md->prev;

        /*
         * Check if event was unmapped due to a POLLHUP/POLLERR.
         */
        if (!atomic_read(&md->refcnt))
                return NULL;

        head = perf_mmap__read_head(md);
        if (!head)
                return NULL;

        /*
         * In backward mode the 'head' pointer starts at 0 and the kernel
         * subtracts the record size from it on every write, so 'head' is
         * effectively negative. The 'end' pointer is built manually by
         * adding the ring buffer size to 'head', which means the valid
         * data that can be read spans the whole ring buffer. If 'end' is
         * positive, the ring buffer has not been fully filled yet, so
         * 'end' must be adjusted to 0.
         *
         * However, since both 'head' and 'end' are unsigned, we can't
         * simply compare 'end' against 0.
         * Instead, compare '-head' with the size of the ring buffer,
         * where -head is the number of bytes the kernel has written to it.
         */
        if (-head < (u64)(md->mask + 1))
                end = 0;
        else
                end = head + md->mask + 1;

        return perf_mmap__read(md, false, start, end, &md->prev);
}

union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
{
        if (!evlist->backward)
                return perf_evlist__mmap_read_forward(evlist, idx);
        return perf_evlist__mmap_read_backward(evlist, idx);
}

void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];
        u64 head;

        if (!atomic_read(&md->refcnt))
                return;

        head = perf_mmap__read_head(md);
        md->prev = head;
}

static bool perf_mmap__empty(struct perf_mmap *md)
{
        return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
}

static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
{
        atomic_inc(&evlist->mmap[idx].refcnt);
}

static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];

        BUG_ON(md->base && atomic_read(&md->refcnt) == 0);

        if (atomic_dec_and_test(&md->refcnt))
                __perf_evlist__munmap(evlist, idx);
}

void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
{
        struct perf_mmap *md = &evlist->mmap[idx];

        if (!evlist->overwrite) {
                u64 old = md->prev;

                perf_mmap__write_tail(md, old);
        }

        if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
                perf_evlist__mmap_put(evlist, idx);
}

int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
                               struct auxtrace_mmap_params *mp __maybe_unused,
                               void *userpg __maybe_unused,
                               int fd __maybe_unused)
{
        return 0;
}

void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}

void __weak auxtrace_mmap_params__init(
                        struct auxtrace_mmap_params *mp __maybe_unused,
                        off_t auxtrace_offset __maybe_unused,
                        unsigned int auxtrace_pages __maybe_unused,
                        bool auxtrace_overwrite __maybe_unused)
{
}

void __weak auxtrace_mmap_params__set_idx(
                        struct auxtrace_mmap_params *mp __maybe_unused,
                        struct perf_evlist *evlist __maybe_unused,
                        int idx __maybe_unused,
                        bool per_cpu __maybe_unused)
{
}

static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
{
        if (evlist->mmap[idx].base != NULL) {
                munmap(evlist->mmap[idx].base, evlist->mmap_len);
                evlist->mmap[idx].base = NULL;
                evlist->mmap[idx].fd = -1;
                atomic_set(&evlist->mmap[idx].refcnt, 0);
        }
        auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
}

void perf_evlist__munmap(struct perf_evlist *evlist)
{
        int i;

        if (evlist->mmap == NULL)
                return;

        for (i = 0; i < evlist->nr_mmaps; i++)
                __perf_evlist__munmap(evlist, i);

        zfree(&evlist->mmap);
}

static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
{
        int i;

        evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
        if (cpu_map__empty(evlist->cpus))
                evlist->nr_mmaps = thread_map__nr(evlist->threads);
        evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
        if (!evlist->mmap)
                return -ENOMEM;

        for (i = 0; i < evlist->nr_mmaps; i++)
                evlist->mmap[i].fd = -1;
        return 0;
}

struct mmap_params {
        int prot;
        int mask;
        struct auxtrace_mmap_params auxtrace_mp;
};

static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
                               struct mmap_params *mp, int fd)
{
        /*
         * The last one will be done at perf_evlist__mmap_consume(), so that we
         * make sure we don't prevent tools from consuming every last event in
         * the ring buffer.
         *
         * I.e. we can get the POLLHUP meaning that the fd doesn't exist
         * anymore, but the last events for it are still in the ring buffer,
         * waiting to be consumed.
         *
         * Tools can choose to ignore this at their own discretion, but the
         * evlist layer can't just drop it when filtering events in
         * perf_evlist__filter_pollfd().
         */
        atomic_set(&evlist->mmap[idx].refcnt, 2);
        evlist->mmap[idx].prev = 0;
        evlist->mmap[idx].mask = mp->mask;
        evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
                                      MAP_SHARED, fd, 0);
        if (evlist->mmap[idx].base == MAP_FAILED) {
                pr_debug2("failed to mmap perf event ring buffer, error %d\n",
                          errno);
                evlist->mmap[idx].base = NULL;
                return -1;
        }
        evlist->mmap[idx].fd = fd;

        if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
                                &mp->auxtrace_mp, evlist->mmap[idx].base, fd))
                return -1;

        return 0;
}

static bool
perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
                         struct perf_evsel *evsel)
{
        if (evsel->overwrite)
                return false;
        return true;
}

static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
                                       struct mmap_params *mp, int cpu,
                                       int thread, int *output)
{
        struct perf_evsel *evsel;
        int revent;

        evlist__for_each(evlist, evsel) {
                int fd;

                if (evsel->overwrite != (evlist->overwrite && evlist->backward))
                        continue;

                if (evsel->system_wide && thread)
                        continue;

                fd = FD(evsel, cpu, thread);

                if (*output == -1) {
                        *output = fd;
                        if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0)
                                return -1;
                } else {
                        if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
                                return -1;

                        perf_evlist__mmap_get(evlist, idx);
                }

                revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;

                /*
                 * The system_wide flag causes a selected event to be opened
                 * always without a pid.  Consequently it will never get a
                 * POLLHUP, but it is used for tracking in combination with
                 * other events, so it should not need to be polled anyway.
                 * Therefore don't add it for polling.
                 */
                if (!evsel->system_wide &&
                    __perf_evlist__add_pollfd(evlist, fd, idx, revent) < 0) {
                        perf_evlist__mmap_put(evlist, idx);
                        return -1;
                }

                if (evsel->attr.read_format & PERF_FORMAT_ID) {
                        if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
                                                   fd) < 0)
                                return -1;
                        perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
                                                 thread);
                }
        }

        return 0;
}

static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
                                     struct mmap_params *mp)
{
        int cpu, thread;
        int nr_cpus = cpu_map__nr(evlist->cpus);
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per cpu\n");
        for (cpu = 0; cpu < nr_cpus; cpu++) {
                int output = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
                                              true);

                for (thread = 0; thread < nr_threads; thread++) {
                        if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
                                                        thread, &output))
                                goto out_unmap;
                }
        }

        return 0;

out_unmap:
        for (cpu = 0; cpu < nr_cpus; cpu++)
                __perf_evlist__munmap(evlist, cpu);
        return -1;
}

static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
                                        struct mmap_params *mp)
{
        int thread;
        int nr_threads = thread_map__nr(evlist->threads);

        pr_debug2("perf event ring buffer mmapped per thread\n");
        for (thread = 0; thread < nr_threads; thread++) {
                int output = -1;

                auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
                                              false);

                if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
                                                &output))
                        goto out_unmap;
        }

        return 0;

out_unmap:
        for (thread = 0; thread < nr_threads; thread++)
                __perf_evlist__munmap(evlist, thread);
        return -1;
}

unsigned long perf_event_mlock_kb_in_pages(void)
{
        unsigned long pages;
        int max;

        if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
                /*
                 * Pick a once upon a time good value, i.e. things look
                 * strange since we can't read a sysctl value, but let's not
                 * die yet...
                 */
                max = 512;
        } else {
                max -= (page_size / 1024);
        }

        pages = (max * 1024) / page_size;
        if (!is_power_of_2(pages))
                pages = rounddown_pow_of_two(pages);

        return pages;
}

static size_t perf_evlist__mmap_size(unsigned long pages)
{
        if (pages == UINT_MAX)
                pages = perf_event_mlock_kb_in_pages();
        else if (!is_power_of_2(pages))
                return 0;

        return (pages + 1) * page_size;
}

static long parse_pages_arg(const char *str, unsigned long min,
                            unsigned long max)
{
        unsigned long pages, val;
        static struct parse_tag tags[] = {
                { .tag = 'B', .mult = 1       },
                { .tag = 'K', .mult = 1 << 10 },
                { .tag = 'M', .mult = 1 << 20 },
                { .tag = 'G', .mult = 1 << 30 },
                { .tag = 0 },
        };

        if (str == NULL)
                return -EINVAL;

        val = parse_tag_value(str, tags);
        if (val != (unsigned long) -1) {
                /* we got file size value */
                pages = PERF_ALIGN(val, page_size) / page_size;
        } else {
                /* we got pages count value */
                char *eptr;
                pages = strtoul(str, &eptr, 10);
                if (*eptr != '\0')
                        return -EINVAL;
        }

        if (pages == 0 && min == 0) {
                /* leave number of pages at 0 */
        } else if (!is_power_of_2(pages)) {
                /* round pages up to next power of 2 */
                pages = roundup_pow_of_two(pages);
                if (!pages)
                        return -EINVAL;
                pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
                        pages * page_size, pages);
        }

        if (pages > max)
                return -EINVAL;

        return pages;
}

int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
{
        unsigned long max = UINT_MAX;
        long pages;

        if (max > SIZE_MAX / page_size)
                max = SIZE_MAX / page_size;

        pages = parse_pages_arg(str, 1, max);
        if (pages < 0) {
                pr_err("Invalid argument for --mmap_pages/-m\n");
                return -1;
        }

        *mmap_pages = pages;
        return 0;
}

int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
                                  int unset __maybe_unused)
{
        return __perf_evlist__parse_mmap_pages(opt->value, str);
}

/**
 * perf_evlist__mmap_ex - Create mmaps to receive events.
 * @evlist: list of events
 * @pages: map length in pages
 * @overwrite: overwrite older events?
 * @auxtrace_pages: auxtrace map length in pages
 * @auxtrace_overwrite: overwrite older auxtrace data?
 *
 * If @overwrite is %false the user needs to signal event consumption using
 * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
 * automatically.
 *
 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
 * consumption using auxtrace_mmap__write_tail().
 *
 * Return: %0 on success, negative error code otherwise.
 */
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
                         bool overwrite, unsigned int auxtrace_pages,
                         bool auxtrace_overwrite)
{
        struct perf_evsel *evsel;
        const struct cpu_map *cpus = evlist->cpus;
        const struct thread_map *threads = evlist->threads;
        struct mmap_params mp = {
                .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE),
        };

        if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0)
                return -ENOMEM;

        if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
                return -ENOMEM;

        evlist->overwrite = overwrite;
        evlist->mmap_len = perf_evlist__mmap_size(pages);
        pr_debug("mmap size %zuB\n", evlist->mmap_len);
        mp.mask = evlist->mmap_len - page_size - 1;

        auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
                                   auxtrace_pages, auxtrace_overwrite);

        evlist__for_each(evlist, evsel) {
                if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
                    evsel->sample_id == NULL &&
                    perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
                        return -ENOMEM;
        }

        if (cpu_map__empty(cpus))
                return perf_evlist__mmap_per_thread(evlist, &mp);

        return perf_evlist__mmap_per_cpu(evlist, &mp);
}

int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
                      bool overwrite)
{
        return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
}

int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
{
        struct cpu_map *cpus;
        struct thread_map *threads;

        threads = thread_map__new_str(target->pid, target->tid, target->uid);

        if (!threads)
                return -1;

        if (target__uses_dummy_map(target))
                cpus = cpu_map__dummy_new();
        else
                cpus = cpu_map__new(target->cpu_list);

        if (!cpus)
                goto out_delete_threads;

        evlist->has_user_cpus = !!target->cpu_list;

        perf_evlist__set_maps(evlist, cpus, threads);

        return 0;

out_delete_threads:
        thread_map__put(threads);
        return -1;
}

void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
                           struct thread_map *threads)
{
        /*
         * Allow for the possibility that one or another of the maps isn't being
         * changed i.e. don't put it.  Note we are assuming the maps that are
         * being applied are brand new and evlist is taking ownership of the
         * original reference count of 1.  If that is not the case it is up to
         * the caller to increase the reference count.
         */
        if (cpus != evlist->cpus) {
                cpu_map__put(evlist->cpus);
                evlist->cpus = cpu_map__get(cpus);
        }

        if (threads != evlist->threads) {
                thread_map__put(evlist->threads);
                evlist->threads = thread_map__get(threads);
        }

        perf_evlist__propagate_maps(evlist);
}

void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
                                   enum perf_event_sample_format bit)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                __perf_evsel__set_sample_bit(evsel, bit);
}

void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
                                     enum perf_event_sample_format bit)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel)
                __perf_evsel__reset_sample_bit(evsel, bit);
}

int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
{
        struct perf_evsel *evsel;
        int err = 0;
        const int ncpus = cpu_map__nr(evlist->cpus),
                  nthreads = thread_map__nr(evlist->threads);

        evlist__for_each(evlist, evsel) {
                if (evsel->filter == NULL)
                        continue;

                /*
                 * filters only work for tracepoint events, which don't have a
                 * cpu limit.
                 * So the evlist and evsel cpu maps should always be the same.
                 */
                err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter);
                if (err) {
                        *err_evsel = evsel;
                        break;
                }
        }

        return err;
}

int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
{
        struct perf_evsel *evsel;
        int err = 0;

        evlist__for_each(evlist, evsel) {
                if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
                        continue;

                err = perf_evsel__set_filter(evsel, filter);
                if (err)
                        break;
        }

        return err;
}

int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
        char *filter;
        int ret = -1;
        size_t i;

        for (i = 0; i < npids; ++i) {
                if (i == 0) {
                        if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
                                return -1;
                } else {
                        char *tmp;

                        if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
                                goto out_free;

                        free(filter);
                        filter = tmp;
                }
        }

        ret = perf_evlist__set_filter(evlist, filter);
out_free:
        free(filter);
        return ret;
}

int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
        return perf_evlist__set_filter_pids(evlist, 1, &pid);
}

bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *pos;

        if (evlist->nr_entries == 1)
                return true;

        if (evlist->id_pos < 0 || evlist->is_pos < 0)
                return false;

        evlist__for_each(evlist, pos) {
                if (pos->id_pos != evlist->id_pos ||
                    pos->is_pos != evlist->is_pos)
                        return false;
        }

        return true;
}

u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;

        if (evlist->combined_sample_type)
                return evlist->combined_sample_type;

        evlist__for_each(evlist, evsel)
                evlist->combined_sample_type |= evsel->attr.sample_type;

        return evlist->combined_sample_type;
}

u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
{
        evlist->combined_sample_type = 0;
        return __perf_evlist__combined_sample_type(evlist);
}

u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        u64 branch_type = 0;

        evlist__for_each(evlist, evsel)
                branch_type |= evsel->attr.branch_sample_type;
        return branch_type;
}

bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
        u64 read_format = first->attr.read_format;
        u64 sample_type = first->attr.sample_type;

        evlist__for_each(evlist, pos) {
                if (read_format != pos->attr.read_format)
                        return false;
        }

        /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
        if ((sample_type & PERF_SAMPLE_READ) &&
            !(read_format & PERF_FORMAT_ID)) {
                return false;
        }

        return true;
}

u64 perf_evlist__read_format(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.read_format;
}

u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        struct perf_sample *data;
        u64 sample_type;
        u16 size = 0;

        if (!first->attr.sample_id_all)
                goto out;

        sample_type = first->attr.sample_type;

        if (sample_type & PERF_SAMPLE_TID)
                size += sizeof(data->tid) * 2;

        if (sample_type & PERF_SAMPLE_TIME)
                size += sizeof(data->time);

        if (sample_type & PERF_SAMPLE_ID)
                size += sizeof(data->id);

        if (sample_type & PERF_SAMPLE_STREAM_ID)
                size += sizeof(data->stream_id);

        if (sample_type & PERF_SAMPLE_CPU)
                size += sizeof(data->cpu) * 2;

        if (sample_type & PERF_SAMPLE_IDENTIFIER)
                size += sizeof(data->id);
out:
        return size;
}

bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;

        evlist__for_each_continue(evlist, pos) {
                if (first->attr.sample_id_all != pos->attr.sample_id_all)
                        return false;
        }

        return true;
}

bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
{
        struct perf_evsel *first = perf_evlist__first(evlist);
        return first->attr.sample_id_all;
}

void perf_evlist__set_selected(struct perf_evlist *evlist,
                               struct perf_evsel *evsel)
{
        evlist->selected = evsel;
}

void perf_evlist__close(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        int ncpus = cpu_map__nr(evlist->cpus);
        int nthreads = thread_map__nr(evlist->threads);
        int n;

        evlist__for_each_reverse(evlist, evsel) {
                n = evsel->cpus ? evsel->cpus->nr : ncpus;
                perf_evsel__close(evsel, n, nthreads);
        }
}

static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
{
        struct cpu_map *cpus;
        struct thread_map *threads;
        int err = -ENOMEM;

        /*
         * Try reading /sys/devices/system/cpu/online to get
         * an all cpus map.
         *
         * FIXME: -ENOMEM is the best we can do here, the cpu_map
         * code needs an overhaul to properly forward the
         * error, and we may not want to do that fallback to a
         * default cpu identity map :-\
         */
        cpus = cpu_map__new(NULL);
        if (!cpus)
                goto out;

        threads = thread_map__new_dummy();
        if (!threads)
                goto out_put;

        perf_evlist__set_maps(evlist, cpus, threads);
        err = 0;
out:
        return err;
out_put:
        cpu_map__put(cpus);
        goto out;
}

int perf_evlist__open(struct perf_evlist *evlist)
{
        struct perf_evsel *evsel;
        int err;

        /*
         * Default: one fd per CPU, all threads, aka systemwide
         * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL
         */
        if (evlist->threads == NULL && evlist->cpus == NULL) {
                err = perf_evlist__create_syswide_maps(evlist);
                if (err < 0)
                        goto out_err;
        }

        perf_evlist__update_id_pos(evlist);

        evlist__for_each(evlist, evsel) {
                err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
                if (err < 0)
                        goto out_err;
        }

        return 0;
out_err:
        perf_evlist__close(evlist);
        errno = -err;
        return err;
}

int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
                                  const char *argv[], bool pipe_output,
                                  void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
{
        int child_ready_pipe[2], go_pipe[2];
        char bf;

        if (pipe(child_ready_pipe) < 0) {
                perror("failed to create 'ready' pipe");
                return -1;
        }

        if (pipe(go_pipe) < 0) {
                perror("failed to create 'go' pipe");
                goto out_close_ready_pipe;
        }

        evlist->workload.pid = fork();
        if (evlist->workload.pid < 0) {
                perror("failed to fork");
                goto out_close_pipes;
        }

        if (!evlist->workload.pid) {
                int ret;

                if (pipe_output)
                        dup2(2, 1);

                signal(SIGTERM, SIG_DFL);

                close(child_ready_pipe[0]);
                close(go_pipe[1]);
                fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);

                /*
                 * Tell the parent we're ready to go
                 */
                close(child_ready_pipe[1]);

                /*
                 * Wait until the parent tells us to go.
                 */
                ret = read(go_pipe[0], &bf, 1);
                /*
                 * The parent will ask for the execvp() to be performed by
                 * writing exactly one byte, in workload.cork_fd, usually via
                 * perf_evlist__start_workload().
                 *
                 * For cancelling the workload without actually running it,
                 * the parent will just close workload.cork_fd, without writing
                 * anything, i.e. read will return zero and we just exit()
                 * here.
                 */
                if (ret != 1) {
                        if (ret == -1)
                                perror("unable to read pipe");
                        exit(ret);
                }

                execvp(argv[0], (char **)argv);

                if (exec_error) {
                        union sigval val;

                        val.sival_int = errno;
                        if (sigqueue(getppid(), SIGUSR1, val))
                                perror(argv[0]);
                } else
                        perror(argv[0]);
                exit(-1);
        }

        if (exec_error) {
                struct sigaction act = {
                        .sa_flags     = SA_SIGINFO,
                        .sa_sigaction = exec_error,
                };
                sigaction(SIGUSR1, &act, NULL);
        }

        if (target__none(target)) {
                if (evlist->threads == NULL) {
                        fprintf(stderr, "FATAL: evlist->threads needs to be set at this point (%s:%d).\n",
                                __func__, __LINE__);
                        goto out_close_pipes;
                }
                thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
        }

        close(child_ready_pipe[1]);
        close(go_pipe[0]);
        /*
         * wait for child to settle
         */
        if (read(child_ready_pipe[0], &bf, 1) == -1) {
                perror("unable to read pipe");
                goto out_close_pipes;
        }

        fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
        evlist->workload.cork_fd = go_pipe[1];
        close(child_ready_pipe[0]);
        return 0;

out_close_pipes:
        close(go_pipe[0]);
        close(go_pipe[1]);
out_close_ready_pipe:
        close(child_ready_pipe[0]);
        close(child_ready_pipe[1]);
        return -1;
}

int perf_evlist__start_workload(struct perf_evlist *evlist)
{
        if (evlist->workload.cork_fd > 0) {
                char bf = 0;
                int ret;
                /*
                 * Remove the cork, let it rip!
                 */
                ret = write(evlist->workload.cork_fd, &bf, 1);
                if (ret < 0)
                        perror("unable to write to pipe");

                close(evlist->workload.cork_fd);
                return ret;
        }

        return 0;
}

int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
                              struct perf_sample *sample)
{
        struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);

        if (!evsel)
                return -EFAULT;
        return perf_evsel__parse_sample(evsel, event, sample);
}

size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
{
        struct perf_evsel *evsel;
        size_t printed = 0;

        evlist__for_each(evlist, evsel) {
                printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
", " : "", 1780 perf_evsel__name(evsel)); 1781 } 1782 1783 return printed + fprintf(fp, "\n"); 1784 } 1785 1786 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1787 int err, char *buf, size_t size) 1788 { 1789 int printed, value; 1790 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1791 1792 switch (err) { 1793 case EACCES: 1794 case EPERM: 1795 printed = scnprintf(buf, size, 1796 "Error:\t%s.\n" 1797 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1798 1799 value = perf_event_paranoid(); 1800 1801 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1802 1803 if (value >= 2) { 1804 printed += scnprintf(buf + printed, size - printed, 1805 "For your workloads it needs to be <= 1\nHint:\t"); 1806 } 1807 printed += scnprintf(buf + printed, size - printed, 1808 "For system wide tracing it needs to be set to -1.\n"); 1809 1810 printed += scnprintf(buf + printed, size - printed, 1811 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1812 "Hint:\tThe current value is %d.", value); 1813 break; 1814 case EINVAL: { 1815 struct perf_evsel *first = perf_evlist__first(evlist); 1816 int max_freq; 1817 1818 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1819 goto out_default; 1820 1821 if (first->attr.sample_freq < (u64)max_freq) 1822 goto out_default; 1823 1824 printed = scnprintf(buf, size, 1825 "Error:\t%s.\n" 1826 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1827 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1828 emsg, max_freq, first->attr.sample_freq); 1829 break; 1830 } 1831 default: 1832 out_default: 1833 scnprintf(buf, size, "%s", emsg); 1834 break; 1835 } 1836 1837 return 0; 1838 } 1839 1840 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1841 { 1842 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1843 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1844 1845 switch (err) { 1846 case EPERM: 1847 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1848 printed += scnprintf(buf + printed, size - printed, 1849 "Error:\t%s.\n" 1850 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1851 "Hint:\tTried using %zd kB.\n", 1852 emsg, pages_max_per_user, pages_attempted); 1853 1854 if (pages_attempted >= pages_max_per_user) { 1855 printed += scnprintf(buf + printed, size - printed, 1856 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1857 pages_max_per_user + pages_attempted); 1858 } 1859 1860 printed += scnprintf(buf + printed, size - printed, 1861 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1862 break; 1863 default: 1864 scnprintf(buf, size, "%s", emsg); 1865 break; 1866 } 1867 1868 return 0; 1869 } 1870 1871 void perf_evlist__to_front(struct perf_evlist *evlist, 1872 struct perf_evsel *move_evsel) 1873 { 1874 struct perf_evsel *evsel, *n; 1875 LIST_HEAD(move); 1876 1877 if (move_evsel == perf_evlist__first(evlist)) 1878 return; 1879 1880 evlist__for_each_safe(evlist, n, evsel) { 1881 if (evsel->leader == move_evsel->leader) 1882 list_move_tail(&evsel->node, &move); 1883 } 1884 1885 list_splice(&move, &evlist->entries); 1886 } 1887 1888 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1889 struct perf_evsel *tracking_evsel) 1890 { 1891 struct perf_evsel *evsel; 1892 1893 if (tracking_evsel->tracking) 1894 return; 1895 1896 
        evlist__for_each(evlist, evsel) {
                if (evsel != tracking_evsel)
                        evsel->tracking = false;
        }

        tracking_evsel->tracking = true;
}

struct perf_evsel *
perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
                               const char *str)
{
        struct perf_evsel *evsel;

        evlist__for_each(evlist, evsel) {
                if (!evsel->name)
                        continue;
                if (strcmp(str, evsel->name) == 0)
                        return evsel;
        }

        return NULL;
}
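
/*
 * Illustrative only, not part of the perf sources: a minimal sketch of how
 * a perf-internal tool typically drives this evlist API, assuming the usual
 * tools/perf headers, a populated 'struct target' and a 'done' flag set by a
 * signal handler.  Error handling is abbreviated; see builtin-record.c and
 * builtin-top.c for the real flows.
 *
 *	struct perf_evlist *evlist = perf_evlist__new_default();
 *
 *	if (evlist == NULL)
 *		return -ENOMEM;
 *	if (perf_evlist__create_maps(evlist, &target) < 0)   // cpu/thread maps from -C/-p/-t
 *		goto out_delete;
 *	if (perf_evlist__open(evlist) < 0)                    // sys_perf_event_open() per evsel
 *		goto out_delete;
 *	if (perf_evlist__mmap(evlist, UINT_MAX, false) < 0)   // ring buffers, mlock-sized default
 *		goto out_delete;
 *	perf_evlist__enable(evlist);
 *
 *	while (!done) {
 *		union perf_event *event;
 *		int i;
 *
 *		for (i = 0; i < evlist->nr_mmaps; i++) {
 *			while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
 *				// process 'event', e.g. via perf_evlist__parse_sample()
 *				perf_evlist__mmap_consume(evlist, i);
 *			}
 *		}
 *		perf_evlist__poll(evlist, -1);
 *	}
 *
 *	perf_evlist__disable(evlist);
 * out_delete:
 *	perf_evlist__delete(evlist);
 */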