1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <poll.h> 12 #include "cpumap.h" 13 #include "thread_map.h" 14 #include "target.h" 15 #include "evlist.h" 16 #include "evsel.h" 17 #include "debug.h" 18 #include <unistd.h> 19 20 #include "parse-events.h" 21 #include <subcmd/parse-options.h> 22 23 #include <sys/mman.h> 24 25 #include <linux/bitops.h> 26 #include <linux/hash.h> 27 #include <linux/log2.h> 28 #include <linux/err.h> 29 30 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx); 31 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx); 32 33 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 34 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 35 36 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 37 struct thread_map *threads) 38 { 39 int i; 40 41 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 42 INIT_HLIST_HEAD(&evlist->heads[i]); 43 INIT_LIST_HEAD(&evlist->entries); 44 perf_evlist__set_maps(evlist, cpus, threads); 45 fdarray__init(&evlist->pollfd, 64); 46 evlist->workload.pid = -1; 47 } 48 49 struct perf_evlist *perf_evlist__new(void) 50 { 51 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 52 53 if (evlist != NULL) 54 perf_evlist__init(evlist, NULL, NULL); 55 56 return evlist; 57 } 58 59 struct perf_evlist *perf_evlist__new_default(void) 60 { 61 struct perf_evlist *evlist = perf_evlist__new(); 62 63 if (evlist && perf_evlist__add_default(evlist)) { 64 perf_evlist__delete(evlist); 65 evlist = NULL; 66 } 67 68 return evlist; 69 } 70 71 struct perf_evlist *perf_evlist__new_dummy(void) 72 { 73 struct perf_evlist *evlist = perf_evlist__new(); 74 75 if (evlist && perf_evlist__add_dummy(evlist)) { 76 perf_evlist__delete(evlist); 77 evlist = NULL; 78 } 79 80 return evlist; 81 } 82 83 /** 84 * perf_evlist__set_id_pos - set the positions of event ids. 85 * @evlist: selected event list 86 * 87 * Events with compatible sample types all have the same id_pos 88 * and is_pos. For convenience, put a copy on evlist. 89 */ 90 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 91 { 92 struct perf_evsel *first = perf_evlist__first(evlist); 93 94 evlist->id_pos = first->id_pos; 95 evlist->is_pos = first->is_pos; 96 } 97 98 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 99 { 100 struct perf_evsel *evsel; 101 102 evlist__for_each(evlist, evsel) 103 perf_evsel__calc_id_pos(evsel); 104 105 perf_evlist__set_id_pos(evlist); 106 } 107 108 static void perf_evlist__purge(struct perf_evlist *evlist) 109 { 110 struct perf_evsel *pos, *n; 111 112 evlist__for_each_safe(evlist, n, pos) { 113 list_del_init(&pos->node); 114 pos->evlist = NULL; 115 perf_evsel__delete(pos); 116 } 117 118 evlist->nr_entries = 0; 119 } 120 121 void perf_evlist__exit(struct perf_evlist *evlist) 122 { 123 zfree(&evlist->mmap); 124 fdarray__exit(&evlist->pollfd); 125 } 126 127 void perf_evlist__delete(struct perf_evlist *evlist) 128 { 129 perf_evlist__munmap(evlist); 130 perf_evlist__close(evlist); 131 cpu_map__put(evlist->cpus); 132 thread_map__put(evlist->threads); 133 evlist->cpus = NULL; 134 evlist->threads = NULL; 135 perf_evlist__purge(evlist); 136 perf_evlist__exit(evlist); 137 free(evlist); 138 } 139 140 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 141 struct perf_evsel *evsel) 142 { 143 /* 144 * We already have cpus for evsel (via PMU sysfs) so 145 * keep it, if there's no target cpu list defined. 146 */ 147 if (!evsel->own_cpus || evlist->has_user_cpus) { 148 cpu_map__put(evsel->cpus); 149 evsel->cpus = cpu_map__get(evlist->cpus); 150 } else if (evsel->cpus != evsel->own_cpus) { 151 cpu_map__put(evsel->cpus); 152 evsel->cpus = cpu_map__get(evsel->own_cpus); 153 } 154 155 thread_map__put(evsel->threads); 156 evsel->threads = thread_map__get(evlist->threads); 157 } 158 159 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 160 { 161 struct perf_evsel *evsel; 162 163 evlist__for_each(evlist, evsel) 164 __perf_evlist__propagate_maps(evlist, evsel); 165 } 166 167 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 168 { 169 entry->evlist = evlist; 170 list_add_tail(&entry->node, &evlist->entries); 171 entry->idx = evlist->nr_entries; 172 entry->tracking = !entry->idx; 173 174 if (!evlist->nr_entries++) 175 perf_evlist__set_id_pos(evlist); 176 177 __perf_evlist__propagate_maps(evlist, entry); 178 } 179 180 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 181 { 182 evsel->evlist = NULL; 183 list_del_init(&evsel->node); 184 evlist->nr_entries -= 1; 185 } 186 187 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 188 struct list_head *list) 189 { 190 struct perf_evsel *evsel, *temp; 191 192 __evlist__for_each_safe(list, temp, evsel) { 193 list_del_init(&evsel->node); 194 perf_evlist__add(evlist, evsel); 195 } 196 } 197 198 void __perf_evlist__set_leader(struct list_head *list) 199 { 200 struct perf_evsel *evsel, *leader; 201 202 leader = list_entry(list->next, struct perf_evsel, node); 203 evsel = list_entry(list->prev, struct perf_evsel, node); 204 205 leader->nr_members = evsel->idx - leader->idx + 1; 206 207 __evlist__for_each(list, evsel) { 208 evsel->leader = leader; 209 } 210 } 211 212 void perf_evlist__set_leader(struct perf_evlist *evlist) 213 { 214 if (evlist->nr_entries) { 215 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 216 __perf_evlist__set_leader(&evlist->entries); 217 } 218 } 219 220 void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) 221 { 222 attr->precise_ip = 3; 223 224 while (attr->precise_ip != 0) { 225 int fd = sys_perf_event_open(attr, 0, -1, -1, 0); 226 if (fd != -1) { 227 close(fd); 228 break; 229 } 230 --attr->precise_ip; 231 } 232 } 233 234 int perf_evlist__add_default(struct perf_evlist *evlist) 235 { 236 struct perf_event_attr attr = { 237 .type = PERF_TYPE_HARDWARE, 238 .config = PERF_COUNT_HW_CPU_CYCLES, 239 }; 240 struct perf_evsel *evsel; 241 242 event_attr_init(&attr); 243 244 perf_event_attr__set_max_precise_ip(&attr); 245 246 evsel = perf_evsel__new(&attr); 247 if (evsel == NULL) 248 goto error; 249 250 /* use asprintf() because free(evsel) assumes name is allocated */ 251 if (asprintf(&evsel->name, "cycles%.*s", 252 attr.precise_ip ? attr.precise_ip + 1 : 0, ":ppp") < 0) 253 goto error_free; 254 255 perf_evlist__add(evlist, evsel); 256 return 0; 257 error_free: 258 perf_evsel__delete(evsel); 259 error: 260 return -ENOMEM; 261 } 262 263 int perf_evlist__add_dummy(struct perf_evlist *evlist) 264 { 265 struct perf_event_attr attr = { 266 .type = PERF_TYPE_SOFTWARE, 267 .config = PERF_COUNT_SW_DUMMY, 268 .size = sizeof(attr), /* to capture ABI version */ 269 }; 270 struct perf_evsel *evsel = perf_evsel__new(&attr); 271 272 if (evsel == NULL) 273 return -ENOMEM; 274 275 perf_evlist__add(evlist, evsel); 276 return 0; 277 } 278 279 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 280 struct perf_event_attr *attrs, size_t nr_attrs) 281 { 282 struct perf_evsel *evsel, *n; 283 LIST_HEAD(head); 284 size_t i; 285 286 for (i = 0; i < nr_attrs; i++) { 287 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 288 if (evsel == NULL) 289 goto out_delete_partial_list; 290 list_add_tail(&evsel->node, &head); 291 } 292 293 perf_evlist__splice_list_tail(evlist, &head); 294 295 return 0; 296 297 out_delete_partial_list: 298 __evlist__for_each_safe(&head, n, evsel) 299 perf_evsel__delete(evsel); 300 return -1; 301 } 302 303 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 304 struct perf_event_attr *attrs, size_t nr_attrs) 305 { 306 size_t i; 307 308 for (i = 0; i < nr_attrs; i++) 309 event_attr_init(attrs + i); 310 311 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 312 } 313 314 struct perf_evsel * 315 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 316 { 317 struct perf_evsel *evsel; 318 319 evlist__for_each(evlist, evsel) { 320 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 321 (int)evsel->attr.config == id) 322 return evsel; 323 } 324 325 return NULL; 326 } 327 328 struct perf_evsel * 329 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 330 const char *name) 331 { 332 struct perf_evsel *evsel; 333 334 evlist__for_each(evlist, evsel) { 335 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 336 (strcmp(evsel->name, name) == 0)) 337 return evsel; 338 } 339 340 return NULL; 341 } 342 343 int perf_evlist__add_newtp(struct perf_evlist *evlist, 344 const char *sys, const char *name, void *handler) 345 { 346 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 347 348 if (IS_ERR(evsel)) 349 return -1; 350 351 evsel->handler = handler; 352 perf_evlist__add(evlist, evsel); 353 return 0; 354 } 355 356 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 357 struct perf_evsel *evsel) 358 { 359 if (evsel->system_wide) 360 return 1; 361 else 362 return thread_map__nr(evlist->threads); 363 } 364 365 void perf_evlist__disable(struct perf_evlist *evlist) 366 { 367 struct perf_evsel *pos; 368 369 evlist__for_each(evlist, pos) { 370 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 371 continue; 372 perf_evsel__disable(pos); 373 } 374 375 evlist->enabled = false; 376 } 377 378 void perf_evlist__enable(struct perf_evlist *evlist) 379 { 380 struct perf_evsel *pos; 381 382 evlist__for_each(evlist, pos) { 383 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 384 continue; 385 perf_evsel__enable(pos); 386 } 387 388 evlist->enabled = true; 389 } 390 391 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 392 { 393 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 394 } 395 396 int perf_evlist__disable_event(struct perf_evlist *evlist, 397 struct perf_evsel *evsel) 398 { 399 int cpu, thread, err; 400 int nr_cpus = cpu_map__nr(evlist->cpus); 401 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 402 403 if (!evsel->fd) 404 return 0; 405 406 for (cpu = 0; cpu < nr_cpus; cpu++) { 407 for (thread = 0; thread < nr_threads; thread++) { 408 err = ioctl(FD(evsel, cpu, thread), 409 PERF_EVENT_IOC_DISABLE, 0); 410 if (err) 411 return err; 412 } 413 } 414 return 0; 415 } 416 417 int perf_evlist__enable_event(struct perf_evlist *evlist, 418 struct perf_evsel *evsel) 419 { 420 int cpu, thread, err; 421 int nr_cpus = cpu_map__nr(evlist->cpus); 422 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 423 424 if (!evsel->fd) 425 return -EINVAL; 426 427 for (cpu = 0; cpu < nr_cpus; cpu++) { 428 for (thread = 0; thread < nr_threads; thread++) { 429 err = ioctl(FD(evsel, cpu, thread), 430 PERF_EVENT_IOC_ENABLE, 0); 431 if (err) 432 return err; 433 } 434 } 435 return 0; 436 } 437 438 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 439 struct perf_evsel *evsel, int cpu) 440 { 441 int thread, err; 442 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 443 444 if (!evsel->fd) 445 return -EINVAL; 446 447 for (thread = 0; thread < nr_threads; thread++) { 448 err = ioctl(FD(evsel, cpu, thread), 449 PERF_EVENT_IOC_ENABLE, 0); 450 if (err) 451 return err; 452 } 453 return 0; 454 } 455 456 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 457 struct perf_evsel *evsel, 458 int thread) 459 { 460 int cpu, err; 461 int nr_cpus = cpu_map__nr(evlist->cpus); 462 463 if (!evsel->fd) 464 return -EINVAL; 465 466 for (cpu = 0; cpu < nr_cpus; cpu++) { 467 err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 468 if (err) 469 return err; 470 } 471 return 0; 472 } 473 474 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 475 struct perf_evsel *evsel, int idx) 476 { 477 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 478 479 if (per_cpu_mmaps) 480 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 481 else 482 return perf_evlist__enable_event_thread(evlist, evsel, idx); 483 } 484 485 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 486 { 487 int nr_cpus = cpu_map__nr(evlist->cpus); 488 int nr_threads = thread_map__nr(evlist->threads); 489 int nfds = 0; 490 struct perf_evsel *evsel; 491 492 evlist__for_each(evlist, evsel) { 493 if (evsel->system_wide) 494 nfds += nr_cpus; 495 else 496 nfds += nr_cpus * nr_threads; 497 } 498 499 if (fdarray__available_entries(&evlist->pollfd) < nfds && 500 fdarray__grow(&evlist->pollfd, nfds) < 0) 501 return -ENOMEM; 502 503 return 0; 504 } 505 506 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, int idx) 507 { 508 int pos = fdarray__add(&evlist->pollfd, fd, POLLIN | POLLERR | POLLHUP); 509 /* 510 * Save the idx so that when we filter out fds POLLHUP'ed we can 511 * close the associated evlist->mmap[] entry. 512 */ 513 if (pos >= 0) { 514 evlist->pollfd.priv[pos].idx = idx; 515 516 fcntl(fd, F_SETFL, O_NONBLOCK); 517 } 518 519 return pos; 520 } 521 522 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 523 { 524 return __perf_evlist__add_pollfd(evlist, fd, -1); 525 } 526 527 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd) 528 { 529 struct perf_evlist *evlist = container_of(fda, struct perf_evlist, pollfd); 530 531 perf_evlist__mmap_put(evlist, fda->priv[fd].idx); 532 } 533 534 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 535 { 536 return fdarray__filter(&evlist->pollfd, revents_and_mask, 537 perf_evlist__munmap_filtered); 538 } 539 540 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 541 { 542 return fdarray__poll(&evlist->pollfd, timeout); 543 } 544 545 static void perf_evlist__id_hash(struct perf_evlist *evlist, 546 struct perf_evsel *evsel, 547 int cpu, int thread, u64 id) 548 { 549 int hash; 550 struct perf_sample_id *sid = SID(evsel, cpu, thread); 551 552 sid->id = id; 553 sid->evsel = evsel; 554 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 555 hlist_add_head(&sid->node, &evlist->heads[hash]); 556 } 557 558 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 559 int cpu, int thread, u64 id) 560 { 561 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 562 evsel->id[evsel->ids++] = id; 563 } 564 565 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 566 struct perf_evsel *evsel, 567 int cpu, int thread, int fd) 568 { 569 u64 read_data[4] = { 0, }; 570 int id_idx = 1; /* The first entry is the counter value */ 571 u64 id; 572 int ret; 573 574 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 575 if (!ret) 576 goto add; 577 578 if (errno != ENOTTY) 579 return -1; 580 581 /* Legacy way to get event id.. All hail to old kernels! */ 582 583 /* 584 * This way does not work with group format read, so bail 585 * out in that case. 586 */ 587 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 588 return -1; 589 590 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 591 read(fd, &read_data, sizeof(read_data)) == -1) 592 return -1; 593 594 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 595 ++id_idx; 596 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 597 ++id_idx; 598 599 id = read_data[id_idx]; 600 601 add: 602 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 603 return 0; 604 } 605 606 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 607 struct perf_evsel *evsel, int idx, int cpu, 608 int thread) 609 { 610 struct perf_sample_id *sid = SID(evsel, cpu, thread); 611 sid->idx = idx; 612 if (evlist->cpus && cpu >= 0) 613 sid->cpu = evlist->cpus->map[cpu]; 614 else 615 sid->cpu = -1; 616 if (!evsel->system_wide && evlist->threads && thread >= 0) 617 sid->tid = thread_map__pid(evlist->threads, thread); 618 else 619 sid->tid = -1; 620 } 621 622 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 623 { 624 struct hlist_head *head; 625 struct perf_sample_id *sid; 626 int hash; 627 628 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 629 head = &evlist->heads[hash]; 630 631 hlist_for_each_entry(sid, head, node) 632 if (sid->id == id) 633 return sid; 634 635 return NULL; 636 } 637 638 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 639 { 640 struct perf_sample_id *sid; 641 642 if (evlist->nr_entries == 1 || !id) 643 return perf_evlist__first(evlist); 644 645 sid = perf_evlist__id2sid(evlist, id); 646 if (sid) 647 return sid->evsel; 648 649 if (!perf_evlist__sample_id_all(evlist)) 650 return perf_evlist__first(evlist); 651 652 return NULL; 653 } 654 655 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 656 u64 id) 657 { 658 struct perf_sample_id *sid; 659 660 if (!id) 661 return NULL; 662 663 sid = perf_evlist__id2sid(evlist, id); 664 if (sid) 665 return sid->evsel; 666 667 return NULL; 668 } 669 670 static int perf_evlist__event2id(struct perf_evlist *evlist, 671 union perf_event *event, u64 *id) 672 { 673 const u64 *array = event->sample.array; 674 ssize_t n; 675 676 n = (event->header.size - sizeof(event->header)) >> 3; 677 678 if (event->header.type == PERF_RECORD_SAMPLE) { 679 if (evlist->id_pos >= n) 680 return -1; 681 *id = array[evlist->id_pos]; 682 } else { 683 if (evlist->is_pos > n) 684 return -1; 685 n -= evlist->is_pos; 686 *id = array[n]; 687 } 688 return 0; 689 } 690 691 static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 692 union perf_event *event) 693 { 694 struct perf_evsel *first = perf_evlist__first(evlist); 695 struct hlist_head *head; 696 struct perf_sample_id *sid; 697 int hash; 698 u64 id; 699 700 if (evlist->nr_entries == 1) 701 return first; 702 703 if (!first->attr.sample_id_all && 704 event->header.type != PERF_RECORD_SAMPLE) 705 return first; 706 707 if (perf_evlist__event2id(evlist, event, &id)) 708 return NULL; 709 710 /* Synthesized events have an id of zero */ 711 if (!id) 712 return first; 713 714 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 715 head = &evlist->heads[hash]; 716 717 hlist_for_each_entry(sid, head, node) { 718 if (sid->id == id) 719 return sid->evsel; 720 } 721 return NULL; 722 } 723 724 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) 725 { 726 struct perf_mmap *md = &evlist->mmap[idx]; 727 u64 head; 728 u64 old = md->prev; 729 unsigned char *data = md->base + page_size; 730 union perf_event *event = NULL; 731 732 /* 733 * Check if event was unmapped due to a POLLHUP/POLLERR. 734 */ 735 if (!atomic_read(&md->refcnt)) 736 return NULL; 737 738 head = perf_mmap__read_head(md); 739 if (evlist->overwrite) { 740 /* 741 * If we're further behind than half the buffer, there's a chance 742 * the writer will bite our tail and mess up the samples under us. 743 * 744 * If we somehow ended up ahead of the head, we got messed up. 745 * 746 * In either case, truncate and restart at head. 747 */ 748 int diff = head - old; 749 if (diff > md->mask / 2 || diff < 0) { 750 fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); 751 752 /* 753 * head points to a known good entry, start there. 754 */ 755 old = head; 756 } 757 } 758 759 if (old != head) { 760 size_t size; 761 762 event = (union perf_event *)&data[old & md->mask]; 763 size = event->header.size; 764 765 /* 766 * Event straddles the mmap boundary -- header should always 767 * be inside due to u64 alignment of output. 768 */ 769 if ((old & md->mask) + size != ((old + size) & md->mask)) { 770 unsigned int offset = old; 771 unsigned int len = min(sizeof(*event), size), cpy; 772 void *dst = md->event_copy; 773 774 do { 775 cpy = min(md->mask + 1 - (offset & md->mask), len); 776 memcpy(dst, &data[offset & md->mask], cpy); 777 offset += cpy; 778 dst += cpy; 779 len -= cpy; 780 } while (len); 781 782 event = (union perf_event *) md->event_copy; 783 } 784 785 old += size; 786 } 787 788 md->prev = old; 789 790 return event; 791 } 792 793 static bool perf_mmap__empty(struct perf_mmap *md) 794 { 795 return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; 796 } 797 798 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) 799 { 800 atomic_inc(&evlist->mmap[idx].refcnt); 801 } 802 803 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) 804 { 805 BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0); 806 807 if (atomic_dec_and_test(&evlist->mmap[idx].refcnt)) 808 __perf_evlist__munmap(evlist, idx); 809 } 810 811 void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) 812 { 813 struct perf_mmap *md = &evlist->mmap[idx]; 814 815 if (!evlist->overwrite) { 816 u64 old = md->prev; 817 818 perf_mmap__write_tail(md, old); 819 } 820 821 if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) 822 perf_evlist__mmap_put(evlist, idx); 823 } 824 825 int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, 826 struct auxtrace_mmap_params *mp __maybe_unused, 827 void *userpg __maybe_unused, 828 int fd __maybe_unused) 829 { 830 return 0; 831 } 832 833 void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) 834 { 835 } 836 837 void __weak auxtrace_mmap_params__init( 838 struct auxtrace_mmap_params *mp __maybe_unused, 839 off_t auxtrace_offset __maybe_unused, 840 unsigned int auxtrace_pages __maybe_unused, 841 bool auxtrace_overwrite __maybe_unused) 842 { 843 } 844 845 void __weak auxtrace_mmap_params__set_idx( 846 struct auxtrace_mmap_params *mp __maybe_unused, 847 struct perf_evlist *evlist __maybe_unused, 848 int idx __maybe_unused, 849 bool per_cpu __maybe_unused) 850 { 851 } 852 853 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) 854 { 855 if (evlist->mmap[idx].base != NULL) { 856 munmap(evlist->mmap[idx].base, evlist->mmap_len); 857 evlist->mmap[idx].base = NULL; 858 atomic_set(&evlist->mmap[idx].refcnt, 0); 859 } 860 auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap); 861 } 862 863 void perf_evlist__munmap(struct perf_evlist *evlist) 864 { 865 int i; 866 867 if (evlist->mmap == NULL) 868 return; 869 870 for (i = 0; i < evlist->nr_mmaps; i++) 871 __perf_evlist__munmap(evlist, i); 872 873 zfree(&evlist->mmap); 874 } 875 876 static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) 877 { 878 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 879 if (cpu_map__empty(evlist->cpus)) 880 evlist->nr_mmaps = thread_map__nr(evlist->threads); 881 evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 882 return evlist->mmap != NULL ? 0 : -ENOMEM; 883 } 884 885 struct mmap_params { 886 int prot; 887 int mask; 888 struct auxtrace_mmap_params auxtrace_mp; 889 }; 890 891 static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, 892 struct mmap_params *mp, int fd) 893 { 894 /* 895 * The last one will be done at perf_evlist__mmap_consume(), so that we 896 * make sure we don't prevent tools from consuming every last event in 897 * the ring buffer. 898 * 899 * I.e. we can get the POLLHUP meaning that the fd doesn't exist 900 * anymore, but the last events for it are still in the ring buffer, 901 * waiting to be consumed. 902 * 903 * Tools can chose to ignore this at their own discretion, but the 904 * evlist layer can't just drop it when filtering events in 905 * perf_evlist__filter_pollfd(). 906 */ 907 atomic_set(&evlist->mmap[idx].refcnt, 2); 908 evlist->mmap[idx].prev = 0; 909 evlist->mmap[idx].mask = mp->mask; 910 evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, 911 MAP_SHARED, fd, 0); 912 if (evlist->mmap[idx].base == MAP_FAILED) { 913 pr_debug2("failed to mmap perf event ring buffer, error %d\n", 914 errno); 915 evlist->mmap[idx].base = NULL; 916 return -1; 917 } 918 919 if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap, 920 &mp->auxtrace_mp, evlist->mmap[idx].base, fd)) 921 return -1; 922 923 return 0; 924 } 925 926 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 927 struct mmap_params *mp, int cpu, 928 int thread, int *output) 929 { 930 struct perf_evsel *evsel; 931 932 evlist__for_each(evlist, evsel) { 933 int fd; 934 935 if (evsel->system_wide && thread) 936 continue; 937 938 fd = FD(evsel, cpu, thread); 939 940 if (*output == -1) { 941 *output = fd; 942 if (__perf_evlist__mmap(evlist, idx, mp, *output) < 0) 943 return -1; 944 } else { 945 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 946 return -1; 947 948 perf_evlist__mmap_get(evlist, idx); 949 } 950 951 /* 952 * The system_wide flag causes a selected event to be opened 953 * always without a pid. Consequently it will never get a 954 * POLLHUP, but it is used for tracking in combination with 955 * other events, so it should not need to be polled anyway. 956 * Therefore don't add it for polling. 957 */ 958 if (!evsel->system_wide && 959 __perf_evlist__add_pollfd(evlist, fd, idx) < 0) { 960 perf_evlist__mmap_put(evlist, idx); 961 return -1; 962 } 963 964 if (evsel->attr.read_format & PERF_FORMAT_ID) { 965 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 966 fd) < 0) 967 return -1; 968 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 969 thread); 970 } 971 } 972 973 return 0; 974 } 975 976 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 977 struct mmap_params *mp) 978 { 979 int cpu, thread; 980 int nr_cpus = cpu_map__nr(evlist->cpus); 981 int nr_threads = thread_map__nr(evlist->threads); 982 983 pr_debug2("perf event ring buffer mmapped per cpu\n"); 984 for (cpu = 0; cpu < nr_cpus; cpu++) { 985 int output = -1; 986 987 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 988 true); 989 990 for (thread = 0; thread < nr_threads; thread++) { 991 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 992 thread, &output)) 993 goto out_unmap; 994 } 995 } 996 997 return 0; 998 999 out_unmap: 1000 for (cpu = 0; cpu < nr_cpus; cpu++) 1001 __perf_evlist__munmap(evlist, cpu); 1002 return -1; 1003 } 1004 1005 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 1006 struct mmap_params *mp) 1007 { 1008 int thread; 1009 int nr_threads = thread_map__nr(evlist->threads); 1010 1011 pr_debug2("perf event ring buffer mmapped per thread\n"); 1012 for (thread = 0; thread < nr_threads; thread++) { 1013 int output = -1; 1014 1015 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 1016 false); 1017 1018 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 1019 &output)) 1020 goto out_unmap; 1021 } 1022 1023 return 0; 1024 1025 out_unmap: 1026 for (thread = 0; thread < nr_threads; thread++) 1027 __perf_evlist__munmap(evlist, thread); 1028 return -1; 1029 } 1030 1031 static size_t perf_evlist__mmap_size(unsigned long pages) 1032 { 1033 if (pages == UINT_MAX) { 1034 int max; 1035 1036 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 1037 /* 1038 * Pick a once upon a time good value, i.e. things look 1039 * strange since we can't read a sysctl value, but lets not 1040 * die yet... 1041 */ 1042 max = 512; 1043 } else { 1044 max -= (page_size / 1024); 1045 } 1046 1047 pages = (max * 1024) / page_size; 1048 if (!is_power_of_2(pages)) 1049 pages = rounddown_pow_of_two(pages); 1050 } else if (!is_power_of_2(pages)) 1051 return 0; 1052 1053 return (pages + 1) * page_size; 1054 } 1055 1056 static long parse_pages_arg(const char *str, unsigned long min, 1057 unsigned long max) 1058 { 1059 unsigned long pages, val; 1060 static struct parse_tag tags[] = { 1061 { .tag = 'B', .mult = 1 }, 1062 { .tag = 'K', .mult = 1 << 10 }, 1063 { .tag = 'M', .mult = 1 << 20 }, 1064 { .tag = 'G', .mult = 1 << 30 }, 1065 { .tag = 0 }, 1066 }; 1067 1068 if (str == NULL) 1069 return -EINVAL; 1070 1071 val = parse_tag_value(str, tags); 1072 if (val != (unsigned long) -1) { 1073 /* we got file size value */ 1074 pages = PERF_ALIGN(val, page_size) / page_size; 1075 } else { 1076 /* we got pages count value */ 1077 char *eptr; 1078 pages = strtoul(str, &eptr, 10); 1079 if (*eptr != '\0') 1080 return -EINVAL; 1081 } 1082 1083 if (pages == 0 && min == 0) { 1084 /* leave number of pages at 0 */ 1085 } else if (!is_power_of_2(pages)) { 1086 /* round pages up to next power of 2 */ 1087 pages = roundup_pow_of_two(pages); 1088 if (!pages) 1089 return -EINVAL; 1090 pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n", 1091 pages * page_size, pages); 1092 } 1093 1094 if (pages > max) 1095 return -EINVAL; 1096 1097 return pages; 1098 } 1099 1100 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 1101 { 1102 unsigned long max = UINT_MAX; 1103 long pages; 1104 1105 if (max > SIZE_MAX / page_size) 1106 max = SIZE_MAX / page_size; 1107 1108 pages = parse_pages_arg(str, 1, max); 1109 if (pages < 0) { 1110 pr_err("Invalid argument for --mmap_pages/-m\n"); 1111 return -1; 1112 } 1113 1114 *mmap_pages = pages; 1115 return 0; 1116 } 1117 1118 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 1119 int unset __maybe_unused) 1120 { 1121 return __perf_evlist__parse_mmap_pages(opt->value, str); 1122 } 1123 1124 /** 1125 * perf_evlist__mmap_ex - Create mmaps to receive events. 1126 * @evlist: list of events 1127 * @pages: map length in pages 1128 * @overwrite: overwrite older events? 1129 * @auxtrace_pages - auxtrace map length in pages 1130 * @auxtrace_overwrite - overwrite older auxtrace data? 1131 * 1132 * If @overwrite is %false the user needs to signal event consumption using 1133 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1134 * automatically. 1135 * 1136 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1137 * consumption using auxtrace_mmap__write_tail(). 1138 * 1139 * Return: %0 on success, negative error code otherwise. 1140 */ 1141 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1142 bool overwrite, unsigned int auxtrace_pages, 1143 bool auxtrace_overwrite) 1144 { 1145 struct perf_evsel *evsel; 1146 const struct cpu_map *cpus = evlist->cpus; 1147 const struct thread_map *threads = evlist->threads; 1148 struct mmap_params mp = { 1149 .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), 1150 }; 1151 1152 if (evlist->mmap == NULL && perf_evlist__alloc_mmap(evlist) < 0) 1153 return -ENOMEM; 1154 1155 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1156 return -ENOMEM; 1157 1158 evlist->overwrite = overwrite; 1159 evlist->mmap_len = perf_evlist__mmap_size(pages); 1160 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1161 mp.mask = evlist->mmap_len - page_size - 1; 1162 1163 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1164 auxtrace_pages, auxtrace_overwrite); 1165 1166 evlist__for_each(evlist, evsel) { 1167 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1168 evsel->sample_id == NULL && 1169 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1170 return -ENOMEM; 1171 } 1172 1173 if (cpu_map__empty(cpus)) 1174 return perf_evlist__mmap_per_thread(evlist, &mp); 1175 1176 return perf_evlist__mmap_per_cpu(evlist, &mp); 1177 } 1178 1179 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, 1180 bool overwrite) 1181 { 1182 return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); 1183 } 1184 1185 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1186 { 1187 struct cpu_map *cpus; 1188 struct thread_map *threads; 1189 1190 threads = thread_map__new_str(target->pid, target->tid, target->uid); 1191 1192 if (!threads) 1193 return -1; 1194 1195 if (target__uses_dummy_map(target)) 1196 cpus = cpu_map__dummy_new(); 1197 else 1198 cpus = cpu_map__new(target->cpu_list); 1199 1200 if (!cpus) 1201 goto out_delete_threads; 1202 1203 evlist->has_user_cpus = !!target->cpu_list; 1204 1205 perf_evlist__set_maps(evlist, cpus, threads); 1206 1207 return 0; 1208 1209 out_delete_threads: 1210 thread_map__put(threads); 1211 return -1; 1212 } 1213 1214 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1215 struct thread_map *threads) 1216 { 1217 /* 1218 * Allow for the possibility that one or another of the maps isn't being 1219 * changed i.e. don't put it. Note we are assuming the maps that are 1220 * being applied are brand new and evlist is taking ownership of the 1221 * original reference count of 1. If that is not the case it is up to 1222 * the caller to increase the reference count. 1223 */ 1224 if (cpus != evlist->cpus) { 1225 cpu_map__put(evlist->cpus); 1226 evlist->cpus = cpus; 1227 } 1228 1229 if (threads != evlist->threads) { 1230 thread_map__put(evlist->threads); 1231 evlist->threads = threads; 1232 } 1233 1234 perf_evlist__propagate_maps(evlist); 1235 } 1236 1237 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1238 { 1239 struct perf_evsel *evsel; 1240 int err = 0; 1241 const int ncpus = cpu_map__nr(evlist->cpus), 1242 nthreads = thread_map__nr(evlist->threads); 1243 1244 evlist__for_each(evlist, evsel) { 1245 if (evsel->filter == NULL) 1246 continue; 1247 1248 /* 1249 * filters only work for tracepoint event, which doesn't have cpu limit. 1250 * So evlist and evsel should always be same. 1251 */ 1252 err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); 1253 if (err) { 1254 *err_evsel = evsel; 1255 break; 1256 } 1257 } 1258 1259 return err; 1260 } 1261 1262 int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) 1263 { 1264 struct perf_evsel *evsel; 1265 int err = 0; 1266 1267 evlist__for_each(evlist, evsel) { 1268 err = perf_evsel__set_filter(evsel, filter); 1269 if (err) 1270 break; 1271 } 1272 1273 return err; 1274 } 1275 1276 int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1277 { 1278 char *filter; 1279 int ret = -1; 1280 size_t i; 1281 1282 for (i = 0; i < npids; ++i) { 1283 if (i == 0) { 1284 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1285 return -1; 1286 } else { 1287 char *tmp; 1288 1289 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1290 goto out_free; 1291 1292 free(filter); 1293 filter = tmp; 1294 } 1295 } 1296 1297 ret = perf_evlist__set_filter(evlist, filter); 1298 out_free: 1299 free(filter); 1300 return ret; 1301 } 1302 1303 int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) 1304 { 1305 return perf_evlist__set_filter_pids(evlist, 1, &pid); 1306 } 1307 1308 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1309 { 1310 struct perf_evsel *pos; 1311 1312 if (evlist->nr_entries == 1) 1313 return true; 1314 1315 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1316 return false; 1317 1318 evlist__for_each(evlist, pos) { 1319 if (pos->id_pos != evlist->id_pos || 1320 pos->is_pos != evlist->is_pos) 1321 return false; 1322 } 1323 1324 return true; 1325 } 1326 1327 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1328 { 1329 struct perf_evsel *evsel; 1330 1331 if (evlist->combined_sample_type) 1332 return evlist->combined_sample_type; 1333 1334 evlist__for_each(evlist, evsel) 1335 evlist->combined_sample_type |= evsel->attr.sample_type; 1336 1337 return evlist->combined_sample_type; 1338 } 1339 1340 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1341 { 1342 evlist->combined_sample_type = 0; 1343 return __perf_evlist__combined_sample_type(evlist); 1344 } 1345 1346 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1347 { 1348 struct perf_evsel *evsel; 1349 u64 branch_type = 0; 1350 1351 evlist__for_each(evlist, evsel) 1352 branch_type |= evsel->attr.branch_sample_type; 1353 return branch_type; 1354 } 1355 1356 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1357 { 1358 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1359 u64 read_format = first->attr.read_format; 1360 u64 sample_type = first->attr.sample_type; 1361 1362 evlist__for_each(evlist, pos) { 1363 if (read_format != pos->attr.read_format) 1364 return false; 1365 } 1366 1367 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1368 if ((sample_type & PERF_SAMPLE_READ) && 1369 !(read_format & PERF_FORMAT_ID)) { 1370 return false; 1371 } 1372 1373 return true; 1374 } 1375 1376 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1377 { 1378 struct perf_evsel *first = perf_evlist__first(evlist); 1379 return first->attr.read_format; 1380 } 1381 1382 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1383 { 1384 struct perf_evsel *first = perf_evlist__first(evlist); 1385 struct perf_sample *data; 1386 u64 sample_type; 1387 u16 size = 0; 1388 1389 if (!first->attr.sample_id_all) 1390 goto out; 1391 1392 sample_type = first->attr.sample_type; 1393 1394 if (sample_type & PERF_SAMPLE_TID) 1395 size += sizeof(data->tid) * 2; 1396 1397 if (sample_type & PERF_SAMPLE_TIME) 1398 size += sizeof(data->time); 1399 1400 if (sample_type & PERF_SAMPLE_ID) 1401 size += sizeof(data->id); 1402 1403 if (sample_type & PERF_SAMPLE_STREAM_ID) 1404 size += sizeof(data->stream_id); 1405 1406 if (sample_type & PERF_SAMPLE_CPU) 1407 size += sizeof(data->cpu) * 2; 1408 1409 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1410 size += sizeof(data->id); 1411 out: 1412 return size; 1413 } 1414 1415 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1416 { 1417 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1418 1419 evlist__for_each_continue(evlist, pos) { 1420 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1421 return false; 1422 } 1423 1424 return true; 1425 } 1426 1427 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1428 { 1429 struct perf_evsel *first = perf_evlist__first(evlist); 1430 return first->attr.sample_id_all; 1431 } 1432 1433 void perf_evlist__set_selected(struct perf_evlist *evlist, 1434 struct perf_evsel *evsel) 1435 { 1436 evlist->selected = evsel; 1437 } 1438 1439 void perf_evlist__close(struct perf_evlist *evlist) 1440 { 1441 struct perf_evsel *evsel; 1442 int ncpus = cpu_map__nr(evlist->cpus); 1443 int nthreads = thread_map__nr(evlist->threads); 1444 int n; 1445 1446 evlist__for_each_reverse(evlist, evsel) { 1447 n = evsel->cpus ? evsel->cpus->nr : ncpus; 1448 perf_evsel__close(evsel, n, nthreads); 1449 } 1450 } 1451 1452 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1453 { 1454 struct cpu_map *cpus; 1455 struct thread_map *threads; 1456 int err = -ENOMEM; 1457 1458 /* 1459 * Try reading /sys/devices/system/cpu/online to get 1460 * an all cpus map. 1461 * 1462 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1463 * code needs an overhaul to properly forward the 1464 * error, and we may not want to do that fallback to a 1465 * default cpu identity map :-\ 1466 */ 1467 cpus = cpu_map__new(NULL); 1468 if (!cpus) 1469 goto out; 1470 1471 threads = thread_map__new_dummy(); 1472 if (!threads) 1473 goto out_put; 1474 1475 perf_evlist__set_maps(evlist, cpus, threads); 1476 out: 1477 return err; 1478 out_put: 1479 cpu_map__put(cpus); 1480 goto out; 1481 } 1482 1483 int perf_evlist__open(struct perf_evlist *evlist) 1484 { 1485 struct perf_evsel *evsel; 1486 int err; 1487 1488 /* 1489 * Default: one fd per CPU, all threads, aka systemwide 1490 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1491 */ 1492 if (evlist->threads == NULL && evlist->cpus == NULL) { 1493 err = perf_evlist__create_syswide_maps(evlist); 1494 if (err < 0) 1495 goto out_err; 1496 } 1497 1498 perf_evlist__update_id_pos(evlist); 1499 1500 evlist__for_each(evlist, evsel) { 1501 err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); 1502 if (err < 0) 1503 goto out_err; 1504 } 1505 1506 return 0; 1507 out_err: 1508 perf_evlist__close(evlist); 1509 errno = -err; 1510 return err; 1511 } 1512 1513 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1514 const char *argv[], bool pipe_output, 1515 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1516 { 1517 int child_ready_pipe[2], go_pipe[2]; 1518 char bf; 1519 1520 if (pipe(child_ready_pipe) < 0) { 1521 perror("failed to create 'ready' pipe"); 1522 return -1; 1523 } 1524 1525 if (pipe(go_pipe) < 0) { 1526 perror("failed to create 'go' pipe"); 1527 goto out_close_ready_pipe; 1528 } 1529 1530 evlist->workload.pid = fork(); 1531 if (evlist->workload.pid < 0) { 1532 perror("failed to fork"); 1533 goto out_close_pipes; 1534 } 1535 1536 if (!evlist->workload.pid) { 1537 int ret; 1538 1539 if (pipe_output) 1540 dup2(2, 1); 1541 1542 signal(SIGTERM, SIG_DFL); 1543 1544 close(child_ready_pipe[0]); 1545 close(go_pipe[1]); 1546 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1547 1548 /* 1549 * Tell the parent we're ready to go 1550 */ 1551 close(child_ready_pipe[1]); 1552 1553 /* 1554 * Wait until the parent tells us to go. 1555 */ 1556 ret = read(go_pipe[0], &bf, 1); 1557 /* 1558 * The parent will ask for the execvp() to be performed by 1559 * writing exactly one byte, in workload.cork_fd, usually via 1560 * perf_evlist__start_workload(). 1561 * 1562 * For cancelling the workload without actually running it, 1563 * the parent will just close workload.cork_fd, without writing 1564 * anything, i.e. read will return zero and we just exit() 1565 * here. 1566 */ 1567 if (ret != 1) { 1568 if (ret == -1) 1569 perror("unable to read pipe"); 1570 exit(ret); 1571 } 1572 1573 execvp(argv[0], (char **)argv); 1574 1575 if (exec_error) { 1576 union sigval val; 1577 1578 val.sival_int = errno; 1579 if (sigqueue(getppid(), SIGUSR1, val)) 1580 perror(argv[0]); 1581 } else 1582 perror(argv[0]); 1583 exit(-1); 1584 } 1585 1586 if (exec_error) { 1587 struct sigaction act = { 1588 .sa_flags = SA_SIGINFO, 1589 .sa_sigaction = exec_error, 1590 }; 1591 sigaction(SIGUSR1, &act, NULL); 1592 } 1593 1594 if (target__none(target)) { 1595 if (evlist->threads == NULL) { 1596 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1597 __func__, __LINE__); 1598 goto out_close_pipes; 1599 } 1600 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1601 } 1602 1603 close(child_ready_pipe[1]); 1604 close(go_pipe[0]); 1605 /* 1606 * wait for child to settle 1607 */ 1608 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1609 perror("unable to read pipe"); 1610 goto out_close_pipes; 1611 } 1612 1613 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1614 evlist->workload.cork_fd = go_pipe[1]; 1615 close(child_ready_pipe[0]); 1616 return 0; 1617 1618 out_close_pipes: 1619 close(go_pipe[0]); 1620 close(go_pipe[1]); 1621 out_close_ready_pipe: 1622 close(child_ready_pipe[0]); 1623 close(child_ready_pipe[1]); 1624 return -1; 1625 } 1626 1627 int perf_evlist__start_workload(struct perf_evlist *evlist) 1628 { 1629 if (evlist->workload.cork_fd > 0) { 1630 char bf = 0; 1631 int ret; 1632 /* 1633 * Remove the cork, let it rip! 1634 */ 1635 ret = write(evlist->workload.cork_fd, &bf, 1); 1636 if (ret < 0) 1637 perror("enable to write to pipe"); 1638 1639 close(evlist->workload.cork_fd); 1640 return ret; 1641 } 1642 1643 return 0; 1644 } 1645 1646 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1647 struct perf_sample *sample) 1648 { 1649 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1650 1651 if (!evsel) 1652 return -EFAULT; 1653 return perf_evsel__parse_sample(evsel, event, sample); 1654 } 1655 1656 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1657 { 1658 struct perf_evsel *evsel; 1659 size_t printed = 0; 1660 1661 evlist__for_each(evlist, evsel) { 1662 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1663 perf_evsel__name(evsel)); 1664 } 1665 1666 return printed + fprintf(fp, "\n"); 1667 } 1668 1669 int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, 1670 int err, char *buf, size_t size) 1671 { 1672 int printed, value; 1673 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1674 1675 switch (err) { 1676 case EACCES: 1677 case EPERM: 1678 printed = scnprintf(buf, size, 1679 "Error:\t%s.\n" 1680 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1681 1682 value = perf_event_paranoid(); 1683 1684 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1685 1686 if (value >= 2) { 1687 printed += scnprintf(buf + printed, size - printed, 1688 "For your workloads it needs to be <= 1\nHint:\t"); 1689 } 1690 printed += scnprintf(buf + printed, size - printed, 1691 "For system wide tracing it needs to be set to -1.\n"); 1692 1693 printed += scnprintf(buf + printed, size - printed, 1694 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1695 "Hint:\tThe current value is %d.", value); 1696 break; 1697 default: 1698 scnprintf(buf, size, "%s", emsg); 1699 break; 1700 } 1701 1702 return 0; 1703 } 1704 1705 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1706 { 1707 char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf)); 1708 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1709 1710 switch (err) { 1711 case EPERM: 1712 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1713 printed += scnprintf(buf + printed, size - printed, 1714 "Error:\t%s.\n" 1715 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1716 "Hint:\tTried using %zd kB.\n", 1717 emsg, pages_max_per_user, pages_attempted); 1718 1719 if (pages_attempted >= pages_max_per_user) { 1720 printed += scnprintf(buf + printed, size - printed, 1721 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1722 pages_max_per_user + pages_attempted); 1723 } 1724 1725 printed += scnprintf(buf + printed, size - printed, 1726 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1727 break; 1728 default: 1729 scnprintf(buf, size, "%s", emsg); 1730 break; 1731 } 1732 1733 return 0; 1734 } 1735 1736 void perf_evlist__to_front(struct perf_evlist *evlist, 1737 struct perf_evsel *move_evsel) 1738 { 1739 struct perf_evsel *evsel, *n; 1740 LIST_HEAD(move); 1741 1742 if (move_evsel == perf_evlist__first(evlist)) 1743 return; 1744 1745 evlist__for_each_safe(evlist, n, evsel) { 1746 if (evsel->leader == move_evsel->leader) 1747 list_move_tail(&evsel->node, &move); 1748 } 1749 1750 list_splice(&move, &evlist->entries); 1751 } 1752 1753 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1754 struct perf_evsel *tracking_evsel) 1755 { 1756 struct perf_evsel *evsel; 1757 1758 if (tracking_evsel->tracking) 1759 return; 1760 1761 evlist__for_each(evlist, evsel) { 1762 if (evsel != tracking_evsel) 1763 evsel->tracking = false; 1764 } 1765 1766 tracking_evsel->tracking = true; 1767 } 1768