1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 #include "util.h" 10 #include <api/fs/fs.h> 11 #include <errno.h> 12 #include <inttypes.h> 13 #include <poll.h> 14 #include "cpumap.h" 15 #include "thread_map.h" 16 #include "target.h" 17 #include "evlist.h" 18 #include "evsel.h" 19 #include "debug.h" 20 #include "units.h" 21 #include "asm/bug.h" 22 #include "bpf-event.h" 23 #include <signal.h> 24 #include <unistd.h> 25 26 #include "parse-events.h" 27 #include <subcmd/parse-options.h> 28 29 #include <fcntl.h> 30 #include <sys/ioctl.h> 31 #include <sys/mman.h> 32 33 #include <linux/bitops.h> 34 #include <linux/hash.h> 35 #include <linux/log2.h> 36 #include <linux/err.h> 37 38 #ifdef LACKS_SIGQUEUE_PROTOTYPE 39 int sigqueue(pid_t pid, int sig, const union sigval value); 40 #endif 41 42 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 43 #define SID(e, x, y) xyarray__entry(e->sample_id, x, y) 44 45 void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, 46 struct thread_map *threads) 47 { 48 int i; 49 50 for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) 51 INIT_HLIST_HEAD(&evlist->heads[i]); 52 INIT_LIST_HEAD(&evlist->entries); 53 perf_evlist__set_maps(evlist, cpus, threads); 54 fdarray__init(&evlist->pollfd, 64); 55 evlist->workload.pid = -1; 56 evlist->bkw_mmap_state = BKW_MMAP_NOTREADY; 57 } 58 59 struct perf_evlist *perf_evlist__new(void) 60 { 61 struct perf_evlist *evlist = zalloc(sizeof(*evlist)); 62 63 if (evlist != NULL) 64 perf_evlist__init(evlist, NULL, NULL); 65 66 return evlist; 67 } 68 69 struct perf_evlist *perf_evlist__new_default(void) 70 { 71 struct perf_evlist *evlist = perf_evlist__new(); 72 73 if (evlist && perf_evlist__add_default(evlist)) { 74 perf_evlist__delete(evlist); 75 evlist = NULL; 76 } 77 78 return evlist; 79 } 80 81 struct perf_evlist *perf_evlist__new_dummy(void) 82 { 83 struct perf_evlist *evlist = perf_evlist__new(); 84 85 if (evlist && perf_evlist__add_dummy(evlist)) { 86 perf_evlist__delete(evlist); 87 evlist = NULL; 88 } 89 90 return evlist; 91 } 92 93 /** 94 * perf_evlist__set_id_pos - set the positions of event ids. 95 * @evlist: selected event list 96 * 97 * Events with compatible sample types all have the same id_pos 98 * and is_pos. For convenience, put a copy on evlist. 99 */ 100 void perf_evlist__set_id_pos(struct perf_evlist *evlist) 101 { 102 struct perf_evsel *first = perf_evlist__first(evlist); 103 104 evlist->id_pos = first->id_pos; 105 evlist->is_pos = first->is_pos; 106 } 107 108 static void perf_evlist__update_id_pos(struct perf_evlist *evlist) 109 { 110 struct perf_evsel *evsel; 111 112 evlist__for_each_entry(evlist, evsel) 113 perf_evsel__calc_id_pos(evsel); 114 115 perf_evlist__set_id_pos(evlist); 116 } 117 118 static void perf_evlist__purge(struct perf_evlist *evlist) 119 { 120 struct perf_evsel *pos, *n; 121 122 evlist__for_each_entry_safe(evlist, n, pos) { 123 list_del_init(&pos->node); 124 pos->evlist = NULL; 125 perf_evsel__delete(pos); 126 } 127 128 evlist->nr_entries = 0; 129 } 130 131 void perf_evlist__exit(struct perf_evlist *evlist) 132 { 133 zfree(&evlist->mmap); 134 zfree(&evlist->overwrite_mmap); 135 fdarray__exit(&evlist->pollfd); 136 } 137 138 void perf_evlist__delete(struct perf_evlist *evlist) 139 { 140 if (evlist == NULL) 141 return; 142 143 perf_evlist__munmap(evlist); 144 perf_evlist__close(evlist); 145 cpu_map__put(evlist->cpus); 146 thread_map__put(evlist->threads); 147 evlist->cpus = NULL; 148 evlist->threads = NULL; 149 perf_evlist__purge(evlist); 150 perf_evlist__exit(evlist); 151 free(evlist); 152 } 153 154 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, 155 struct perf_evsel *evsel) 156 { 157 /* 158 * We already have cpus for evsel (via PMU sysfs) so 159 * keep it, if there's no target cpu list defined. 160 */ 161 if (!evsel->own_cpus || evlist->has_user_cpus) { 162 cpu_map__put(evsel->cpus); 163 evsel->cpus = cpu_map__get(evlist->cpus); 164 } else if (evsel->cpus != evsel->own_cpus) { 165 cpu_map__put(evsel->cpus); 166 evsel->cpus = cpu_map__get(evsel->own_cpus); 167 } 168 169 thread_map__put(evsel->threads); 170 evsel->threads = thread_map__get(evlist->threads); 171 } 172 173 static void perf_evlist__propagate_maps(struct perf_evlist *evlist) 174 { 175 struct perf_evsel *evsel; 176 177 evlist__for_each_entry(evlist, evsel) 178 __perf_evlist__propagate_maps(evlist, evsel); 179 } 180 181 void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) 182 { 183 entry->evlist = evlist; 184 list_add_tail(&entry->node, &evlist->entries); 185 entry->idx = evlist->nr_entries; 186 entry->tracking = !entry->idx; 187 188 if (!evlist->nr_entries++) 189 perf_evlist__set_id_pos(evlist); 190 191 __perf_evlist__propagate_maps(evlist, entry); 192 } 193 194 void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel) 195 { 196 evsel->evlist = NULL; 197 list_del_init(&evsel->node); 198 evlist->nr_entries -= 1; 199 } 200 201 void perf_evlist__splice_list_tail(struct perf_evlist *evlist, 202 struct list_head *list) 203 { 204 struct perf_evsel *evsel, *temp; 205 206 __evlist__for_each_entry_safe(list, temp, evsel) { 207 list_del_init(&evsel->node); 208 perf_evlist__add(evlist, evsel); 209 } 210 } 211 212 void __perf_evlist__set_leader(struct list_head *list) 213 { 214 struct perf_evsel *evsel, *leader; 215 216 leader = list_entry(list->next, struct perf_evsel, node); 217 evsel = list_entry(list->prev, struct perf_evsel, node); 218 219 leader->nr_members = evsel->idx - leader->idx + 1; 220 221 __evlist__for_each_entry(list, evsel) { 222 evsel->leader = leader; 223 } 224 } 225 226 void perf_evlist__set_leader(struct perf_evlist *evlist) 227 { 228 if (evlist->nr_entries) { 229 evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0; 230 __perf_evlist__set_leader(&evlist->entries); 231 } 232 } 233 234 int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise) 235 { 236 struct perf_evsel *evsel = perf_evsel__new_cycles(precise); 237 238 if (evsel == NULL) 239 return -ENOMEM; 240 241 perf_evlist__add(evlist, evsel); 242 return 0; 243 } 244 245 int perf_evlist__add_dummy(struct perf_evlist *evlist) 246 { 247 struct perf_event_attr attr = { 248 .type = PERF_TYPE_SOFTWARE, 249 .config = PERF_COUNT_SW_DUMMY, 250 .size = sizeof(attr), /* to capture ABI version */ 251 }; 252 struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); 253 254 if (evsel == NULL) 255 return -ENOMEM; 256 257 perf_evlist__add(evlist, evsel); 258 return 0; 259 } 260 261 static int perf_evlist__add_attrs(struct perf_evlist *evlist, 262 struct perf_event_attr *attrs, size_t nr_attrs) 263 { 264 struct perf_evsel *evsel, *n; 265 LIST_HEAD(head); 266 size_t i; 267 268 for (i = 0; i < nr_attrs; i++) { 269 evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i); 270 if (evsel == NULL) 271 goto out_delete_partial_list; 272 list_add_tail(&evsel->node, &head); 273 } 274 275 perf_evlist__splice_list_tail(evlist, &head); 276 277 return 0; 278 279 out_delete_partial_list: 280 __evlist__for_each_entry_safe(&head, n, evsel) 281 perf_evsel__delete(evsel); 282 return -1; 283 } 284 285 int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, 286 struct perf_event_attr *attrs, size_t nr_attrs) 287 { 288 size_t i; 289 290 for (i = 0; i < nr_attrs; i++) 291 event_attr_init(attrs + i); 292 293 return perf_evlist__add_attrs(evlist, attrs, nr_attrs); 294 } 295 296 struct perf_evsel * 297 perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id) 298 { 299 struct perf_evsel *evsel; 300 301 evlist__for_each_entry(evlist, evsel) { 302 if (evsel->attr.type == PERF_TYPE_TRACEPOINT && 303 (int)evsel->attr.config == id) 304 return evsel; 305 } 306 307 return NULL; 308 } 309 310 struct perf_evsel * 311 perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, 312 const char *name) 313 { 314 struct perf_evsel *evsel; 315 316 evlist__for_each_entry(evlist, evsel) { 317 if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) && 318 (strcmp(evsel->name, name) == 0)) 319 return evsel; 320 } 321 322 return NULL; 323 } 324 325 int perf_evlist__add_newtp(struct perf_evlist *evlist, 326 const char *sys, const char *name, void *handler) 327 { 328 struct perf_evsel *evsel = perf_evsel__newtp(sys, name); 329 330 if (IS_ERR(evsel)) 331 return -1; 332 333 evsel->handler = handler; 334 perf_evlist__add(evlist, evsel); 335 return 0; 336 } 337 338 static int perf_evlist__nr_threads(struct perf_evlist *evlist, 339 struct perf_evsel *evsel) 340 { 341 if (evsel->system_wide) 342 return 1; 343 else 344 return thread_map__nr(evlist->threads); 345 } 346 347 void perf_evlist__disable(struct perf_evlist *evlist) 348 { 349 struct perf_evsel *pos; 350 351 evlist__for_each_entry(evlist, pos) { 352 if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) 353 continue; 354 perf_evsel__disable(pos); 355 } 356 357 evlist->enabled = false; 358 } 359 360 void perf_evlist__enable(struct perf_evlist *evlist) 361 { 362 struct perf_evsel *pos; 363 364 evlist__for_each_entry(evlist, pos) { 365 if (!perf_evsel__is_group_leader(pos) || !pos->fd) 366 continue; 367 perf_evsel__enable(pos); 368 } 369 370 evlist->enabled = true; 371 } 372 373 void perf_evlist__toggle_enable(struct perf_evlist *evlist) 374 { 375 (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); 376 } 377 378 static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, 379 struct perf_evsel *evsel, int cpu) 380 { 381 int thread; 382 int nr_threads = perf_evlist__nr_threads(evlist, evsel); 383 384 if (!evsel->fd) 385 return -EINVAL; 386 387 for (thread = 0; thread < nr_threads; thread++) { 388 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 389 if (err) 390 return err; 391 } 392 return 0; 393 } 394 395 static int perf_evlist__enable_event_thread(struct perf_evlist *evlist, 396 struct perf_evsel *evsel, 397 int thread) 398 { 399 int cpu; 400 int nr_cpus = cpu_map__nr(evlist->cpus); 401 402 if (!evsel->fd) 403 return -EINVAL; 404 405 for (cpu = 0; cpu < nr_cpus; cpu++) { 406 int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0); 407 if (err) 408 return err; 409 } 410 return 0; 411 } 412 413 int perf_evlist__enable_event_idx(struct perf_evlist *evlist, 414 struct perf_evsel *evsel, int idx) 415 { 416 bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus); 417 418 if (per_cpu_mmaps) 419 return perf_evlist__enable_event_cpu(evlist, evsel, idx); 420 else 421 return perf_evlist__enable_event_thread(evlist, evsel, idx); 422 } 423 424 int perf_evlist__alloc_pollfd(struct perf_evlist *evlist) 425 { 426 int nr_cpus = cpu_map__nr(evlist->cpus); 427 int nr_threads = thread_map__nr(evlist->threads); 428 int nfds = 0; 429 struct perf_evsel *evsel; 430 431 evlist__for_each_entry(evlist, evsel) { 432 if (evsel->system_wide) 433 nfds += nr_cpus; 434 else 435 nfds += nr_cpus * nr_threads; 436 } 437 438 if (fdarray__available_entries(&evlist->pollfd) < nfds && 439 fdarray__grow(&evlist->pollfd, nfds) < 0) 440 return -ENOMEM; 441 442 return 0; 443 } 444 445 static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd, 446 struct perf_mmap *map, short revent) 447 { 448 int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP); 449 /* 450 * Save the idx so that when we filter out fds POLLHUP'ed we can 451 * close the associated evlist->mmap[] entry. 452 */ 453 if (pos >= 0) { 454 evlist->pollfd.priv[pos].ptr = map; 455 456 fcntl(fd, F_SETFL, O_NONBLOCK); 457 } 458 459 return pos; 460 } 461 462 int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd) 463 { 464 return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN); 465 } 466 467 static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd, 468 void *arg __maybe_unused) 469 { 470 struct perf_mmap *map = fda->priv[fd].ptr; 471 472 if (map) 473 perf_mmap__put(map); 474 } 475 476 int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask) 477 { 478 return fdarray__filter(&evlist->pollfd, revents_and_mask, 479 perf_evlist__munmap_filtered, NULL); 480 } 481 482 int perf_evlist__poll(struct perf_evlist *evlist, int timeout) 483 { 484 return fdarray__poll(&evlist->pollfd, timeout); 485 } 486 487 static void perf_evlist__id_hash(struct perf_evlist *evlist, 488 struct perf_evsel *evsel, 489 int cpu, int thread, u64 id) 490 { 491 int hash; 492 struct perf_sample_id *sid = SID(evsel, cpu, thread); 493 494 sid->id = id; 495 sid->evsel = evsel; 496 hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS); 497 hlist_add_head(&sid->node, &evlist->heads[hash]); 498 } 499 500 void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, 501 int cpu, int thread, u64 id) 502 { 503 perf_evlist__id_hash(evlist, evsel, cpu, thread, id); 504 evsel->id[evsel->ids++] = id; 505 } 506 507 int perf_evlist__id_add_fd(struct perf_evlist *evlist, 508 struct perf_evsel *evsel, 509 int cpu, int thread, int fd) 510 { 511 u64 read_data[4] = { 0, }; 512 int id_idx = 1; /* The first entry is the counter value */ 513 u64 id; 514 int ret; 515 516 ret = ioctl(fd, PERF_EVENT_IOC_ID, &id); 517 if (!ret) 518 goto add; 519 520 if (errno != ENOTTY) 521 return -1; 522 523 /* Legacy way to get event id.. All hail to old kernels! */ 524 525 /* 526 * This way does not work with group format read, so bail 527 * out in that case. 528 */ 529 if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP) 530 return -1; 531 532 if (!(evsel->attr.read_format & PERF_FORMAT_ID) || 533 read(fd, &read_data, sizeof(read_data)) == -1) 534 return -1; 535 536 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 537 ++id_idx; 538 if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) 539 ++id_idx; 540 541 id = read_data[id_idx]; 542 543 add: 544 perf_evlist__id_add(evlist, evsel, cpu, thread, id); 545 return 0; 546 } 547 548 static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, 549 struct perf_evsel *evsel, int idx, int cpu, 550 int thread) 551 { 552 struct perf_sample_id *sid = SID(evsel, cpu, thread); 553 sid->idx = idx; 554 if (evlist->cpus && cpu >= 0) 555 sid->cpu = evlist->cpus->map[cpu]; 556 else 557 sid->cpu = -1; 558 if (!evsel->system_wide && evlist->threads && thread >= 0) 559 sid->tid = thread_map__pid(evlist->threads, thread); 560 else 561 sid->tid = -1; 562 } 563 564 struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id) 565 { 566 struct hlist_head *head; 567 struct perf_sample_id *sid; 568 int hash; 569 570 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 571 head = &evlist->heads[hash]; 572 573 hlist_for_each_entry(sid, head, node) 574 if (sid->id == id) 575 return sid; 576 577 return NULL; 578 } 579 580 struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) 581 { 582 struct perf_sample_id *sid; 583 584 if (evlist->nr_entries == 1 || !id) 585 return perf_evlist__first(evlist); 586 587 sid = perf_evlist__id2sid(evlist, id); 588 if (sid) 589 return sid->evsel; 590 591 if (!perf_evlist__sample_id_all(evlist)) 592 return perf_evlist__first(evlist); 593 594 return NULL; 595 } 596 597 struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist, 598 u64 id) 599 { 600 struct perf_sample_id *sid; 601 602 if (!id) 603 return NULL; 604 605 sid = perf_evlist__id2sid(evlist, id); 606 if (sid) 607 return sid->evsel; 608 609 return NULL; 610 } 611 612 static int perf_evlist__event2id(struct perf_evlist *evlist, 613 union perf_event *event, u64 *id) 614 { 615 const u64 *array = event->sample.array; 616 ssize_t n; 617 618 n = (event->header.size - sizeof(event->header)) >> 3; 619 620 if (event->header.type == PERF_RECORD_SAMPLE) { 621 if (evlist->id_pos >= n) 622 return -1; 623 *id = array[evlist->id_pos]; 624 } else { 625 if (evlist->is_pos > n) 626 return -1; 627 n -= evlist->is_pos; 628 *id = array[n]; 629 } 630 return 0; 631 } 632 633 struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, 634 union perf_event *event) 635 { 636 struct perf_evsel *first = perf_evlist__first(evlist); 637 struct hlist_head *head; 638 struct perf_sample_id *sid; 639 int hash; 640 u64 id; 641 642 if (evlist->nr_entries == 1) 643 return first; 644 645 if (!first->attr.sample_id_all && 646 event->header.type != PERF_RECORD_SAMPLE) 647 return first; 648 649 if (perf_evlist__event2id(evlist, event, &id)) 650 return NULL; 651 652 /* Synthesized events have an id of zero */ 653 if (!id) 654 return first; 655 656 hash = hash_64(id, PERF_EVLIST__HLIST_BITS); 657 head = &evlist->heads[hash]; 658 659 hlist_for_each_entry(sid, head, node) { 660 if (sid->id == id) 661 return sid->evsel; 662 } 663 return NULL; 664 } 665 666 static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) 667 { 668 int i; 669 670 if (!evlist->overwrite_mmap) 671 return 0; 672 673 for (i = 0; i < evlist->nr_mmaps; i++) { 674 int fd = evlist->overwrite_mmap[i].fd; 675 int err; 676 677 if (fd < 0) 678 continue; 679 err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0); 680 if (err) 681 return err; 682 } 683 return 0; 684 } 685 686 static int perf_evlist__pause(struct perf_evlist *evlist) 687 { 688 return perf_evlist__set_paused(evlist, true); 689 } 690 691 static int perf_evlist__resume(struct perf_evlist *evlist) 692 { 693 return perf_evlist__set_paused(evlist, false); 694 } 695 696 static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) 697 { 698 int i; 699 700 if (evlist->mmap) 701 for (i = 0; i < evlist->nr_mmaps; i++) 702 perf_mmap__munmap(&evlist->mmap[i]); 703 704 if (evlist->overwrite_mmap) 705 for (i = 0; i < evlist->nr_mmaps; i++) 706 perf_mmap__munmap(&evlist->overwrite_mmap[i]); 707 } 708 709 void perf_evlist__munmap(struct perf_evlist *evlist) 710 { 711 perf_evlist__munmap_nofree(evlist); 712 zfree(&evlist->mmap); 713 zfree(&evlist->overwrite_mmap); 714 } 715 716 static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist, 717 bool overwrite) 718 { 719 int i; 720 struct perf_mmap *map; 721 722 evlist->nr_mmaps = cpu_map__nr(evlist->cpus); 723 if (cpu_map__empty(evlist->cpus)) 724 evlist->nr_mmaps = thread_map__nr(evlist->threads); 725 map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap)); 726 if (!map) 727 return NULL; 728 729 for (i = 0; i < evlist->nr_mmaps; i++) { 730 map[i].fd = -1; 731 map[i].overwrite = overwrite; 732 /* 733 * When the perf_mmap() call is made we grab one refcount, plus 734 * one extra to let perf_mmap__consume() get the last 735 * events after all real references (perf_mmap__get()) are 736 * dropped. 737 * 738 * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and 739 * thus does perf_mmap__get() on it. 740 */ 741 refcount_set(&map[i].refcnt, 0); 742 } 743 return map; 744 } 745 746 static bool 747 perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, 748 struct perf_evsel *evsel) 749 { 750 if (evsel->attr.write_backward) 751 return false; 752 return true; 753 } 754 755 static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, 756 struct mmap_params *mp, int cpu_idx, 757 int thread, int *_output, int *_output_overwrite) 758 { 759 struct perf_evsel *evsel; 760 int revent; 761 int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx); 762 763 evlist__for_each_entry(evlist, evsel) { 764 struct perf_mmap *maps = evlist->mmap; 765 int *output = _output; 766 int fd; 767 int cpu; 768 769 mp->prot = PROT_READ | PROT_WRITE; 770 if (evsel->attr.write_backward) { 771 output = _output_overwrite; 772 maps = evlist->overwrite_mmap; 773 774 if (!maps) { 775 maps = perf_evlist__alloc_mmap(evlist, true); 776 if (!maps) 777 return -1; 778 evlist->overwrite_mmap = maps; 779 if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) 780 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); 781 } 782 mp->prot &= ~PROT_WRITE; 783 } 784 785 if (evsel->system_wide && thread) 786 continue; 787 788 cpu = cpu_map__idx(evsel->cpus, evlist_cpu); 789 if (cpu == -1) 790 continue; 791 792 fd = FD(evsel, cpu, thread); 793 794 if (*output == -1) { 795 *output = fd; 796 797 if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) 798 return -1; 799 } else { 800 if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) 801 return -1; 802 803 perf_mmap__get(&maps[idx]); 804 } 805 806 revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0; 807 808 /* 809 * The system_wide flag causes a selected event to be opened 810 * always without a pid. Consequently it will never get a 811 * POLLHUP, but it is used for tracking in combination with 812 * other events, so it should not need to be polled anyway. 813 * Therefore don't add it for polling. 814 */ 815 if (!evsel->system_wide && 816 __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) { 817 perf_mmap__put(&maps[idx]); 818 return -1; 819 } 820 821 if (evsel->attr.read_format & PERF_FORMAT_ID) { 822 if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread, 823 fd) < 0) 824 return -1; 825 perf_evlist__set_sid_idx(evlist, evsel, idx, cpu, 826 thread); 827 } 828 } 829 830 return 0; 831 } 832 833 static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, 834 struct mmap_params *mp) 835 { 836 int cpu, thread; 837 int nr_cpus = cpu_map__nr(evlist->cpus); 838 int nr_threads = thread_map__nr(evlist->threads); 839 840 pr_debug2("perf event ring buffer mmapped per cpu\n"); 841 for (cpu = 0; cpu < nr_cpus; cpu++) { 842 int output = -1; 843 int output_overwrite = -1; 844 845 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, 846 true); 847 848 for (thread = 0; thread < nr_threads; thread++) { 849 if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, 850 thread, &output, &output_overwrite)) 851 goto out_unmap; 852 } 853 } 854 855 return 0; 856 857 out_unmap: 858 perf_evlist__munmap_nofree(evlist); 859 return -1; 860 } 861 862 static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, 863 struct mmap_params *mp) 864 { 865 int thread; 866 int nr_threads = thread_map__nr(evlist->threads); 867 868 pr_debug2("perf event ring buffer mmapped per thread\n"); 869 for (thread = 0; thread < nr_threads; thread++) { 870 int output = -1; 871 int output_overwrite = -1; 872 873 auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, 874 false); 875 876 if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, 877 &output, &output_overwrite)) 878 goto out_unmap; 879 } 880 881 return 0; 882 883 out_unmap: 884 perf_evlist__munmap_nofree(evlist); 885 return -1; 886 } 887 888 unsigned long perf_event_mlock_kb_in_pages(void) 889 { 890 unsigned long pages; 891 int max; 892 893 if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) { 894 /* 895 * Pick a once upon a time good value, i.e. things look 896 * strange since we can't read a sysctl value, but lets not 897 * die yet... 898 */ 899 max = 512; 900 } else { 901 max -= (page_size / 1024); 902 } 903 904 pages = (max * 1024) / page_size; 905 if (!is_power_of_2(pages)) 906 pages = rounddown_pow_of_two(pages); 907 908 return pages; 909 } 910 911 size_t perf_evlist__mmap_size(unsigned long pages) 912 { 913 if (pages == UINT_MAX) 914 pages = perf_event_mlock_kb_in_pages(); 915 else if (!is_power_of_2(pages)) 916 return 0; 917 918 return (pages + 1) * page_size; 919 } 920 921 static long parse_pages_arg(const char *str, unsigned long min, 922 unsigned long max) 923 { 924 unsigned long pages, val; 925 static struct parse_tag tags[] = { 926 { .tag = 'B', .mult = 1 }, 927 { .tag = 'K', .mult = 1 << 10 }, 928 { .tag = 'M', .mult = 1 << 20 }, 929 { .tag = 'G', .mult = 1 << 30 }, 930 { .tag = 0 }, 931 }; 932 933 if (str == NULL) 934 return -EINVAL; 935 936 val = parse_tag_value(str, tags); 937 if (val != (unsigned long) -1) { 938 /* we got file size value */ 939 pages = PERF_ALIGN(val, page_size) / page_size; 940 } else { 941 /* we got pages count value */ 942 char *eptr; 943 pages = strtoul(str, &eptr, 10); 944 if (*eptr != '\0') 945 return -EINVAL; 946 } 947 948 if (pages == 0 && min == 0) { 949 /* leave number of pages at 0 */ 950 } else if (!is_power_of_2(pages)) { 951 char buf[100]; 952 953 /* round pages up to next power of 2 */ 954 pages = roundup_pow_of_two(pages); 955 if (!pages) 956 return -EINVAL; 957 958 unit_number__scnprintf(buf, sizeof(buf), pages * page_size); 959 pr_info("rounding mmap pages size to %s (%lu pages)\n", 960 buf, pages); 961 } 962 963 if (pages > max) 964 return -EINVAL; 965 966 return pages; 967 } 968 969 int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) 970 { 971 unsigned long max = UINT_MAX; 972 long pages; 973 974 if (max > SIZE_MAX / page_size) 975 max = SIZE_MAX / page_size; 976 977 pages = parse_pages_arg(str, 1, max); 978 if (pages < 0) { 979 pr_err("Invalid argument for --mmap_pages/-m\n"); 980 return -1; 981 } 982 983 *mmap_pages = pages; 984 return 0; 985 } 986 987 int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, 988 int unset __maybe_unused) 989 { 990 return __perf_evlist__parse_mmap_pages(opt->value, str); 991 } 992 993 /** 994 * perf_evlist__mmap_ex - Create mmaps to receive events. 995 * @evlist: list of events 996 * @pages: map length in pages 997 * @overwrite: overwrite older events? 998 * @auxtrace_pages - auxtrace map length in pages 999 * @auxtrace_overwrite - overwrite older auxtrace data? 1000 * 1001 * If @overwrite is %false the user needs to signal event consumption using 1002 * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this 1003 * automatically. 1004 * 1005 * Similarly, if @auxtrace_overwrite is %false the user needs to signal data 1006 * consumption using auxtrace_mmap__write_tail(). 1007 * 1008 * Return: %0 on success, negative error code otherwise. 1009 */ 1010 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1011 unsigned int auxtrace_pages, 1012 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush) 1013 { 1014 struct perf_evsel *evsel; 1015 const struct cpu_map *cpus = evlist->cpus; 1016 const struct thread_map *threads = evlist->threads; 1017 /* 1018 * Delay setting mp.prot: set it before calling perf_mmap__mmap. 1019 * Its value is decided by evsel's write_backward. 1020 * So &mp should not be passed through const pointer. 1021 */ 1022 struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush }; 1023 1024 if (!evlist->mmap) 1025 evlist->mmap = perf_evlist__alloc_mmap(evlist, false); 1026 if (!evlist->mmap) 1027 return -ENOMEM; 1028 1029 if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) 1030 return -ENOMEM; 1031 1032 evlist->mmap_len = perf_evlist__mmap_size(pages); 1033 pr_debug("mmap size %zuB\n", evlist->mmap_len); 1034 mp.mask = evlist->mmap_len - page_size - 1; 1035 1036 auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, 1037 auxtrace_pages, auxtrace_overwrite); 1038 1039 evlist__for_each_entry(evlist, evsel) { 1040 if ((evsel->attr.read_format & PERF_FORMAT_ID) && 1041 evsel->sample_id == NULL && 1042 perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0) 1043 return -ENOMEM; 1044 } 1045 1046 if (cpu_map__empty(cpus)) 1047 return perf_evlist__mmap_per_thread(evlist, &mp); 1048 1049 return perf_evlist__mmap_per_cpu(evlist, &mp); 1050 } 1051 1052 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) 1053 { 1054 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1); 1055 } 1056 1057 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1058 { 1059 bool all_threads = (target->per_thread && target->system_wide); 1060 struct cpu_map *cpus; 1061 struct thread_map *threads; 1062 1063 /* 1064 * If specify '-a' and '--per-thread' to perf record, perf record 1065 * will override '--per-thread'. target->per_thread = false and 1066 * target->system_wide = true. 1067 * 1068 * If specify '--per-thread' only to perf record, 1069 * target->per_thread = true and target->system_wide = false. 1070 * 1071 * So target->per_thread && target->system_wide is false. 1072 * For perf record, thread_map__new_str doesn't call 1073 * thread_map__new_all_cpus. That will keep perf record's 1074 * current behavior. 1075 * 1076 * For perf stat, it allows the case that target->per_thread and 1077 * target->system_wide are all true. It means to collect system-wide 1078 * per-thread data. thread_map__new_str will call 1079 * thread_map__new_all_cpus to enumerate all threads. 1080 */ 1081 threads = thread_map__new_str(target->pid, target->tid, target->uid, 1082 all_threads); 1083 1084 if (!threads) 1085 return -1; 1086 1087 if (target__uses_dummy_map(target)) 1088 cpus = cpu_map__dummy_new(); 1089 else 1090 cpus = cpu_map__new(target->cpu_list); 1091 1092 if (!cpus) 1093 goto out_delete_threads; 1094 1095 evlist->has_user_cpus = !!target->cpu_list; 1096 1097 perf_evlist__set_maps(evlist, cpus, threads); 1098 1099 return 0; 1100 1101 out_delete_threads: 1102 thread_map__put(threads); 1103 return -1; 1104 } 1105 1106 void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, 1107 struct thread_map *threads) 1108 { 1109 /* 1110 * Allow for the possibility that one or another of the maps isn't being 1111 * changed i.e. don't put it. Note we are assuming the maps that are 1112 * being applied are brand new and evlist is taking ownership of the 1113 * original reference count of 1. If that is not the case it is up to 1114 * the caller to increase the reference count. 1115 */ 1116 if (cpus != evlist->cpus) { 1117 cpu_map__put(evlist->cpus); 1118 evlist->cpus = cpu_map__get(cpus); 1119 } 1120 1121 if (threads != evlist->threads) { 1122 thread_map__put(evlist->threads); 1123 evlist->threads = thread_map__get(threads); 1124 } 1125 1126 perf_evlist__propagate_maps(evlist); 1127 } 1128 1129 void __perf_evlist__set_sample_bit(struct perf_evlist *evlist, 1130 enum perf_event_sample_format bit) 1131 { 1132 struct perf_evsel *evsel; 1133 1134 evlist__for_each_entry(evlist, evsel) 1135 __perf_evsel__set_sample_bit(evsel, bit); 1136 } 1137 1138 void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist, 1139 enum perf_event_sample_format bit) 1140 { 1141 struct perf_evsel *evsel; 1142 1143 evlist__for_each_entry(evlist, evsel) 1144 __perf_evsel__reset_sample_bit(evsel, bit); 1145 } 1146 1147 int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) 1148 { 1149 struct perf_evsel *evsel; 1150 int err = 0; 1151 1152 evlist__for_each_entry(evlist, evsel) { 1153 if (evsel->filter == NULL) 1154 continue; 1155 1156 /* 1157 * filters only work for tracepoint event, which doesn't have cpu limit. 1158 * So evlist and evsel should always be same. 1159 */ 1160 err = perf_evsel__apply_filter(evsel, evsel->filter); 1161 if (err) { 1162 *err_evsel = evsel; 1163 break; 1164 } 1165 } 1166 1167 return err; 1168 } 1169 1170 int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter) 1171 { 1172 struct perf_evsel *evsel; 1173 int err = 0; 1174 1175 evlist__for_each_entry(evlist, evsel) { 1176 if (evsel->attr.type != PERF_TYPE_TRACEPOINT) 1177 continue; 1178 1179 err = perf_evsel__set_filter(evsel, filter); 1180 if (err) 1181 break; 1182 } 1183 1184 return err; 1185 } 1186 1187 int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) 1188 { 1189 char *filter; 1190 int ret = -1; 1191 size_t i; 1192 1193 for (i = 0; i < npids; ++i) { 1194 if (i == 0) { 1195 if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) 1196 return -1; 1197 } else { 1198 char *tmp; 1199 1200 if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) 1201 goto out_free; 1202 1203 free(filter); 1204 filter = tmp; 1205 } 1206 } 1207 1208 ret = perf_evlist__set_tp_filter(evlist, filter); 1209 out_free: 1210 free(filter); 1211 return ret; 1212 } 1213 1214 int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid) 1215 { 1216 return perf_evlist__set_tp_filter_pids(evlist, 1, &pid); 1217 } 1218 1219 bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) 1220 { 1221 struct perf_evsel *pos; 1222 1223 if (evlist->nr_entries == 1) 1224 return true; 1225 1226 if (evlist->id_pos < 0 || evlist->is_pos < 0) 1227 return false; 1228 1229 evlist__for_each_entry(evlist, pos) { 1230 if (pos->id_pos != evlist->id_pos || 1231 pos->is_pos != evlist->is_pos) 1232 return false; 1233 } 1234 1235 return true; 1236 } 1237 1238 u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1239 { 1240 struct perf_evsel *evsel; 1241 1242 if (evlist->combined_sample_type) 1243 return evlist->combined_sample_type; 1244 1245 evlist__for_each_entry(evlist, evsel) 1246 evlist->combined_sample_type |= evsel->attr.sample_type; 1247 1248 return evlist->combined_sample_type; 1249 } 1250 1251 u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) 1252 { 1253 evlist->combined_sample_type = 0; 1254 return __perf_evlist__combined_sample_type(evlist); 1255 } 1256 1257 u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) 1258 { 1259 struct perf_evsel *evsel; 1260 u64 branch_type = 0; 1261 1262 evlist__for_each_entry(evlist, evsel) 1263 branch_type |= evsel->attr.branch_sample_type; 1264 return branch_type; 1265 } 1266 1267 bool perf_evlist__valid_read_format(struct perf_evlist *evlist) 1268 { 1269 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1270 u64 read_format = first->attr.read_format; 1271 u64 sample_type = first->attr.sample_type; 1272 1273 evlist__for_each_entry(evlist, pos) { 1274 if (read_format != pos->attr.read_format) 1275 return false; 1276 } 1277 1278 /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ 1279 if ((sample_type & PERF_SAMPLE_READ) && 1280 !(read_format & PERF_FORMAT_ID)) { 1281 return false; 1282 } 1283 1284 return true; 1285 } 1286 1287 u64 perf_evlist__read_format(struct perf_evlist *evlist) 1288 { 1289 struct perf_evsel *first = perf_evlist__first(evlist); 1290 return first->attr.read_format; 1291 } 1292 1293 u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist) 1294 { 1295 struct perf_evsel *first = perf_evlist__first(evlist); 1296 struct perf_sample *data; 1297 u64 sample_type; 1298 u16 size = 0; 1299 1300 if (!first->attr.sample_id_all) 1301 goto out; 1302 1303 sample_type = first->attr.sample_type; 1304 1305 if (sample_type & PERF_SAMPLE_TID) 1306 size += sizeof(data->tid) * 2; 1307 1308 if (sample_type & PERF_SAMPLE_TIME) 1309 size += sizeof(data->time); 1310 1311 if (sample_type & PERF_SAMPLE_ID) 1312 size += sizeof(data->id); 1313 1314 if (sample_type & PERF_SAMPLE_STREAM_ID) 1315 size += sizeof(data->stream_id); 1316 1317 if (sample_type & PERF_SAMPLE_CPU) 1318 size += sizeof(data->cpu) * 2; 1319 1320 if (sample_type & PERF_SAMPLE_IDENTIFIER) 1321 size += sizeof(data->id); 1322 out: 1323 return size; 1324 } 1325 1326 bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist) 1327 { 1328 struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; 1329 1330 evlist__for_each_entry_continue(evlist, pos) { 1331 if (first->attr.sample_id_all != pos->attr.sample_id_all) 1332 return false; 1333 } 1334 1335 return true; 1336 } 1337 1338 bool perf_evlist__sample_id_all(struct perf_evlist *evlist) 1339 { 1340 struct perf_evsel *first = perf_evlist__first(evlist); 1341 return first->attr.sample_id_all; 1342 } 1343 1344 void perf_evlist__set_selected(struct perf_evlist *evlist, 1345 struct perf_evsel *evsel) 1346 { 1347 evlist->selected = evsel; 1348 } 1349 1350 void perf_evlist__close(struct perf_evlist *evlist) 1351 { 1352 struct perf_evsel *evsel; 1353 1354 evlist__for_each_entry_reverse(evlist, evsel) 1355 perf_evsel__close(evsel); 1356 } 1357 1358 static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) 1359 { 1360 struct cpu_map *cpus; 1361 struct thread_map *threads; 1362 int err = -ENOMEM; 1363 1364 /* 1365 * Try reading /sys/devices/system/cpu/online to get 1366 * an all cpus map. 1367 * 1368 * FIXME: -ENOMEM is the best we can do here, the cpu_map 1369 * code needs an overhaul to properly forward the 1370 * error, and we may not want to do that fallback to a 1371 * default cpu identity map :-\ 1372 */ 1373 cpus = cpu_map__new(NULL); 1374 if (!cpus) 1375 goto out; 1376 1377 threads = thread_map__new_dummy(); 1378 if (!threads) 1379 goto out_put; 1380 1381 perf_evlist__set_maps(evlist, cpus, threads); 1382 out: 1383 return err; 1384 out_put: 1385 cpu_map__put(cpus); 1386 goto out; 1387 } 1388 1389 int perf_evlist__open(struct perf_evlist *evlist) 1390 { 1391 struct perf_evsel *evsel; 1392 int err; 1393 1394 /* 1395 * Default: one fd per CPU, all threads, aka systemwide 1396 * as sys_perf_event_open(cpu = -1, thread = -1) is EINVAL 1397 */ 1398 if (evlist->threads == NULL && evlist->cpus == NULL) { 1399 err = perf_evlist__create_syswide_maps(evlist); 1400 if (err < 0) 1401 goto out_err; 1402 } 1403 1404 perf_evlist__update_id_pos(evlist); 1405 1406 evlist__for_each_entry(evlist, evsel) { 1407 err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); 1408 if (err < 0) 1409 goto out_err; 1410 } 1411 1412 return 0; 1413 out_err: 1414 perf_evlist__close(evlist); 1415 errno = -err; 1416 return err; 1417 } 1418 1419 int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target, 1420 const char *argv[], bool pipe_output, 1421 void (*exec_error)(int signo, siginfo_t *info, void *ucontext)) 1422 { 1423 int child_ready_pipe[2], go_pipe[2]; 1424 char bf; 1425 1426 if (pipe(child_ready_pipe) < 0) { 1427 perror("failed to create 'ready' pipe"); 1428 return -1; 1429 } 1430 1431 if (pipe(go_pipe) < 0) { 1432 perror("failed to create 'go' pipe"); 1433 goto out_close_ready_pipe; 1434 } 1435 1436 evlist->workload.pid = fork(); 1437 if (evlist->workload.pid < 0) { 1438 perror("failed to fork"); 1439 goto out_close_pipes; 1440 } 1441 1442 if (!evlist->workload.pid) { 1443 int ret; 1444 1445 if (pipe_output) 1446 dup2(2, 1); 1447 1448 signal(SIGTERM, SIG_DFL); 1449 1450 close(child_ready_pipe[0]); 1451 close(go_pipe[1]); 1452 fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); 1453 1454 /* 1455 * Tell the parent we're ready to go 1456 */ 1457 close(child_ready_pipe[1]); 1458 1459 /* 1460 * Wait until the parent tells us to go. 1461 */ 1462 ret = read(go_pipe[0], &bf, 1); 1463 /* 1464 * The parent will ask for the execvp() to be performed by 1465 * writing exactly one byte, in workload.cork_fd, usually via 1466 * perf_evlist__start_workload(). 1467 * 1468 * For cancelling the workload without actually running it, 1469 * the parent will just close workload.cork_fd, without writing 1470 * anything, i.e. read will return zero and we just exit() 1471 * here. 1472 */ 1473 if (ret != 1) { 1474 if (ret == -1) 1475 perror("unable to read pipe"); 1476 exit(ret); 1477 } 1478 1479 execvp(argv[0], (char **)argv); 1480 1481 if (exec_error) { 1482 union sigval val; 1483 1484 val.sival_int = errno; 1485 if (sigqueue(getppid(), SIGUSR1, val)) 1486 perror(argv[0]); 1487 } else 1488 perror(argv[0]); 1489 exit(-1); 1490 } 1491 1492 if (exec_error) { 1493 struct sigaction act = { 1494 .sa_flags = SA_SIGINFO, 1495 .sa_sigaction = exec_error, 1496 }; 1497 sigaction(SIGUSR1, &act, NULL); 1498 } 1499 1500 if (target__none(target)) { 1501 if (evlist->threads == NULL) { 1502 fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n", 1503 __func__, __LINE__); 1504 goto out_close_pipes; 1505 } 1506 thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); 1507 } 1508 1509 close(child_ready_pipe[1]); 1510 close(go_pipe[0]); 1511 /* 1512 * wait for child to settle 1513 */ 1514 if (read(child_ready_pipe[0], &bf, 1) == -1) { 1515 perror("unable to read pipe"); 1516 goto out_close_pipes; 1517 } 1518 1519 fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC); 1520 evlist->workload.cork_fd = go_pipe[1]; 1521 close(child_ready_pipe[0]); 1522 return 0; 1523 1524 out_close_pipes: 1525 close(go_pipe[0]); 1526 close(go_pipe[1]); 1527 out_close_ready_pipe: 1528 close(child_ready_pipe[0]); 1529 close(child_ready_pipe[1]); 1530 return -1; 1531 } 1532 1533 int perf_evlist__start_workload(struct perf_evlist *evlist) 1534 { 1535 if (evlist->workload.cork_fd > 0) { 1536 char bf = 0; 1537 int ret; 1538 /* 1539 * Remove the cork, let it rip! 1540 */ 1541 ret = write(evlist->workload.cork_fd, &bf, 1); 1542 if (ret < 0) 1543 perror("unable to write to pipe"); 1544 1545 close(evlist->workload.cork_fd); 1546 return ret; 1547 } 1548 1549 return 0; 1550 } 1551 1552 int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, 1553 struct perf_sample *sample) 1554 { 1555 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1556 1557 if (!evsel) 1558 return -EFAULT; 1559 return perf_evsel__parse_sample(evsel, event, sample); 1560 } 1561 1562 int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, 1563 union perf_event *event, 1564 u64 *timestamp) 1565 { 1566 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1567 1568 if (!evsel) 1569 return -EFAULT; 1570 return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); 1571 } 1572 1573 size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) 1574 { 1575 struct perf_evsel *evsel; 1576 size_t printed = 0; 1577 1578 evlist__for_each_entry(evlist, evsel) { 1579 printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "", 1580 perf_evsel__name(evsel)); 1581 } 1582 1583 return printed + fprintf(fp, "\n"); 1584 } 1585 1586 int perf_evlist__strerror_open(struct perf_evlist *evlist, 1587 int err, char *buf, size_t size) 1588 { 1589 int printed, value; 1590 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1591 1592 switch (err) { 1593 case EACCES: 1594 case EPERM: 1595 printed = scnprintf(buf, size, 1596 "Error:\t%s.\n" 1597 "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg); 1598 1599 value = perf_event_paranoid(); 1600 1601 printed += scnprintf(buf + printed, size - printed, "\nHint:\t"); 1602 1603 if (value >= 2) { 1604 printed += scnprintf(buf + printed, size - printed, 1605 "For your workloads it needs to be <= 1\nHint:\t"); 1606 } 1607 printed += scnprintf(buf + printed, size - printed, 1608 "For system wide tracing it needs to be set to -1.\n"); 1609 1610 printed += scnprintf(buf + printed, size - printed, 1611 "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" 1612 "Hint:\tThe current value is %d.", value); 1613 break; 1614 case EINVAL: { 1615 struct perf_evsel *first = perf_evlist__first(evlist); 1616 int max_freq; 1617 1618 if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) 1619 goto out_default; 1620 1621 if (first->attr.sample_freq < (u64)max_freq) 1622 goto out_default; 1623 1624 printed = scnprintf(buf, size, 1625 "Error:\t%s.\n" 1626 "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" 1627 "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", 1628 emsg, max_freq, first->attr.sample_freq); 1629 break; 1630 } 1631 default: 1632 out_default: 1633 scnprintf(buf, size, "%s", emsg); 1634 break; 1635 } 1636 1637 return 0; 1638 } 1639 1640 int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size) 1641 { 1642 char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf)); 1643 int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0; 1644 1645 switch (err) { 1646 case EPERM: 1647 sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user); 1648 printed += scnprintf(buf + printed, size - printed, 1649 "Error:\t%s.\n" 1650 "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n" 1651 "Hint:\tTried using %zd kB.\n", 1652 emsg, pages_max_per_user, pages_attempted); 1653 1654 if (pages_attempted >= pages_max_per_user) { 1655 printed += scnprintf(buf + printed, size - printed, 1656 "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n", 1657 pages_max_per_user + pages_attempted); 1658 } 1659 1660 printed += scnprintf(buf + printed, size - printed, 1661 "Hint:\tTry using a smaller -m/--mmap-pages value."); 1662 break; 1663 default: 1664 scnprintf(buf, size, "%s", emsg); 1665 break; 1666 } 1667 1668 return 0; 1669 } 1670 1671 void perf_evlist__to_front(struct perf_evlist *evlist, 1672 struct perf_evsel *move_evsel) 1673 { 1674 struct perf_evsel *evsel, *n; 1675 LIST_HEAD(move); 1676 1677 if (move_evsel == perf_evlist__first(evlist)) 1678 return; 1679 1680 evlist__for_each_entry_safe(evlist, n, evsel) { 1681 if (evsel->leader == move_evsel->leader) 1682 list_move_tail(&evsel->node, &move); 1683 } 1684 1685 list_splice(&move, &evlist->entries); 1686 } 1687 1688 void perf_evlist__set_tracking_event(struct perf_evlist *evlist, 1689 struct perf_evsel *tracking_evsel) 1690 { 1691 struct perf_evsel *evsel; 1692 1693 if (tracking_evsel->tracking) 1694 return; 1695 1696 evlist__for_each_entry(evlist, evsel) { 1697 if (evsel != tracking_evsel) 1698 evsel->tracking = false; 1699 } 1700 1701 tracking_evsel->tracking = true; 1702 } 1703 1704 struct perf_evsel * 1705 perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, 1706 const char *str) 1707 { 1708 struct perf_evsel *evsel; 1709 1710 evlist__for_each_entry(evlist, evsel) { 1711 if (!evsel->name) 1712 continue; 1713 if (strcmp(str, evsel->name) == 0) 1714 return evsel; 1715 } 1716 1717 return NULL; 1718 } 1719 1720 void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, 1721 enum bkw_mmap_state state) 1722 { 1723 enum bkw_mmap_state old_state = evlist->bkw_mmap_state; 1724 enum action { 1725 NONE, 1726 PAUSE, 1727 RESUME, 1728 } action = NONE; 1729 1730 if (!evlist->overwrite_mmap) 1731 return; 1732 1733 switch (old_state) { 1734 case BKW_MMAP_NOTREADY: { 1735 if (state != BKW_MMAP_RUNNING) 1736 goto state_err; 1737 break; 1738 } 1739 case BKW_MMAP_RUNNING: { 1740 if (state != BKW_MMAP_DATA_PENDING) 1741 goto state_err; 1742 action = PAUSE; 1743 break; 1744 } 1745 case BKW_MMAP_DATA_PENDING: { 1746 if (state != BKW_MMAP_EMPTY) 1747 goto state_err; 1748 break; 1749 } 1750 case BKW_MMAP_EMPTY: { 1751 if (state != BKW_MMAP_RUNNING) 1752 goto state_err; 1753 action = RESUME; 1754 break; 1755 } 1756 default: 1757 WARN_ONCE(1, "Shouldn't get there\n"); 1758 } 1759 1760 evlist->bkw_mmap_state = state; 1761 1762 switch (action) { 1763 case PAUSE: 1764 perf_evlist__pause(evlist); 1765 break; 1766 case RESUME: 1767 perf_evlist__resume(evlist); 1768 break; 1769 case NONE: 1770 default: 1771 break; 1772 } 1773 1774 state_err: 1775 return; 1776 } 1777 1778 bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) 1779 { 1780 struct perf_evsel *evsel; 1781 1782 evlist__for_each_entry(evlist, evsel) { 1783 if (!evsel->attr.exclude_kernel) 1784 return false; 1785 } 1786 1787 return true; 1788 } 1789 1790 /* 1791 * Events in data file are not collect in groups, but we still want 1792 * the group display. Set the artificial group and set the leader's 1793 * forced_leader flag to notify the display code. 1794 */ 1795 void perf_evlist__force_leader(struct perf_evlist *evlist) 1796 { 1797 if (!evlist->nr_groups) { 1798 struct perf_evsel *leader = perf_evlist__first(evlist); 1799 1800 perf_evlist__set_leader(evlist); 1801 leader->forced_leader = true; 1802 } 1803 } 1804 1805 struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, 1806 struct perf_evsel *evsel) 1807 { 1808 struct perf_evsel *c2, *leader; 1809 bool is_open = true; 1810 1811 leader = evsel->leader; 1812 pr_debug("Weak group for %s/%d failed\n", 1813 leader->name, leader->nr_members); 1814 1815 /* 1816 * for_each_group_member doesn't work here because it doesn't 1817 * include the first entry. 1818 */ 1819 evlist__for_each_entry(evsel_list, c2) { 1820 if (c2 == evsel) 1821 is_open = false; 1822 if (c2->leader == leader) { 1823 if (is_open) 1824 perf_evsel__close(c2); 1825 c2->leader = c2; 1826 c2->nr_members = 0; 1827 } 1828 } 1829 return leader; 1830 } 1831 1832 int perf_evlist__add_sb_event(struct perf_evlist **evlist, 1833 struct perf_event_attr *attr, 1834 perf_evsel__sb_cb_t cb, 1835 void *data) 1836 { 1837 struct perf_evsel *evsel; 1838 bool new_evlist = (*evlist) == NULL; 1839 1840 if (*evlist == NULL) 1841 *evlist = perf_evlist__new(); 1842 if (*evlist == NULL) 1843 return -1; 1844 1845 if (!attr->sample_id_all) { 1846 pr_warning("enabling sample_id_all for all side band events\n"); 1847 attr->sample_id_all = 1; 1848 } 1849 1850 evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries); 1851 if (!evsel) 1852 goto out_err; 1853 1854 evsel->side_band.cb = cb; 1855 evsel->side_band.data = data; 1856 perf_evlist__add(*evlist, evsel); 1857 return 0; 1858 1859 out_err: 1860 if (new_evlist) { 1861 perf_evlist__delete(*evlist); 1862 *evlist = NULL; 1863 } 1864 return -1; 1865 } 1866 1867 static void *perf_evlist__poll_thread(void *arg) 1868 { 1869 struct perf_evlist *evlist = arg; 1870 bool draining = false; 1871 int i, done = 0; 1872 1873 while (!done) { 1874 bool got_data = false; 1875 1876 if (evlist->thread.done) 1877 draining = true; 1878 1879 if (!draining) 1880 perf_evlist__poll(evlist, 1000); 1881 1882 for (i = 0; i < evlist->nr_mmaps; i++) { 1883 struct perf_mmap *map = &evlist->mmap[i]; 1884 union perf_event *event; 1885 1886 if (perf_mmap__read_init(map)) 1887 continue; 1888 while ((event = perf_mmap__read_event(map)) != NULL) { 1889 struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); 1890 1891 if (evsel && evsel->side_band.cb) 1892 evsel->side_band.cb(event, evsel->side_band.data); 1893 else 1894 pr_warning("cannot locate proper evsel for the side band event\n"); 1895 1896 perf_mmap__consume(map); 1897 got_data = true; 1898 } 1899 perf_mmap__read_done(map); 1900 } 1901 1902 if (draining && !got_data) 1903 break; 1904 } 1905 return NULL; 1906 } 1907 1908 int perf_evlist__start_sb_thread(struct perf_evlist *evlist, 1909 struct target *target) 1910 { 1911 struct perf_evsel *counter; 1912 1913 if (!evlist) 1914 return 0; 1915 1916 if (perf_evlist__create_maps(evlist, target)) 1917 goto out_delete_evlist; 1918 1919 evlist__for_each_entry(evlist, counter) { 1920 if (perf_evsel__open(counter, evlist->cpus, 1921 evlist->threads) < 0) 1922 goto out_delete_evlist; 1923 } 1924 1925 if (perf_evlist__mmap(evlist, UINT_MAX)) 1926 goto out_delete_evlist; 1927 1928 evlist__for_each_entry(evlist, counter) { 1929 if (perf_evsel__enable(counter)) 1930 goto out_delete_evlist; 1931 } 1932 1933 evlist->thread.done = 0; 1934 if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) 1935 goto out_delete_evlist; 1936 1937 return 0; 1938 1939 out_delete_evlist: 1940 perf_evlist__delete(evlist); 1941 evlist = NULL; 1942 return -1; 1943 } 1944 1945 void perf_evlist__stop_sb_thread(struct perf_evlist *evlist) 1946 { 1947 if (!evlist) 1948 return; 1949 evlist->thread.done = 1; 1950 pthread_join(evlist->thread.th, NULL); 1951 perf_evlist__delete(evlist); 1952 } 1953