/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))

int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

static void hists__init(struct hists *hists)
{
	memset(hists, 0, sizeof(*hists));
	hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
	hists->entries_in = &hists->entries_in_array[0];
	hists->entries_collapsed = RB_ROOT;
	hists->entries = RB_ROOT;
	pthread_mutex_init(&hists->lock, NULL);
}

void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}
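/*
 * Usage sketch (illustrative only, not taken from the actual callers in
 * builtin-{top,stat,record}.c): a caller fills a perf_event_attr and
 * hands it over together with the evsel's index in its evlist:
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *	};
 *	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 *
 *	if (evsel == NULL)
 *		return -ENOMEM;
 *
 * The attr is copied into the evsel by perf_evsel__init(), so the
 * caller's copy may live on the stack.
 */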
void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
{
	struct perf_event_attr *attr = &evsel->attr;
	int track = !evsel->idx; /* only the first counter needs these */

	attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
	attr->inherit = !opts->no_inherit;
	attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
			    PERF_FORMAT_TOTAL_TIME_RUNNING |
			    PERF_FORMAT_ID;

	attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	/*
	 * We default some events to a default sample interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (opts->user_freq != UINT_MAX &&
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
			attr->sample_type |= PERF_SAMPLE_PERIOD;
			attr->freq = 1;
			attr->sample_freq = opts->freq;
		} else {
			attr->sample_period = opts->default_interval;
		}
	}

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat)
		attr->inherit_stat = 1;

	if (opts->sample_address) {
		attr->sample_type |= PERF_SAMPLE_ADDR;
		attr->mmap_data = track;
	}

	if (opts->call_graph)
		attr->sample_type |= PERF_SAMPLE_CALLCHAIN;

	if (opts->system_wide)
		attr->sample_type |= PERF_SAMPLE_CPU;

	if (opts->period)
		attr->sample_type |= PERF_SAMPLE_PERIOD;

	if (opts->sample_id_all_avail &&
	    (opts->sample_time || opts->system_wide ||
	     !opts->no_inherit || opts->cpu_list))
		attr->sample_type |= PERF_SAMPLE_TIME;

	if (opts->raw_samples) {
		attr->sample_type |= PERF_SAMPLE_TIME;
		attr->sample_type |= PERF_SAMPLE_RAW;
		attr->sample_type |= PERF_SAMPLE_CPU;
	}

	if (opts->no_delay) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}

	attr->mmap = track;
	attr->comm = track;

	if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
		attr->disabled = 1;
		attr->enable_on_exec = 1;
	}
}

int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;
	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}
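/*
 * Illustrative sketch (an assumption about typical callers, not code
 * taken from them): before opening the event, the fd and sample_id
 * arrays are usually sized from the cpu and thread maps that will be
 * passed to perf_evsel__open():
 *
 *	if (perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0 ||
 *	    perf_evsel__alloc_id(evsel, cpus->nr, threads->nr) < 0)
 *		return -ENOMEM;
 *
 * __perf_evsel__open() below also allocates the fd array on demand if
 * it is still NULL.
 */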
int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
				(ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}

int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}
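/*
 * Illustrative sketch of the scaled read path (an assumption, not code
 * taken from the callers): with scale == true three u64s are read back
 * per fd -- value, time_enabled and time_running -- which assumes the
 * event's read_format is PERF_FORMAT_TOTAL_TIME_ENABLED |
 * PERF_FORMAT_TOTAL_TIME_RUNNING and nothing else. After
 * perf_evsel__alloc_counts() and a successful open, an aggregate read
 * over all cpus/threads would look like:
 *
 *	if (__perf_evsel__read(evsel, cpus->nr, threads->nr, true) == 0 &&
 *	    evsel->counts->scaled >= 0)
 *		printf("%" PRIu64 "\n", evsel->counts->aggr.val);
 *
 * counts->scaled is -1 when the counter never ran, 1 when the value was
 * extrapolated because the event was multiplexed, and 0 otherwise.
 */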
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
	evsel->fd = NULL;
}

static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr = 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}

static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}

static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}
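/*
 * Layout sketch, derived from the parsing code below (informational,
 * not part of the original comments): in a PERF_RECORD_SAMPLE the
 * fields handled here appear in this order, each present only when the
 * corresponding bit is set in sample_type:
 *
 *	{ u64 ip;	 }	PERF_SAMPLE_IP
 *	{ u32 pid, tid;	 }	PERF_SAMPLE_TID
 *	{ u64 time;	 }	PERF_SAMPLE_TIME
 *	{ u64 addr;	 }	PERF_SAMPLE_ADDR
 *	{ u64 id;	 }	PERF_SAMPLE_ID
 *	{ u64 stream_id; }	PERF_SAMPLE_STREAM_ID
 *	{ u32 cpu, res;	 }	PERF_SAMPLE_CPU
 *	{ u64 period;	 }	PERF_SAMPLE_PERIOD
 *
 * followed by the callchain and raw data. For non-sample records with
 * sample_id_all set, a subset of these fields is appended at the end of
 * the event, which is what perf_event__parse_id_sample() above walks
 * backwards from the tail.
 */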
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	data->addr = 0;
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	data->id = -1ULL;
	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;
	}

	return 0;
}
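/*
 * Illustrative usage sketch (an assumption, not code copied from the
 * callers), parsing same-endian data so swapped == false:
 *
 *	struct perf_sample sample;
 *	u64 type = evsel->attr.sample_type;
 *
 *	if (perf_event__parse_sample(event, type,
 *				     __perf_evsel__sample_size(type),
 *				     evsel->attr.sample_id_all,
 *				     &sample, false) < 0)
 *		return -1;
 *
 * The sample_size argument is the fixed-size portion implied by 'type',
 * used only for the up-front bounds check against header.size.
 */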
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IP) {
		event->ip.ip = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_event__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	return 0;
}
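/*
 * Illustrative note (an assumption, not from the original comments):
 * perf_event__synthesize_sample() is the inverse of
 * perf_event__parse_sample() for the fields it handles, so a parsed
 * sample can be written back into an event buffer:
 *
 *	perf_event__synthesize_sample(event, type, &sample, false);
 *
 * The caller must have set event->header.size large enough to hold
 * every field selected in 'type'; the function does not bounds-check
 * the destination buffer.
 */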