/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include <errno.h>
#include <inttypes.h>
#include <linux/bitops.h>
#include <api/fs/fs.h>
#include <api/fs/tracing_path.h>
#include <traceevent/event-parse.h>
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <linux/compiler.h>
#include <linux/err.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <dirent.h>
#include "asm/bug.h"
#include "callchain.h"
#include "cgroup.h"
#include "event.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"
#include "target.h"
#include "perf_regs.h"
#include "debug.h"
#include "trace-event.h"
#include "stat.h"
#include "memswap.h"
#include "util/parse-branch-options.h"

#include "sane_ctype.h"

static struct {
	bool sample_id_all;
	bool exclude_guest;
	bool mmap2;
	bool cloexec;
	bool clockid;
	bool clockid_wrong;
	bool lbr_flags;
	bool write_backward;
	bool group_read;
} perf_missing_features;

static clockid_t clockid;

static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
{
	return 0;
}

void __weak test_attr__ready(void) { }

static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused)
{
}

static struct {
	size_t size;
	int (*init)(struct perf_evsel *evsel);
	void (*fini)(struct perf_evsel *evsel);
} perf_evsel__object = {
	.size = sizeof(struct perf_evsel),
	.init = perf_evsel__no_extra_init,
	.fini = perf_evsel__no_extra_fini,
};

int perf_evsel__object_config(size_t object_size,
			      int (*init)(struct perf_evsel *evsel),
			      void (*fini)(struct perf_evsel *evsel))
{

	if (object_size == 0)
		goto set_methods;

	if (perf_evsel__object.size > object_size)
		return -EINVAL;

	perf_evsel__object.size = object_size;

set_methods:
	if (init != NULL)
		perf_evsel__object.init = init;

	if (fini != NULL)
		perf_evsel__object.fini = fini;

	return 0;
}

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))

int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

/**
 * __perf_evsel__calc_id_pos - calculate id_pos.
 * @sample_type: sample type
 *
 * This function returns the position of the event id (PERF_SAMPLE_ID or
 * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
 * sample_event.
 */
static int __perf_evsel__calc_id_pos(u64 sample_type)
{
	int idx = 0;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 0;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_IP)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TID)
		idx += 1;

	if (sample_type & PERF_SAMPLE_TIME)
		idx += 1;

	if (sample_type & PERF_SAMPLE_ADDR)
		idx += 1;

	return idx;
}

/**
 * __perf_evsel__calc_is_pos - calculate is_pos.
 * @sample_type: sample type
 *
 * This function returns the position (counting backwards) of the event id
 * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
 * sample_id_all is used there is an id sample appended to non-sample events.
 */
static int __perf_evsel__calc_is_pos(u64 sample_type)
{
	int idx = 1;

	if (sample_type & PERF_SAMPLE_IDENTIFIER)
		return 1;

	if (!(sample_type & PERF_SAMPLE_ID))
		return -1;

	if (sample_type & PERF_SAMPLE_CPU)
		idx += 1;

	if (sample_type & PERF_SAMPLE_STREAM_ID)
		idx += 1;

	return idx;
}

void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
{
	evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
	evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
}

void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
				  enum perf_event_sample_format bit)
{
	if (!(evsel->attr.sample_type & bit)) {
		evsel->attr.sample_type |= bit;
		evsel->sample_size += sizeof(u64);
		perf_evsel__calc_id_pos(evsel);
	}
}

void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
				    enum perf_event_sample_format bit)
{
	if (evsel->attr.sample_type & bit) {
		evsel->attr.sample_type &= ~bit;
		evsel->sample_size -= sizeof(u64);
		perf_evsel__calc_id_pos(evsel);
	}
}

void perf_evsel__set_sample_id(struct perf_evsel *evsel,
			       bool can_sample_identifier)
{
	if (can_sample_identifier) {
		perf_evsel__reset_sample_bit(evsel, ID);
		perf_evsel__set_sample_bit(evsel, IDENTIFIER);
	} else {
		perf_evsel__set_sample_bit(evsel, ID);
	}
	evsel->attr.read_format |= PERF_FORMAT_ID;
}

/**
 * perf_evsel__is_function_event - Return whether given evsel is a function
 * trace event
 *
 * @evsel - evsel selector to be tested
 *
 * Return %true if event is function trace event
 */
bool perf_evsel__is_function_event(struct perf_evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"

	return evsel->name &&
	       !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));

#undef FUNCTION_EVENT
}

void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx = idx;
	evsel->tracking = !idx;
	evsel->attr = *attr;
	evsel->leader = evsel;
	evsel->unit = "";
	evsel->scale = 1.0;
	evsel->evlist = NULL;
	evsel->bpf_fd = -1;
	INIT_LIST_HEAD(&evsel->node);
	INIT_LIST_HEAD(&evsel->config_terms);
	perf_evsel__object.init(evsel);
	evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
	perf_evsel__calc_id_pos(evsel);
	evsel->cmdline_group_boundary = false;
	evsel->metric_expr = NULL;
	evsel->metric_name = NULL;
	evsel->metric_events = NULL;
	evsel->collect_stat = false;
}

struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	/* Guard against a failed allocation before touching evsel->attr. */
	if (evsel != NULL && perf_evsel__is_bpf_output(evsel)) {
		evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					    PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD);
		evsel->attr.sample_period = 1;
	}

	return evsel;
}

static bool perf_event_can_profile_kernel(void)
{
	return geteuid() == 0 || perf_event_paranoid() == -1;
}

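/*
 * Allocate an evsel counting PERF_COUNT_HW_CPU_CYCLES.  Kernel samples are
 * excluded when the current user may not profile the kernel, and when
 * @precise is set the highest supported precise_ip level is probed and used.
 */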
struct perf_evsel *perf_evsel__new_cycles(bool precise)
{
	struct perf_event_attr attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
		.exclude_kernel = !perf_event_can_profile_kernel(),
	};
	struct perf_evsel *evsel;

	event_attr_init(&attr);

	if (!precise)
		goto new_event;
	/*
	 * Unnamed union member, not supported as struct member named
	 * initializer in older compilers such as gcc 4.4.7
	 *
	 * Just for probing the precise_ip:
	 */
	attr.sample_period = 1;

	perf_event_attr__set_max_precise_ip(&attr);
	/*
	 * Now let the usual logic that sets up the perf_event_attr defaults
	 * kick in when we return and before perf_evsel__open() is called.
	 */
	attr.sample_period = 0;
new_event:
	evsel = perf_evsel__new(&attr);
	if (evsel == NULL)
		goto out;

	/* use asprintf() because free(evsel) assumes name is allocated */
	if (asprintf(&evsel->name, "cycles%s%s%.*s",
		     (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
		     attr.exclude_kernel ? "u" : "",
		     attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
		goto error_free;
out:
	return evsel;
error_free:
	perf_evsel__delete(evsel);
	evsel = NULL;
	goto out;
}

/*
 * Returns pointer with encoded error via <linux/err.h> interface.
 */
struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
{
	struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
	int err = -ENOMEM;

	if (evsel == NULL) {
		goto out_err;
	} else {
		struct perf_event_attr attr = {
			.type = PERF_TYPE_TRACEPOINT,
			.sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
					PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
		};

		if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
			goto out_free;

		evsel->tp_format = trace_event__tp_format(sys, name);
		if (IS_ERR(evsel->tp_format)) {
			err = PTR_ERR(evsel->tp_format);
			goto out_free;
		}

		event_attr_init(&attr);
		attr.config = evsel->tp_format->id;
		attr.sample_period = 1;
		perf_evsel__init(evsel, &attr, idx);
	}

	return evsel;

out_free:
	zfree(&evsel->name);
	free(evsel);
out_err:
	return ERR_PTR(err);
}

const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
	"cycles",
	"instructions",
	"cache-references",
	"cache-misses",
	"branches",
	"branch-misses",
	"bus-cycles",
	"stalled-cycles-frontend",
	"stalled-cycles-backend",
	"ref-cycles",
};

static const char *__perf_evsel__hw_name(u64 config)
{
	if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
		return perf_evsel__hw_names[config];

	return "unknown-hardware";
}

static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
{
	int colon = 0, r = 0;
	struct perf_event_attr *attr = &evsel->attr;
	bool exclude_guest_default = false;

#define MOD_PRINT(context, mod) do {					\
		if (!attr->exclude_##context) {				\
			if (!colon) colon = ++r;			\
			r += scnprintf(bf + r, size - r, "%c", mod);	\
		} } while(0)

	if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
		MOD_PRINT(kernel, 'k');
		MOD_PRINT(user, 'u');
		MOD_PRINT(hv, 'h');
		exclude_guest_default = true;
	}

	if (attr->precise_ip) {
		if (!colon)
			colon = ++r;
		r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
		exclude_guest_default = true;
	}

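	/*
	 * Print the H/G (host/guest) modifiers either when the host was
	 * excluded explicitly or when exclude_guest still matches the default
	 * implied by the modifiers handled above.
	 */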
	if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
		MOD_PRINT(host, 'H');
		MOD_PRINT(guest, 'G');
	}
#undef MOD_PRINT
	if (colon)
		bf[colon - 1] = ':';
	return r;
}

static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
	"cpu-clock",
	"task-clock",
	"page-faults",
	"context-switches",
	"cpu-migrations",
	"minor-faults",
	"major-faults",
	"alignment-faults",
	"emulation-faults",
	"dummy",
};

static const char *__perf_evsel__sw_name(u64 config)
{
	if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
		return perf_evsel__sw_names[config];
	return "unknown-software";
}

static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
	int r;

	r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);

	if (type & HW_BREAKPOINT_R)
		r += scnprintf(bf + r, size - r, "r");

	if (type & HW_BREAKPOINT_W)
		r += scnprintf(bf + r, size - r, "w");

	if (type & HW_BREAKPOINT_X)
		r += scnprintf(bf + r, size - r, "x");

	return r;
}

static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	struct perf_event_attr *attr = &evsel->attr;
	int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
	return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}

const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
				[PERF_EVSEL__MAX_ALIASES] = {
	{ "L1-dcache", "l1-d", "l1d", "L1-data", },
	{ "L1-icache", "l1-i", "l1i", "L1-instruction", },
	{ "LLC", "L2", },
	{ "dTLB", "d-tlb", "Data-TLB", },
	{ "iTLB", "i-tlb", "Instruction-TLB", },
	{ "branch", "branches", "bpu", "btb", "bpc", },
	{ "node", },
};

const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
				   [PERF_EVSEL__MAX_ALIASES] = {
	{ "load", "loads", "read", },
	{ "store", "stores", "write", },
	{ "prefetch", "prefetches", "speculative-read", "speculative-load", },
};

const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
				       [PERF_EVSEL__MAX_ALIASES] = {
	{ "refs", "Reference", "ops", "access", },
	{ "misses", "miss", },
};

#define C(x)		PERF_COUNT_HW_CACHE_##x
#define CACHE_READ	(1 << C(OP_READ))
#define CACHE_WRITE	(1 << C(OP_WRITE))
#define CACHE_PREFETCH	(1 << C(OP_PREFETCH))
#define COP(x)		(1 << x)

/*
 * cache operation stat
 * L1I : Read and prefetch only
 * ITLB and BPU : Read-only
 */
static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
	[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
	[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
	[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
	[C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
	[C(ITLB)] = (CACHE_READ),
	[C(BPU)] = (CACHE_READ),
	[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};

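/*
 * Check @op against the table above, e.g. prefetch is not a valid operation
 * for the ITLB or BPU events, which are read-only.
 */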
bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
{
	if (perf_evsel__hw_cache_stat[type] & COP(op))
		return true;	/* valid */
	else
		return false;	/* invalid */
}

int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
					    char *bf, size_t size)
{
	if (result) {
		return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
				 perf_evsel__hw_cache_op[op][0],
				 perf_evsel__hw_cache_result[result][0]);
	}

	return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
			 perf_evsel__hw_cache_op[op][1]);
}

static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
	u8 op, result, type = (config >> 0) & 0xff;
	const char *err = "unknown-ext-hardware-cache-type";

	if (type >= PERF_COUNT_HW_CACHE_MAX)
		goto out_err;

	op = (config >> 8) & 0xff;
	err = "unknown-ext-hardware-cache-op";
	if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
		goto out_err;

	result = (config >> 16) & 0xff;
	err = "unknown-ext-hardware-cache-result";
	if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		goto out_err;

	err = "invalid-cache";
	if (!perf_evsel__is_cache_op_valid(type, op))
		goto out_err;

	return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
	return scnprintf(bf, size, "%s", err);
}

static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
{
	int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
	return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}

const char *perf_evsel__name(struct perf_evsel *evsel)
{
	char bf[128];

	if (evsel->name)
		return evsel->name;

	switch (evsel->attr.type) {
	case PERF_TYPE_RAW:
		perf_evsel__raw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HARDWARE:
		perf_evsel__hw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_HW_CACHE:
		perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_SOFTWARE:
		perf_evsel__sw_name(evsel, bf, sizeof(bf));
		break;

	case PERF_TYPE_TRACEPOINT:
		scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
		break;

	case PERF_TYPE_BREAKPOINT:
		perf_evsel__bp_name(evsel, bf, sizeof(bf));
		break;

	default:
		scnprintf(bf, sizeof(bf), "unknown attr type: %d",
			  evsel->attr.type);
		break;
	}

	evsel->name = strdup(bf);

	return evsel->name ?: "unknown";
}

const char *perf_evsel__group_name(struct perf_evsel *evsel)
{
	return evsel->group_name ?: "anon group";
}

int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
{
	int ret;
	struct perf_evsel *pos;
	const char *group_name = perf_evsel__group_name(evsel);

	ret = scnprintf(buf, size, "%s", group_name);

	ret += scnprintf(buf + ret, size - ret, " { %s",
			 perf_evsel__name(evsel));

	for_each_group_member(pos, evsel)
		ret += scnprintf(buf + ret, size - ret, ", %s",
				 perf_evsel__name(pos));

	ret += scnprintf(buf + ret, size - ret, " }");

	return ret;
}

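/*
 * Configure callchain collection for this evsel according to @param:
 * CALLCHAIN_LBR falls back to frame pointers when a branch stack is already
 * requested or when user space is excluded, and CALLCHAIN_DWARF additionally
 * samples user registers and the user stack.
 */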
void perf_evsel__config_callchain(struct perf_evsel *evsel,
				  struct record_opts *opts,
				  struct callchain_param *param)
{
	bool function = perf_evsel__is_function_event(evsel);
	struct perf_event_attr *attr = &evsel->attr;

	perf_evsel__set_sample_bit(evsel, CALLCHAIN);

	attr->sample_max_stack = param->max_stack;

	if (param->record_mode == CALLCHAIN_LBR) {
		if (!opts->branch_stack) {
			if (attr->exclude_user) {
				pr_warning("LBR callstack option is only available "
					   "to get user callchain information. "
					   "Falling back to framepointers.\n");
			} else {
				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
				attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
							PERF_SAMPLE_BRANCH_CALL_STACK |
							PERF_SAMPLE_BRANCH_NO_CYCLES |
							PERF_SAMPLE_BRANCH_NO_FLAGS;
			}
		} else
			pr_warning("Cannot use LBR callstack with branch stack. "
				   "Falling back to framepointers.\n");
	}

	if (param->record_mode == CALLCHAIN_DWARF) {
		if (!function) {
			perf_evsel__set_sample_bit(evsel, REGS_USER);
			perf_evsel__set_sample_bit(evsel, STACK_USER);
			attr->sample_regs_user |= PERF_REGS_MASK;
			attr->sample_stack_user = param->dump_size;
			attr->exclude_callchain_user = 1;
		} else {
			pr_info("Cannot use DWARF unwind for function trace event,"
				" falling back to framepointers.\n");
		}
	}

	if (function) {
		pr_info("Disabling user space callchains for function trace event.\n");
		attr->exclude_callchain_user = 1;
	}
}

static void
perf_evsel__reset_callgraph(struct perf_evsel *evsel,
			    struct callchain_param *param)
{
	struct perf_event_attr *attr = &evsel->attr;

	perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
	if (param->record_mode == CALLCHAIN_LBR) {
		perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
					      PERF_SAMPLE_BRANCH_CALL_STACK);
	}
	if (param->record_mode == CALLCHAIN_DWARF) {
		perf_evsel__reset_sample_bit(evsel, REGS_USER);
		perf_evsel__reset_sample_bit(evsel, STACK_USER);
	}
}

static void apply_config_terms(struct perf_evsel *evsel,
			       struct record_opts *opts)
{
	struct perf_evsel_config_term *term;
	struct list_head *config_terms = &evsel->config_terms;
	struct perf_event_attr *attr = &evsel->attr;
	struct callchain_param param;
	u32 dump_size = 0;
	int max_stack = 0;
	const char *callgraph_buf = NULL;

	/* callgraph default */
	param.record_mode = callchain_param.record_mode;

	list_for_each_entry(term, config_terms, list) {
		switch (term->type) {
		case PERF_EVSEL__CONFIG_TERM_PERIOD:
			if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
				attr->sample_period = term->val.period;
				attr->freq = 0;
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_FREQ:
			if (!(term->weak && opts->user_freq != UINT_MAX)) {
				attr->sample_freq = term->val.freq;
				attr->freq = 1;
			}
			break;
		case PERF_EVSEL__CONFIG_TERM_TIME:
			if (term->val.time)
				perf_evsel__set_sample_bit(evsel, TIME);
			else
				perf_evsel__reset_sample_bit(evsel, TIME);
			break;
		case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
			callgraph_buf = term->val.callgraph;
			break;
		case PERF_EVSEL__CONFIG_TERM_BRANCH:
			if (term->val.branch && strcmp(term->val.branch, "no")) {
				perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
				parse_branch_str(term->val.branch,
						 &attr->branch_sample_type);
			} else
				perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
			break;
		case PERF_EVSEL__CONFIG_TERM_STACK_USER:
			dump_size = term->val.stack_user;
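			/* rounded up to a multiple of u64 and applied below */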
			break;
		case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
			max_stack = term->val.max_stack;
			break;
		case PERF_EVSEL__CONFIG_TERM_INHERIT:
			/*
			 * attr->inherit should have already been set by
			 * perf_evsel__config. If the user explicitly set
			 * inherit using config terms, override the global
			 * opt->no_inherit setting.
			 */
			attr->inherit = term->val.inherit ? 1 : 0;
			break;
		case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
			attr->write_backward = term->val.overwrite ? 1 : 0;
			break;
		case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
			BUG_ON(1);
		default:
			break;
		}
	}

	/* User explicitly set per-event callgraph, clear the old setting and reset. */
	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
		if (max_stack) {
			param.max_stack = max_stack;
			if (callgraph_buf == NULL)
				callgraph_buf = "fp";
		}

		/* parse callgraph parameters */
		if (callgraph_buf != NULL) {
			if (!strcmp(callgraph_buf, "no")) {
				param.enabled = false;
				param.record_mode = CALLCHAIN_NONE;
			} else {
				param.enabled = true;
				if (parse_callchain_record(callgraph_buf, &param)) {
					pr_err("per-event callgraph setting for %s failed. "
					       "Apply callgraph global setting for it\n",
					       evsel->name);
					return;
				}
			}
		}
		if (dump_size > 0) {
			dump_size = round_up(dump_size, sizeof(u64));
			param.dump_size = dump_size;
		}

		/* If global callgraph set, clear it */
		if (callchain_param.enabled)
			perf_evsel__reset_callgraph(evsel, &callchain_param);

		/* set perf-event callgraph */
		if (param.enabled)
			perf_evsel__config_callchain(evsel, opts, &param);
	}
}

/*
 * The enable_on_exec/disabled value strategy:
 *
 *  1) For any type of traced program:
 *    - all independent events and group leaders are disabled
 *    - all group members are enabled
 *
 *     Group members are ruled by group leaders. They need to
 *     be enabled, because the group scheduling relies on that.
 *
 *  2) For traced programs executed by perf:
 *    - all independent events and group leaders have
 *      enable_on_exec set
 *    - we don't specifically enable or disable any event during
 *      the record command
 *
 *     Independent events and group leaders are initially disabled
 *     and get enabled by exec. Group members are ruled by group
 *     leaders as stated in 1).
 *
 *  3) For traced programs attached by perf (pid/tid):
 *    - we specifically enable or disable all events during
 *      the record command
 *
 *     When attaching events to an already running traced program we
 *     enable/disable events specifically, as there's no
 *     initial traced exec call.
 */
void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
			struct callchain_param *callchain)
{
	struct perf_evsel *leader = evsel->leader;
	struct perf_event_attr *attr = &evsel->attr;
	int track = evsel->tracking;
	bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;

	attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
	attr->inherit = !opts->no_inherit;
	attr->write_backward = opts->overwrite ? 1 : 0;

	perf_evsel__set_sample_bit(evsel, IP);
	perf_evsel__set_sample_bit(evsel, TID);

	if (evsel->sample_read) {
		perf_evsel__set_sample_bit(evsel, READ);

		/*
		 * We need ID even in case of a single event, because
		 * PERF_SAMPLE_READ processes ID-specific data.
		 */
		perf_evsel__set_sample_id(evsel, false);

		/*
		 * Apply group format only if we belong to a group
		 * with more than one member.
		 */
		if (leader->nr_members > 1) {
			attr->read_format |= PERF_FORMAT_GROUP;
			attr->inherit = 0;
		}
	}

	/*
	 * We default some events to have a default interval. But keep
	 * it a weak assumption overridable by the user.
	 */
	if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
				     opts->user_interval != ULLONG_MAX)) {
		if (opts->freq) {
			perf_evsel__set_sample_bit(evsel, PERIOD);
			attr->freq = 1;
			attr->sample_freq = opts->freq;
		} else {
			attr->sample_period = opts->default_interval;
		}
	}

	/*
	 * Disable sampling for all group members other
	 * than leader in case leader 'leads' the sampling.
	 */
	if ((leader != evsel) && leader->sample_read) {
		attr->sample_freq = 0;
		attr->sample_period = 0;
	}

	if (opts->no_samples)
		attr->sample_freq = 0;

	if (opts->inherit_stat) {
		evsel->attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING |
			PERF_FORMAT_ID;
		attr->inherit_stat = 1;
	}

	if (opts->sample_address) {
		perf_evsel__set_sample_bit(evsel, ADDR);
		attr->mmap_data = track;
	}

	/*
	 * We don't allow user space callchains for function trace
	 * events, due to issues with page faults while tracing the page
	 * fault handler and its overall trickiness.
	 */
	if (perf_evsel__is_function_event(evsel))
		evsel->attr.exclude_callchain_user = 1;

	if (callchain && callchain->enabled && !evsel->no_aux_samples)
		perf_evsel__config_callchain(evsel, opts, callchain);

	if (opts->sample_intr_regs) {
		attr->sample_regs_intr = opts->sample_intr_regs;
		perf_evsel__set_sample_bit(evsel, REGS_INTR);
	}

	if (opts->sample_user_regs) {
		attr->sample_regs_user |= opts->sample_user_regs;
		perf_evsel__set_sample_bit(evsel, REGS_USER);
	}

	if (target__has_cpu(&opts->target) || opts->sample_cpu)
		perf_evsel__set_sample_bit(evsel, CPU);

	if (opts->period)
		perf_evsel__set_sample_bit(evsel, PERIOD);

	/*
	 * When the user explicitly disabled time don't force it here.
	 */
	if (opts->sample_time &&
	    (!perf_missing_features.sample_id_all &&
	    (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
	     opts->sample_time_set)))
		perf_evsel__set_sample_bit(evsel, TIME);

	if (opts->raw_samples && !evsel->no_aux_samples) {
		perf_evsel__set_sample_bit(evsel, TIME);
		perf_evsel__set_sample_bit(evsel, RAW);
		perf_evsel__set_sample_bit(evsel, CPU);
	}

	if (opts->sample_address)
		perf_evsel__set_sample_bit(evsel, DATA_SRC);

	if (opts->sample_phys_addr)
		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);

	if (opts->no_buffering) {
		attr->watermark = 0;
		attr->wakeup_events = 1;
	}
	if (opts->branch_stack && !evsel->no_aux_samples) {
		perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
		attr->branch_sample_type = opts->branch_stack;
	}

	if (opts->sample_weight)
		perf_evsel__set_sample_bit(evsel, WEIGHT);

	attr->task = track;
	attr->mmap = track;
	attr->mmap2 = track && !perf_missing_features.mmap2;
	attr->comm = track;

	if (opts->record_namespaces)
		attr->namespaces = track;

	if (opts->record_switch_events)
		attr->context_switch = track;

	if (opts->sample_transaction)
		perf_evsel__set_sample_bit(evsel, TRANSACTION);

	if (opts->running_time) {
		evsel->attr.read_format |=
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING;
	}

	/*
	 * XXX see the function comment above
	 *
	 * Disabling only independent events or group leaders,
	 * keeping group members enabled.
	 */
	if (perf_evsel__is_group_leader(evsel))
		attr->disabled = 1;

	/*
	 * Setting enable_on_exec for independent events and
	 * group leaders for traced programs executed by perf.
	 */
	if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
	    !opts->initial_delay)
		attr->enable_on_exec = 1;

	if (evsel->immediate) {
		attr->disabled = 0;
		attr->enable_on_exec = 0;
	}

	clockid = opts->clockid;
	if (opts->use_clockid) {
		attr->use_clockid = 1;
		attr->clockid = opts->clockid;
	}

	if (evsel->precise_max)
		perf_event_attr__set_max_precise_ip(attr);

	if (opts->all_user) {
		attr->exclude_kernel = 1;
		attr->exclude_user = 0;
	}

	if (opts->all_kernel) {
		attr->exclude_kernel = 0;
		attr->exclude_user = 1;
	}

	/*
	 * Apply event-specific term settings; they override
	 * any global configuration.
	 */
	apply_config_terms(evsel, opts);

	evsel->ignore_missing_thread = opts->ignore_missing_thread;
}

static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->system_wide)
		nthreads = 1;

	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		int cpu, thread;
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}
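
/*
 * Run @ioc on every file descriptor of the evsel (every cpu/thread pair),
 * returning the error of the first ioctl() that fails.
 */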
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
				 int ioc, void *arg)
{
	int cpu, thread;

	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
		for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
			int fd = FD(evsel, cpu, thread),
			    err = ioctl(fd, ioc, arg);

			if (err)
				return err;
		}
	}

	return 0;
}

int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
	return perf_evsel__run_ioctl(evsel,
				     PERF_EVENT_IOC_SET_FILTER,
				     (void *)filter);
}

int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
{
	char *new_filter = strdup(filter);

	if (new_filter != NULL) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}

static int perf_evsel__append_filter(struct perf_evsel *evsel,
				     const char *fmt, const char *filter)
{
	char *new_filter;

	if (evsel->filter == NULL)
		return perf_evsel__set_filter(evsel, filter);

	if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
		free(evsel->filter);
		evsel->filter = new_filter;
		return 0;
	}

	return -1;
}

int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
{
	return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
}

int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
{
	return perf_evsel__append_filter(evsel, "%s,%s", filter);
}

int perf_evsel__enable(struct perf_evsel *evsel)
{
	return perf_evsel__run_ioctl(evsel,
				     PERF_EVENT_IOC_ENABLE,
				     0);
}

int perf_evsel__disable(struct perf_evsel *evsel)
{
	return perf_evsel__run_ioctl(evsel,
				     PERF_EVENT_IOC_DISABLE,
				     0);
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (ncpus == 0 || nthreads == 0)
		return 0;

	if (evsel->system_wide)
		nthreads = 1;

	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

static void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	zfree(&evsel->id);
}

static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
{
	struct perf_evsel_config_term *term, *h;

	list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
		list_del(&term->list);
		free(term);
	}
}

void perf_evsel__close_fd(struct perf_evsel *evsel)
{
	int cpu, thread;

	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
		for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	assert(evsel->evlist == NULL);

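	/*
	 * Drop everything the evsel owns; the struct itself is freed
	 * separately by perf_evsel__delete().
	 */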
	perf_evsel__free_fd(evsel);
	perf_evsel__free_id(evsel);
	perf_evsel__free_config_terms(evsel);
	close_cgroup(evsel->cgrp);
	cpu_map__put(evsel->cpus);
	cpu_map__put(evsel->own_cpus);
	thread_map__put(evsel->threads);
	zfree(&evsel->group_name);
	zfree(&evsel->name);
	perf_evsel__object.fini(evsel);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	free(evsel);
}

void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
				struct perf_counts_values *count)
{
	struct perf_counts_values tmp;

	if (!evsel->prev_raw_counts)
		return;

	if (cpu == -1) {
		tmp = evsel->prev_raw_counts->aggr;
		evsel->prev_raw_counts->aggr = *count;
	} else {
		tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
		*perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
	}

	count->val = count->val - tmp.val;
	count->ena = count->ena - tmp.ena;
	count->run = count->run - tmp.run;
}

void perf_counts_values__scale(struct perf_counts_values *count,
			       bool scale, s8 *pscaled)
{
	s8 scaled = 0;

	if (scale) {
		if (count->run == 0) {
			scaled = -1;
			count->val = 0;
		} else if (count->run < count->ena) {
			scaled = 1;
			count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
		}
	} else
		count->ena = count->run = 0;

	if (pscaled)
		*pscaled = scaled;
}

static int perf_evsel__read_size(struct perf_evsel *evsel)
{
	u64 read_format = evsel->attr.read_format;
	int entry = sizeof(u64); /* value */
	int size = 0;
	int nr = 1;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		size += sizeof(u64);

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		size += sizeof(u64);

	if (read_format & PERF_FORMAT_ID)
		entry += sizeof(u64);

	if (read_format & PERF_FORMAT_GROUP) {
		nr = evsel->nr_members;
		size += sizeof(u64);
	}

	size += entry * nr;
	return size;
}

int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
		     struct perf_counts_values *count)
{
	size_t size = perf_evsel__read_size(evsel);

	memset(count, 0, sizeof(*count));

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
		return -errno;

	return 0;
}

static int
perf_evsel__read_one(struct perf_evsel *evsel, int cpu, int thread)
{
	struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);

	return perf_evsel__read(evsel, cpu, thread, count);
}

static void
perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
		      u64 val, u64 ena, u64 run)
{
	struct perf_counts_values *count;

	count = perf_counts(counter->counts, cpu, thread);

	count->val = val;
	count->ena = ena;
	count->run = run;
	count->loaded = true;
}

static int
perf_evsel__process_group_data(struct perf_evsel *leader,
			       int cpu, int thread, u64 *data)
{
	u64 read_format = leader->attr.read_format;
	struct sample_read_value *v;
	u64 nr, ena = 0, run = 0, i;

	nr = *data++;

	if (nr != (u64) leader->nr_members)
		return -EINVAL;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		ena = *data++;

	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		run = *data++;

	v = (struct sample_read_value *) data;

	perf_evsel__set_count(leader, cpu, thread,
			      v[0].value, ena, run);

	for (i = 1; i < nr; i++) {
		struct perf_evsel *counter;

		counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
		if (!counter)
			return -EINVAL;

		perf_evsel__set_count(counter, cpu, thread,
				      v[i].value, ena, run);
	}

	return 0;
}

static int
perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
{
	struct perf_stat_evsel *ps = leader->stats;
	u64 read_format = leader->attr.read_format;
	int size = perf_evsel__read_size(leader);
	u64 *data = ps->group_data;

	if (!(read_format & PERF_FORMAT_ID))
		return -EINVAL;

	if (!perf_evsel__is_group_leader(leader))
		return -EINVAL;

	if (!data) {
		data = zalloc(size);
		if (!data)
			return -ENOMEM;

		ps->group_data = data;
	}

	if (FD(leader, cpu, thread) < 0)
		return -EINVAL;

	if (readn(FD(leader, cpu, thread), data, size) <= 0)
		return -errno;

	return perf_evsel__process_group_data(leader, cpu, thread, data);
}

int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread)
{
	u64 read_format = evsel->attr.read_format;

	if (read_format & PERF_FORMAT_GROUP)
		return perf_evsel__read_group(evsel, cpu, thread);
	else
		return perf_evsel__read_one(evsel, cpu, thread);
}

int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
		return -errno;

	perf_evsel__compute_deltas(evsel, cpu, thread, &count);
	perf_counts_values__scale(&count, scale, NULL);
	*perf_counts(evsel->counts, cpu, thread) = count;
	return 0;
}

static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
{
	struct perf_evsel *leader = evsel->leader;
	int fd;

	if (perf_evsel__is_group_leader(evsel))
		return -1;

	/*
	 * Leader must be already processed/open,
	 * if not it's a bug.
	 */
	BUG_ON(!leader->fd);

	fd = FD(leader, cpu, thread);
	BUG_ON(fd == -1);

	return fd;
}

struct bit_names {
	int bit;
	const char *name;
};

static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
{
	bool first_bit = true;
	int i = 0;

	do {
		if (value & bits[i].bit) {
			int printed = scnprintf(buf, size, "%s%s",
						first_bit ? "" : "|", bits[i].name);

			/* keep buf and size in sync so we stay inside the caller's buffer */
			buf += printed;
			size -= printed;
			first_bit = false;
		}
	} while (bits[++i].name != NULL);
}

static void __p_sample_type(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_SAMPLE_##n, #n }
	struct bit_names bits[] = {
		bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
		bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
		bit_name(WEIGHT), bit_name(PHYS_ADDR),
		{ .name = NULL, }
	};
#undef bit_name
	__p_bits(buf, size, value, bits);
}

static void __p_branch_sample_type(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
	struct bit_names bits[] = {
		bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
		bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
		bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
		bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
		bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
		{ .name = NULL, }
	};
#undef bit_name
	__p_bits(buf, size, value, bits);
}

static void __p_read_format(char *buf, size_t size, u64 value)
{
#define bit_name(n) { PERF_FORMAT_##n, #n }
	struct bit_names bits[] = {
		bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
		bit_name(ID), bit_name(GROUP),
		{ .name = NULL, }
	};
#undef bit_name
	__p_bits(buf, size, value, bits);
}

#define BUF_SIZE		1024

#define p_hex(val)		snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
#define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
#define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
#define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)
#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
#define p_read_format(val)	__p_read_format(buf, BUF_SIZE, val)

#define PRINT_ATTRn(_n, _f, _p)				\
do {							\
	if (attr->_f) {					\
		_p(attr->_f);				\
		ret += attr__fprintf(fp, _n, buf, priv);\
	}						\
} while (0)

#define PRINT_ATTRf(_f, _p)	PRINT_ATTRn(#_f, _f, _p)

int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
			     attr__fprintf_f attr__fprintf, void *priv)
{
	char buf[BUF_SIZE];
	int ret = 0;

	PRINT_ATTRf(type, p_unsigned);
	PRINT_ATTRf(size, p_unsigned);
	PRINT_ATTRf(config, p_hex);
	PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
	PRINT_ATTRf(sample_type, p_sample_type);
	PRINT_ATTRf(read_format, p_read_format);

	PRINT_ATTRf(disabled, p_unsigned);
	PRINT_ATTRf(inherit, p_unsigned);
	PRINT_ATTRf(pinned, p_unsigned);
	PRINT_ATTRf(exclusive, p_unsigned);
	PRINT_ATTRf(exclude_user, p_unsigned);
	PRINT_ATTRf(exclude_kernel, p_unsigned);
	PRINT_ATTRf(exclude_hv, p_unsigned);
	PRINT_ATTRf(exclude_idle, p_unsigned);
	PRINT_ATTRf(mmap, p_unsigned);
	PRINT_ATTRf(comm, p_unsigned);
	PRINT_ATTRf(freq, p_unsigned);
	PRINT_ATTRf(inherit_stat, p_unsigned);
	PRINT_ATTRf(enable_on_exec, p_unsigned);
	PRINT_ATTRf(task, p_unsigned);
	PRINT_ATTRf(watermark, p_unsigned);
	PRINT_ATTRf(precise_ip, p_unsigned);
	PRINT_ATTRf(mmap_data, p_unsigned);
	PRINT_ATTRf(sample_id_all, p_unsigned);
	PRINT_ATTRf(exclude_host, p_unsigned);
	PRINT_ATTRf(exclude_guest, p_unsigned);
	PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
	PRINT_ATTRf(exclude_callchain_user, p_unsigned);
	PRINT_ATTRf(mmap2, p_unsigned);
	PRINT_ATTRf(comm_exec, p_unsigned);
	PRINT_ATTRf(use_clockid, p_unsigned);
	PRINT_ATTRf(context_switch, p_unsigned);
	PRINT_ATTRf(write_backward, p_unsigned);

	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
	PRINT_ATTRf(bp_type, p_unsigned);
	PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
	PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
	PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
	PRINT_ATTRf(sample_regs_user, p_hex);
	PRINT_ATTRf(sample_stack_user, p_unsigned);
	PRINT_ATTRf(clockid, p_signed);
	PRINT_ATTRf(sample_regs_intr, p_hex);
	PRINT_ATTRf(aux_watermark, p_unsigned);
	PRINT_ATTRf(sample_max_stack, p_unsigned);

	return ret;
}

static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
				void *priv __maybe_unused)
{
	return fprintf(fp, " %-32s %s\n", name, val);
}

static void perf_evsel__remove_fd(struct perf_evsel *pos,
				  int nr_cpus, int nr_threads,
				  int thread_idx)
{
	for (int cpu = 0; cpu < nr_cpus; cpu++)
		for (int thread = thread_idx; thread < nr_threads - 1; thread++)
			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
}

static int update_fds(struct perf_evsel *evsel,
		      int nr_cpus, int cpu_idx,
		      int nr_threads, int thread_idx)
{
	struct perf_evsel *pos;

	if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
		return -EINVAL;

	evlist__for_each_entry(evsel->evlist, pos) {
		nr_cpus = pos != evsel ? nr_cpus : cpu_idx;

		perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);

		/*
		 * Since the fds for the next evsel have not been created,
		 * there is no need to iterate the whole event list.
		 */
		if (pos == evsel)
			break;
	}
	return 0;
}

static bool ignore_missing_thread(struct perf_evsel *evsel,
				  int nr_cpus, int cpu,
				  struct thread_map *threads,
				  int thread, int err)
{
	pid_t ignore_pid = thread_map__pid(threads, thread);

	if (!evsel->ignore_missing_thread)
		return false;

	/* The system wide setup does not work with threads. */
	if (evsel->system_wide)
		return false;

	/* -ESRCH is the perf event syscall errno for PIDs that are not found. */
	if (err != -ESRCH)
		return false;

	/* If there's only one thread, let it fail. */
	if (threads->nr == 1)
		return false;

	/*
	 * We should remove the fd for the missing thread first
	 * because thread_map__remove() will decrease threads->nr.
	 */
	if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
		return false;

	if (thread_map__remove(threads, thread))
		return false;

	pr_warning("WARNING: Ignored open failure for pid %d\n",
		   ignore_pid);
	return true;
}

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads)
{
	int cpu, thread, nthreads;
	unsigned long flags = PERF_FLAG_FD_CLOEXEC;
	int pid = -1, err;
	enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;

	if (perf_missing_features.write_backward && evsel->attr.write_backward)
		return -EINVAL;

	if (cpus == NULL) {
		static struct cpu_map *empty_cpu_map;

		if (empty_cpu_map == NULL) {
			empty_cpu_map = cpu_map__dummy_new();
			if (empty_cpu_map == NULL)
				return -ENOMEM;
		}

		cpus = empty_cpu_map;
	}

	if (threads == NULL) {
		static struct thread_map *empty_thread_map;

		if (empty_thread_map == NULL) {
			empty_thread_map = thread_map__new_by_tid(-1);
			if (empty_thread_map == NULL)
				return -ENOMEM;
		}

		threads = empty_thread_map;
	}

	if (evsel->system_wide)
		nthreads = 1;
	else
		nthreads = threads->nr;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, nthreads) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags |= PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

fallback_missing_features:
	if (perf_missing_features.clockid_wrong)
		evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
	if (perf_missing_features.clockid) {
		evsel->attr.use_clockid = 0;
		evsel->attr.clockid = 0;
	}
	if (perf_missing_features.cloexec)
		flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
	if (perf_missing_features.mmap2)
		evsel->attr.mmap2 = 0;
	if (perf_missing_features.exclude_guest)
		evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
	if (perf_missing_features.lbr_flags)
		evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
						    PERF_SAMPLE_BRANCH_NO_CYCLES);
	if (perf_missing_features.group_read && evsel->attr.inherit)
		evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
retry_sample_id:
	if (perf_missing_features.sample_id_all)
		evsel->attr.sample_id_all = 0;

	if (verbose >= 2) {
		fprintf(stderr, "%.60s\n", graph_dotted_line);
		fprintf(stderr, "perf_event_attr:\n");
		perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
		fprintf(stderr, "%.60s\n", graph_dotted_line);
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {

		for (thread = 0; thread < nthreads; thread++) {
			int fd, group_fd;

			if (!evsel->cgrp && !evsel->system_wide)
				pid = thread_map__pid(threads, thread);

			group_fd = get_group_fd(evsel, cpu, thread);
retry_open:
			pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
				  pid, cpus->map[cpu], group_fd, flags);

			test_attr__ready();

			fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
						 group_fd, flags);

			FD(evsel, cpu, thread) = fd;

			if (fd < 0) {
				err = -errno;

				if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
					/*
					 * We just removed 1 thread, so take a step
					 * back on thread index and lower the upper
					 * nthreads limit.
					 */
					nthreads--;
					thread--;

					/* ... and pretend like nothing has happened. */
					err = 0;
					continue;
				}

				pr_debug2("\nsys_perf_event_open failed, error %d\n",
					  err);
				goto try_fallback;
			}

			pr_debug2(" = %d\n", fd);

			if (evsel->bpf_fd >= 0) {
				int evt_fd = fd;
				int bpf_fd = evsel->bpf_fd;

				err = ioctl(evt_fd,
					    PERF_EVENT_IOC_SET_BPF,
					    bpf_fd);
				if (err && errno != EEXIST) {
					pr_err("failed to attach bpf fd %d: %s\n",
					       bpf_fd, strerror(errno));
					err = -EINVAL;
					goto out_close;
				}
			}

			set_rlimit = NO_CHANGE;

			/*
			 * If we succeeded but had to kill clockid, fail and
			 * have perf_evsel__open_strerror() print us a nice
			 * error.
			 */
			if (perf_missing_features.clockid ||
			    perf_missing_features.clockid_wrong) {
				err = -EINVAL;
				goto out_close;
			}
		}
	}

	return 0;

try_fallback:
	/*
	 * perf stat needs between 5 and 22 fds per CPU. When we run out
	 * of them try to increase the limits.
	 */
	if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
		struct rlimit l;
		int old_errno = errno;

		if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
			if (set_rlimit == NO_CHANGE)
				l.rlim_cur = l.rlim_max;
			else {
				l.rlim_cur = l.rlim_max + 1000;
				l.rlim_max = l.rlim_cur;
			}
			if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
				set_rlimit++;
				errno = old_errno;
				goto retry_open;
			}
		}
		errno = old_errno;
	}

	if (err != -EINVAL || cpu > 0 || thread > 0)
		goto out_close;

	/*
	 * Must probe features in the order they were added to the
	 * perf_event_attr interface.
	 */
	if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
		perf_missing_features.write_backward = true;
		pr_debug2("switching off write_backward\n");
		goto out_close;
	} else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
		perf_missing_features.clockid_wrong = true;
		pr_debug2("switching off clockid\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
		perf_missing_features.clockid = true;
		pr_debug2("switching off use_clockid\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
		perf_missing_features.cloexec = true;
		pr_debug2("switching off cloexec flag\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
		perf_missing_features.mmap2 = true;
		pr_debug2("switching off mmap2\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.exclude_guest &&
		   (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
		perf_missing_features.exclude_guest = true;
		pr_debug2("switching off exclude_guest, exclude_host\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.sample_id_all) {
		perf_missing_features.sample_id_all = true;
		pr_debug2("switching off sample_id_all\n");
		goto retry_sample_id;
	} else if (!perf_missing_features.lbr_flags &&
		   (evsel->attr.branch_sample_type &
		    (PERF_SAMPLE_BRANCH_NO_CYCLES |
		     PERF_SAMPLE_BRANCH_NO_FLAGS))) {
		perf_missing_features.lbr_flags = true;
		pr_debug2("switching off branch sample type no (cycles/flags)\n");
		goto fallback_missing_features;
	} else if (!perf_missing_features.group_read &&
		   evsel->attr.inherit &&
		   (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
		perf_missing_features.group_read = true;
		pr_debug2("switching off group read\n");
		goto fallback_missing_features;
	}
out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = nthreads;
	} while (--cpu >= 0);
	return err;
}

void perf_evsel__close(struct perf_evsel *evsel)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel);
	perf_evsel__free_fd(evsel);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus)
{
	return perf_evsel__open(evsel, cpus, NULL);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads)
{
	return perf_evsel__open(evsel, NULL, threads);
}

static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
				       const union perf_event *event,
				       struct perf_sample *sample)
{
	u64 type = evsel->attr.sample_type;
	const u64 *array = event->sample.array;
	bool swapped = evsel->needs_swap;
	union u64_swap u;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		sample->cpu = u.val32[0];
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		sample->pid = u.val32[0];
		sample->tid = u.val32[1];
		array--;
	}

	return 0;
}

static inline bool overflow(const void *endp, u16 max_size, const void *offset,
			    u64 size)
{
	return size > max_size || offset + size > endp;
}

#define OVERFLOW_CHECK(offset, size, max_size)				\
	do {								\
		if (overflow(endp, (max_size), (offset), (size)))	\
			return -EFAULT;					\
	} while (0)

#define OVERFLOW_CHECK_u64(offset) \
	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))

static int
perf_event__check_size(union perf_event *event, unsigned int sample_size)
{
	/*
	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
	 * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
	 * check the format does not go past the end of the event.
static inline bool overflow(const void *endp, u16 max_size, const void *offset,
			    u64 size)
{
	return size > max_size || offset + size > endp;
}

#define OVERFLOW_CHECK(offset, size, max_size)				\
	do {								\
		if (overflow(endp, (max_size), (offset), (size)))	\
			return -EFAULT;					\
	} while (0)

#define OVERFLOW_CHECK_u64(offset) \
	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))

static int
perf_event__check_size(union perf_event *event, unsigned int sample_size)
{
	/*
	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
	 * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
	 * check the format does not go past the end of the event.
	 */
	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	return 0;
}

int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
			     struct perf_sample *data)
{
	u64 type = evsel->attr.sample_type;
	bool swapped = evsel->needs_swap;
	const u64 *array;
	u16 max_size = event->header.size;
	const void *endp = (void *)event + max_size;
	u64 sz;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	memset(data, 0, sizeof(*data));
	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;
	data->period = evsel->attr.sample_period;
	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
	data->id = -1ULL;
	data->data_src = PERF_MEM_DATA_SRC_NONE;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!evsel->attr.sample_id_all)
			return 0;
		return perf_evsel__parse_id_sample(evsel, event, data);
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_IP) {
		data->ip = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

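	/*
	 * PERF_SAMPLE_READ layout, as parsed below: one u64 (the single
	 * counter value, or group.nr for PERF_FORMAT_GROUP), optional
	 * time_enabled/time_running u64s, then either group.nr
	 * sample_read_value entries or the single counter's id.
	 */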
	if (type & PERF_SAMPLE_READ) {
		u64 read_format = evsel->attr.read_format;

		OVERFLOW_CHECK_u64(array);
		if (read_format & PERF_FORMAT_GROUP)
			data->read.group.nr = *array;
		else
			data->read.one.value = *array;

		array++;

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_enabled = *array;
			array++;
		}

		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
			OVERFLOW_CHECK_u64(array);
			data->read.time_running = *array;
			array++;
		}

		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			const u64 max_group_nr = UINT64_MAX /
					sizeof(struct sample_read_value);

			if (data->read.group.nr > max_group_nr)
				return -EFAULT;
			sz = data->read.group.nr *
			     sizeof(struct sample_read_value);
			OVERFLOW_CHECK(array, sz, max_size);
			data->read.group.values =
					(struct sample_read_value *)array;
			array = (void *)array + sz;
		} else {
			OVERFLOW_CHECK_u64(array);
			data->read.one.id = *array;
			array++;
		}
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);

		OVERFLOW_CHECK_u64(array);
		data->callchain = (struct ip_callchain *)array++;
		if (data->callchain->nr > max_callchain_nr)
			return -EFAULT;
		sz = data->callchain->nr * sizeof(u64);
		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		OVERFLOW_CHECK_u64(array);
		u.val64 = *array;

		/*
		 * Undo swap of u64, then swap on individual u32s,
		 * get the size of the raw area and undo all of the
		 * swap. The pevent interface handles endianness by
		 * itself.
		 */
		if (swapped) {
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}
		data->raw_size = u.val32[0];

		/*
		 * The raw data is aligned on 64 bits including the
		 * u32 size, so it's safe to use mem_bswap_64.
		 */
		if (swapped)
			mem_bswap_64((void *) array, data->raw_size);

		array = (void *)array + sizeof(u32);

		OVERFLOW_CHECK(array, data->raw_size, max_size);
		data->raw_data = (void *)array;
		array = (void *)array + data->raw_size;
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		const u64 max_branch_nr = UINT64_MAX /
					  sizeof(struct branch_entry);

		OVERFLOW_CHECK_u64(array);
		data->branch_stack = (struct branch_stack *)array++;

		if (data->branch_stack->nr > max_branch_nr)
			return -EFAULT;
		sz = data->branch_stack->nr * sizeof(struct branch_entry);
		OVERFLOW_CHECK(array, sz, max_size);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		OVERFLOW_CHECK_u64(array);
		data->user_regs.abi = *array;
		array++;

		if (data->user_regs.abi) {
			u64 mask = evsel->attr.sample_regs_user;

			sz = hweight_long(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_regs.mask = mask;
			data->user_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		OVERFLOW_CHECK_u64(array);
		sz = *array++;

		data->user_stack.offset = ((char *)(array - 1)
					  - (char *) event);

		if (!sz) {
			data->user_stack.size = 0;
		} else {
			OVERFLOW_CHECK(array, sz, max_size);
			data->user_stack.data = (char *)array;
			array = (void *)array + sz;
			OVERFLOW_CHECK_u64(array);
			data->user_stack.size = *array++;
			if (WARN_ONCE(data->user_stack.size > sz,
				      "user stack dump failure\n"))
				return -EFAULT;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT) {
		OVERFLOW_CHECK_u64(array);
		data->weight = *array;
		array++;
	}

	if (type & PERF_SAMPLE_DATA_SRC) {
		OVERFLOW_CHECK_u64(array);
		data->data_src = *array;
		array++;
	}

	if (type & PERF_SAMPLE_TRANSACTION) {
		OVERFLOW_CHECK_u64(array);
		data->transaction = *array;
		array++;
	}

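	/*
	 * Sampled register blocks start with an ABI word: when it is
	 * PERF_SAMPLE_REGS_ABI_NONE no register values follow, otherwise
	 * one u64 per bit set in the sampling mask comes next.
	 */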
	data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
	if (type & PERF_SAMPLE_REGS_INTR) {
		OVERFLOW_CHECK_u64(array);
		data->intr_regs.abi = *array;
		array++;

		if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
			u64 mask = evsel->attr.sample_regs_intr;

			sz = hweight_long(mask) * sizeof(u64);
			OVERFLOW_CHECK(array, sz, max_size);
			data->intr_regs.mask = mask;
			data->intr_regs.regs = (u64 *)array;
			array = (void *)array + sz;
		}
	}

	data->phys_addr = 0;
	if (type & PERF_SAMPLE_PHYS_ADDR) {
		data->phys_addr = *array;
		array++;
	}

	return 0;
}

int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
				       union perf_event *event,
				       u64 *timestamp)
{
	u64 type = evsel->attr.sample_type;
	const u64 *array;

	if (!(type & PERF_SAMPLE_TIME))
		return -1;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		struct perf_sample data = {
			.time = -1ULL,
		};

		if (!evsel->attr.sample_id_all)
			return -1;
		if (perf_evsel__parse_id_sample(evsel, event, &data))
			return -1;

		*timestamp = data.time;
		return 0;
	}

	array = event->sample.array;

	if (perf_event__check_size(event, evsel->sample_size))
		return -EFAULT;

	if (type & PERF_SAMPLE_IDENTIFIER)
		array++;

	if (type & PERF_SAMPLE_IP)
		array++;

	if (type & PERF_SAMPLE_TID)
		array++;

	if (type & PERF_SAMPLE_TIME)
		*timestamp = *array;

	return 0;
}

size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
				     u64 read_format)
{
	size_t sz, result = sizeof(struct sample_event);

	if (type & PERF_SAMPLE_IDENTIFIER)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_IP)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_TID)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_TIME)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_ADDR)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_ID)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_STREAM_ID)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_CPU)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_PERIOD)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_READ) {
		result += sizeof(u64);
		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
			result += sizeof(u64);
		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
			result += sizeof(u64);
		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			sz = sample->read.group.nr *
			     sizeof(struct sample_read_value);
			result += sz;
		} else {
			result += sizeof(u64);
		}
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		sz = (sample->callchain->nr + 1) * sizeof(u64);
		result += sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		result += sizeof(u32);
		result += sample->raw_size;
	}

	if (type & PERF_SAMPLE_BRANCH_STACK) {
		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
		sz += sizeof(u64);
		result += sz;
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		if (sample->user_regs.abi) {
			result += sizeof(u64);
			sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
			result += sz;
		} else {
			result += sizeof(u64);
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		sz = sample->user_stack.size;
		result += sizeof(u64);
		if (sz) {
			result += sz;
			result += sizeof(u64);
		}
	}

	if (type & PERF_SAMPLE_WEIGHT)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_DATA_SRC)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_TRANSACTION)
		result += sizeof(u64);

	if (type & PERF_SAMPLE_REGS_INTR) {
		if (sample->intr_regs.abi) {
			result += sizeof(u64);
			sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
			result += sz;
		} else {
			result += sizeof(u64);
		}
	}

	if (type & PERF_SAMPLE_PHYS_ADDR)
		result += sizeof(u64);

	return result;
}

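/*
 * Mirror image of perf_evsel__parse_sample(): write the fields selected by
 * type/read_format from the parsed sample back into event->sample.array.
 * The event is assumed to have been sized beforehand, typically with
 * perf_event__sample_event_size().
 */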
int perf_event__synthesize_sample(union perf_event *event, u64 type,
				  u64 read_format,
				  const struct perf_sample *sample,
				  bool swapped)
{
	u64 *array;
	size_t sz;
	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union u64_swap u;

	array = event->sample.array;

	if (type & PERF_SAMPLE_IDENTIFIER) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_IP) {
		*array = sample->ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val32[0] = sample->pid;
		u.val32[1] = sample->tid;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_evsel__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}

		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		*array = sample->time;
		array++;
	}

	if (type & PERF_SAMPLE_ADDR) {
		*array = sample->addr;
		array++;
	}

	if (type & PERF_SAMPLE_ID) {
		*array = sample->id;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		*array = sample->stream_id;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {
		u.val32[0] = sample->cpu;
		if (swapped) {
			/*
			 * Inverse of what is done in perf_evsel__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		*array = sample->period;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		if (read_format & PERF_FORMAT_GROUP)
			*array = sample->read.group.nr;
		else
			*array = sample->read.one.value;
		array++;

		if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
			*array = sample->read.time_enabled;
			array++;
		}

		if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
			*array = sample->read.time_running;
			array++;
		}

		/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
		if (read_format & PERF_FORMAT_GROUP) {
			sz = sample->read.group.nr *
			     sizeof(struct sample_read_value);
			memcpy(array, sample->read.group.values, sz);
			array = (void *)array + sz;
		} else {
			*array = sample->read.one.id;
			array++;
		}
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		sz = (sample->callchain->nr + 1) * sizeof(u64);
		memcpy(array, sample->callchain, sz);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_RAW) {
		u.val32[0] = sample->raw_size;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/*
			 * Inverse of what is done in perf_evsel__parse_sample
			 */
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
			u.val64 = bswap_64(u.val64);
		}
		*array = u.val64;
		array = (void *)array + sizeof(u32);

		memcpy(array, sample->raw_data, sample->raw_size);
		array = (void *)array + sample->raw_size;
	}

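	/*
	 * The branch stack is copied verbatim; the extra u64 accounts for
	 * the leading nr field of struct branch_stack.
	 */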
	if (type & PERF_SAMPLE_BRANCH_STACK) {
		sz = sample->branch_stack->nr * sizeof(struct branch_entry);
		sz += sizeof(u64);
		memcpy(array, sample->branch_stack, sz);
		array = (void *)array + sz;
	}

	if (type & PERF_SAMPLE_REGS_USER) {
		if (sample->user_regs.abi) {
			*array++ = sample->user_regs.abi;
			sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
			memcpy(array, sample->user_regs.regs, sz);
			array = (void *)array + sz;
		} else {
			*array++ = 0;
		}
	}

	if (type & PERF_SAMPLE_STACK_USER) {
		sz = sample->user_stack.size;
		*array++ = sz;
		if (sz) {
			memcpy(array, sample->user_stack.data, sz);
			array = (void *)array + sz;
			*array++ = sz;
		}
	}

	if (type & PERF_SAMPLE_WEIGHT) {
		*array = sample->weight;
		array++;
	}

	if (type & PERF_SAMPLE_DATA_SRC) {
		*array = sample->data_src;
		array++;
	}

	if (type & PERF_SAMPLE_TRANSACTION) {
		*array = sample->transaction;
		array++;
	}

	if (type & PERF_SAMPLE_REGS_INTR) {
		if (sample->intr_regs.abi) {
			*array++ = sample->intr_regs.abi;
			sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
			memcpy(array, sample->intr_regs.regs, sz);
			array = (void *)array + sz;
		} else {
			*array++ = 0;
		}
	}

	if (type & PERF_SAMPLE_PHYS_ADDR) {
		*array = sample->phys_addr;
		array++;
	}

	return 0;
}

struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
{
	return pevent_find_field(evsel->tp_format, name);
}

void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
			 const char *name)
{
	struct format_field *field = perf_evsel__field(evsel, name);
	int offset;

	if (!field)
		return NULL;

	offset = field->offset;

	if (field->flags & FIELD_IS_DYNAMIC) {
		offset = *(int *)(sample->raw_data + field->offset);
		offset &= 0xffff;
	}

	return sample->raw_data + offset;
}

u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
			 bool needs_swap)
{
	u64 value;
	void *ptr = sample->raw_data + field->offset;

	switch (field->size) {
	case 1:
		return *(u8 *)ptr;
	case 2:
		value = *(u16 *)ptr;
		break;
	case 4:
		value = *(u32 *)ptr;
		break;
	case 8:
		memcpy(&value, ptr, sizeof(u64));
		break;
	default:
		return 0;
	}

	if (!needs_swap)
		return value;

	switch (field->size) {
	case 2:
		return bswap_16(value);
	case 4:
		return bswap_32(value);
	case 8:
		return bswap_64(value);
	default:
		return 0;
	}
}

u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
		       const char *name)
{
	struct format_field *field = perf_evsel__field(evsel, name);

	if (!field)
		return 0;

	return format_field__intval(field, sample, evsel->needs_swap);
}

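/*
 * Called after sys_perf_event_open() fails: try to rewrite the event into
 * something the running kernel can accept (hardware cycles -> cpu-clock, or
 * an explicit :u modifier when perf_event_paranoid forbids kernel samples)
 * and return true when retrying the open with the rewritten attr makes
 * sense.
 */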
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
			  char *msg, size_t msgsize)
{
	int paranoid;

	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
	    evsel->attr.type == PERF_TYPE_HARDWARE &&
	    evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
		/*
		 * If it's cycles then fall back to hrtimer based
		 * cpu-clock-tick sw counter, which is always available even if
		 * no PMU support.
		 *
		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
		 * b0a873e).
		 */
		scnprintf(msg, msgsize, "%s",
"The cycles event is not supported, trying to fall back to cpu-clock-ticks");

		evsel->attr.type   = PERF_TYPE_SOFTWARE;
		evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;

		zfree(&evsel->name);
		return true;
	} else if (err == EACCES && !evsel->attr.exclude_kernel &&
		   (paranoid = perf_event_paranoid()) > 1) {
		const char *name = perf_evsel__name(evsel);
		char *new_name;

		if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? "" : ":") < 0)
			return false;

		if (evsel->name)
			free(evsel->name);
		evsel->name = new_name;
		scnprintf(msg, msgsize,
"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
		evsel->attr.exclude_kernel = 1;

		return true;
	}

	return false;
}

static bool find_process(const char *name)
{
	size_t len = strlen(name);
	DIR *dir;
	struct dirent *d;
	int ret = -1;

	dir = opendir(procfs__mountpoint());
	if (!dir)
		return false;

	/* Walk through the directory. */
	while (ret && (d = readdir(dir)) != NULL) {
		char path[PATH_MAX];
		char *data;
		size_t size;

		if ((d->d_type != DT_DIR) ||
		    !strcmp(".", d->d_name) ||
		    !strcmp("..", d->d_name))
			continue;

		scnprintf(path, sizeof(path), "%s/%s/comm",
			  procfs__mountpoint(), d->d_name);

		if (filename__read_str(path, &data, &size))
			continue;

		ret = strncmp(name, data, len);
		free(data);
	}

	closedir(dir);
	return ret ? false : true;
}

int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
			      int err, char *msg, size_t size)
{
	char sbuf[STRERR_BUFSIZE];
	int printed = 0;

	switch (err) {
	case EPERM:
	case EACCES:
		if (err == EPERM)
			printed = scnprintf(msg, size,
				"No permission to enable %s event.\n\n",
				perf_evsel__name(evsel));

		return scnprintf(msg + printed, size - printed,
		 "You may not have permission to collect %sstats.\n\n"
		 "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
		 "which controls use of the performance events system by\n"
		 "unprivileged users (without CAP_SYS_ADMIN).\n\n"
		 "The current value is %d:\n\n"
		 "  -1: Allow use of (almost) all events by all users\n"
		 "      Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
		 ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
		 "      Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
		 ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
		 ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
		 "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
		 "	kernel.perf_event_paranoid = -1\n",
				 target->system_wide ? "system-wide " : "",
				 perf_event_paranoid());
	case ENOENT:
		return scnprintf(msg, size, "The %s event is not supported.",
				 perf_evsel__name(evsel));
	case EMFILE:
		return scnprintf(msg, size, "%s",
			 "Too many events are opened.\n"
			 "Probably the maximum number of open file descriptors has been reached.\n"
			 "Hint: Try again after reducing the number of events.\n"
			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
	case ENOMEM:
		if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 &&
		    access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
			return scnprintf(msg, size,
					 "Not enough memory to set up event with callchain.\n"
					 "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
					 "Hint: Current value: %d", sysctl_perf_event_max_stack);
		break;
	case ENODEV:
		if (target->cpu_list)
			return scnprintf(msg, size, "%s",
					 "No such device - did you specify an out-of-range profile CPU?");
		break;
	case EOPNOTSUPP:
		if (evsel->attr.sample_period != 0)
			return scnprintf(msg, size,
	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
					 perf_evsel__name(evsel));
		if (evsel->attr.precise_ip)
			return scnprintf(msg, size, "%s",
	"'precise' request may not be supported. Try removing 'p' modifier.");
#if defined(__i386__) || defined(__x86_64__)
		if (evsel->attr.type == PERF_TYPE_HARDWARE)
			return scnprintf(msg, size, "%s",
	"No hardware sampling interrupt available.\n"
	"No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.");
#endif
		break;
	case EBUSY:
		if (find_process("oprofiled"))
			return scnprintf(msg, size,
	"The PMU counters are busy/taken by another profiler.\n"
	"We found the oprofile daemon running, please stop it and try again.");
		break;
	case EINVAL:
		if (evsel->attr.write_backward && perf_missing_features.write_backward)
			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
		if (perf_missing_features.clockid)
			return scnprintf(msg, size, "clockid feature not supported.");
		if (perf_missing_features.clockid_wrong)
			return scnprintf(msg, size, "wrong clockid (%d).", clockid);
		break;
	default:
		break;
	}

	return scnprintf(msg, size,
	"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
	"/bin/dmesg may provide additional information.\n"
	"No CONFIG_PERF_EVENTS=y kernel support configured?",
			 err, str_error_r(err, sbuf, sizeof(sbuf)),
			 perf_evsel__name(evsel));
}

struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
{
	if (evsel && evsel->evlist)
		return evsel->evlist->env;
	return NULL;
}