/*
 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Parts came from builtin-{top,stat,record}.c, see those files for further
 * copyright notes.
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */

#include <byteswap.h>
#include "asm/bug.h"
#include "evsel.h"
#include "evlist.h"
#include "util.h"
#include "cpumap.h"
#include "thread_map.h"

#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define GROUP_FD(group_fd, cpu) (*(int *)xyarray__entry(group_fd, cpu, 0))

int __perf_evsel__sample_size(u64 sample_type)
{
	u64 mask = sample_type & PERF_SAMPLE_MASK;
	int size = 0;
	int i;

	for (i = 0; i < 64; i++) {
		if (mask & (1ULL << i))
			size++;
	}

	size *= sizeof(u64);

	return size;
}

void perf_evsel__init(struct perf_evsel *evsel,
		      struct perf_event_attr *attr, int idx)
{
	evsel->idx = idx;
	evsel->attr = *attr;
	INIT_LIST_HEAD(&evsel->node);
	hists__init(&evsel->hists);
}

struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
	struct perf_evsel *evsel = zalloc(sizeof(*evsel));

	if (evsel != NULL)
		perf_evsel__init(evsel, attr, idx);

	return evsel;
}

int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));

	if (evsel->fd) {
		for (cpu = 0; cpu < ncpus; cpu++) {
			for (thread = 0; thread < nthreads; thread++) {
				FD(evsel, cpu, thread) = -1;
			}
		}
	}

	return evsel->fd != NULL ? 0 : -ENOMEM;
}

int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
	if (evsel->sample_id == NULL)
		return -ENOMEM;

	evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
	if (evsel->id == NULL) {
		xyarray__delete(evsel->sample_id);
		evsel->sample_id = NULL;
		return -ENOMEM;
	}

	return 0;
}

int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
{
	evsel->counts = zalloc((sizeof(*evsel->counts) +
				(ncpus * sizeof(struct perf_counts_values))));
	return evsel->counts != NULL ? 0 : -ENOMEM;
}

void perf_evsel__free_fd(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->fd);
	evsel->fd = NULL;
}

void perf_evsel__free_id(struct perf_evsel *evsel)
{
	xyarray__delete(evsel->sample_id);
	evsel->sample_id = NULL;
	free(evsel->id);
	evsel->id = NULL;
}

void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; ++thread) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
}

void perf_evsel__exit(struct perf_evsel *evsel)
{
	assert(list_empty(&evsel->node));
	xyarray__delete(evsel->fd);
	xyarray__delete(evsel->sample_id);
	free(evsel->id);
}

void perf_evsel__delete(struct perf_evsel *evsel)
{
	perf_evsel__exit(evsel);
	close_cgroup(evsel->cgrp);
	free(evsel->name);
	free(evsel);
}
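/*
 * A minimal lifecycle sketch, for illustration only (not upstream
 * documentation): it assumes cpus/threads maps were built elsewhere,
 * e.g. with the cpu_map/thread_map helpers, and abbreviates error
 * handling.
 *
 *	struct perf_event_attr attr = {
 *		.type	= PERF_TYPE_HARDWARE,
 *		.config	= PERF_COUNT_HW_CPU_CYCLES,
 *	};
 *	struct perf_evsel *evsel = perf_evsel__new(&attr, 0);
 *
 *	if (evsel == NULL)
 *		return -ENOMEM;
 *
 *	if (perf_evsel__open(evsel, cpus, threads, false, NULL) < 0)
 *		goto out_delete;
 *
 *	__perf_evsel__read_on_cpu(evsel, 0, 0, true);
 *
 *	perf_evsel__close(evsel, cpus->nr, threads->nr);
 * out_delete:
 *	perf_evsel__delete(evsel);
 */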
int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
			      int cpu, int thread, bool scale)
{
	struct perf_counts_values count;
	size_t nv = scale ? 3 : 1;

	if (FD(evsel, cpu, thread) < 0)
		return -EINVAL;

	if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0)
		return -ENOMEM;

	if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
		return -errno;

	if (scale) {
		if (count.run == 0)
			count.val = 0;
		else if (count.run < count.ena)
			count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
	} else
		count.ena = count.run = 0;

	evsel->counts->cpu[cpu] = count;
	return 0;
}

int __perf_evsel__read(struct perf_evsel *evsel,
		       int ncpus, int nthreads, bool scale)
{
	size_t nv = scale ? 3 : 1;
	int cpu, thread;
	struct perf_counts_values *aggr = &evsel->counts->aggr, count;

	aggr->val = aggr->ena = aggr->run = 0;

	for (cpu = 0; cpu < ncpus; cpu++) {
		for (thread = 0; thread < nthreads; thread++) {
			if (FD(evsel, cpu, thread) < 0)
				continue;

			if (readn(FD(evsel, cpu, thread),
				  &count, nv * sizeof(u64)) < 0)
				return -errno;

			aggr->val += count.val;
			if (scale) {
				aggr->ena += count.ena;
				aggr->run += count.run;
			}
		}
	}

	evsel->counts->scaled = 0;
	if (scale) {
		if (aggr->run == 0) {
			evsel->counts->scaled = -1;
			aggr->val = 0;
			return 0;
		}

		if (aggr->run < aggr->ena) {
			evsel->counts->scaled = 1;
			aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
		}
	} else
		aggr->ena = aggr->run = 0;

	return 0;
}
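/*
 * Scaling note for the two readers above: when the counter was
 * time-multiplexed and ran for only part of the time it was enabled
 * (run < ena), the raw value is extrapolated as
 *
 *	scaled_val = val * ena / run		(rounded to nearest)
 *
 * e.g. val = 1000, ena = 4000000, run = 1000000 gives scaled_val = 4000.
 * counts->scaled ends up 1 when this extrapolation happened, 0 when the
 * counter ran the whole time it was enabled, and -1 when it never ran
 * (run == 0).
 */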
static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
			      struct thread_map *threads, bool group,
			      struct xyarray *group_fds)
{
	int cpu, thread;
	unsigned long flags = 0;
	int pid = -1, err;

	if (evsel->fd == NULL &&
	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
		return -ENOMEM;

	if (evsel->cgrp) {
		flags = PERF_FLAG_PID_CGROUP;
		pid = evsel->cgrp->fd;
	}

	for (cpu = 0; cpu < cpus->nr; cpu++) {
		int group_fd = group_fds ? GROUP_FD(group_fds, cpu) : -1;

		for (thread = 0; thread < threads->nr; thread++) {

			if (!evsel->cgrp)
				pid = threads->map[thread];

			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
								     pid,
								     cpus->map[cpu],
								     group_fd, flags);
			if (FD(evsel, cpu, thread) < 0) {
				err = -errno;
				goto out_close;
			}

			if (group && group_fd == -1)
				group_fd = FD(evsel, cpu, thread);
		}
	}

	return 0;

out_close:
	do {
		while (--thread >= 0) {
			close(FD(evsel, cpu, thread));
			FD(evsel, cpu, thread) = -1;
		}
		thread = threads->nr;
	} while (--cpu >= 0);
	return err;
}

void perf_evsel__close(struct perf_evsel *evsel, int ncpus, int nthreads)
{
	if (evsel->fd == NULL)
		return;

	perf_evsel__close_fd(evsel, ncpus, nthreads);
	perf_evsel__free_fd(evsel);
	evsel->fd = NULL;
}

static struct {
	struct cpu_map map;
	int cpus[1];
} empty_cpu_map = {
	.map.nr	= 1,
	.cpus	= { -1, },
};

static struct {
	struct thread_map map;
	int threads[1];
} empty_thread_map = {
	.map.nr	 = 1,
	.threads = { -1, },
};

int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
		     struct thread_map *threads, bool group,
		     struct xyarray *group_fd)
{
	if (cpus == NULL) {
		/* Work around old compiler warnings about strict aliasing */
		cpus = &empty_cpu_map.map;
	}

	if (threads == NULL)
		threads = &empty_thread_map.map;

	return __perf_evsel__open(evsel, cpus, threads, group, group_fd);
}

int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
			     struct cpu_map *cpus, bool group,
			     struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group,
				  group_fd);
}

int perf_evsel__open_per_thread(struct perf_evsel *evsel,
				struct thread_map *threads, bool group,
				struct xyarray *group_fd)
{
	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group,
				  group_fd);
}

static int perf_event__parse_id_sample(const union perf_event *event, u64 type,
				       struct perf_sample *sample)
{
	const u64 *array = event->sample.array;

	array += ((event->header.size -
		   sizeof(event->header)) / sizeof(u64)) - 1;

	if (type & PERF_SAMPLE_CPU) {
		u32 *p = (u32 *)array;
		sample->cpu = *p;
		array--;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		sample->stream_id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_ID) {
		sample->id = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TIME) {
		sample->time = *array;
		array--;
	}

	if (type & PERF_SAMPLE_TID) {
		u32 *p = (u32 *)array;
		sample->pid = p[0];
		sample->tid = p[1];
	}

	return 0;
}

static bool sample_overlap(const union perf_event *event,
			   const void *offset, u64 size)
{
	const void *base = event;

	if (offset + size > base + event->header.size)
		return true;

	return false;
}
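/*
 * Layout walked by perf_event__parse_id_sample() above: when
 * attr.sample_id_all is set, non PERF_RECORD_SAMPLE events carry the
 * selected sample_type fields as a trailer at the very end of the
 * record, in this order:
 *
 *	{ u32 pid, tid;  }	if PERF_SAMPLE_TID
 *	{ u64 time;      }	if PERF_SAMPLE_TIME
 *	{ u64 id;        }	if PERF_SAMPLE_ID
 *	{ u64 stream_id; }	if PERF_SAMPLE_STREAM_ID
 *	{ u32 cpu, res;  }	if PERF_SAMPLE_CPU
 *
 * which is why that function starts at the last u64 of the event and
 * walks backwards.
 */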
int perf_event__parse_sample(const union perf_event *event, u64 type,
			     int sample_size, bool sample_id_all,
			     struct perf_sample *data, bool swapped)
{
	const u64 *array;

	/*
	 * used for cross-endian analysis. See git commit 65014ab3
	 * for why this goofiness is needed.
	 */
	union {
		u64 val64;
		u32 val32[2];
	} u;

	data->cpu = data->pid = data->tid = -1;
	data->stream_id = data->id = data->time = -1ULL;

	if (event->header.type != PERF_RECORD_SAMPLE) {
		if (!sample_id_all)
			return 0;
		return perf_event__parse_id_sample(event, type, data);
	}

	array = event->sample.array;

	if (sample_size + sizeof(event->header) > event->header.size)
		return -EFAULT;

	if (type & PERF_SAMPLE_IP) {
		data->ip = event->ip.ip;
		array++;
	}

	if (type & PERF_SAMPLE_TID) {
		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		data->pid = u.val32[0];
		data->tid = u.val32[1];
		array++;
	}

	if (type & PERF_SAMPLE_TIME) {
		data->time = *array;
		array++;
	}

	data->addr = 0;
	if (type & PERF_SAMPLE_ADDR) {
		data->addr = *array;
		array++;
	}

	data->id = -1ULL;
	if (type & PERF_SAMPLE_ID) {
		data->id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_STREAM_ID) {
		data->stream_id = *array;
		array++;
	}

	if (type & PERF_SAMPLE_CPU) {

		u.val64 = *array;
		if (swapped) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
		}

		data->cpu = u.val32[0];
		array++;
	}

	if (type & PERF_SAMPLE_PERIOD) {
		data->period = *array;
		array++;
	}

	if (type & PERF_SAMPLE_READ) {
		fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
		return -1;
	}

	if (type & PERF_SAMPLE_CALLCHAIN) {
		if (sample_overlap(event, array, sizeof(data->callchain->nr)))
			return -EFAULT;

		data->callchain = (struct ip_callchain *)array;

		if (sample_overlap(event, array, data->callchain->nr))
			return -EFAULT;

		array += 1 + data->callchain->nr;
	}

	if (type & PERF_SAMPLE_RAW) {
		const u64 *pdata;

		u.val64 = *array;
		if (WARN_ONCE(swapped,
			      "Endianness of raw data not corrected!\n")) {
			/* undo swap of u64, then swap on individual u32s */
			u.val64 = bswap_64(u.val64);
			u.val32[0] = bswap_32(u.val32[0]);
			u.val32[1] = bswap_32(u.val32[1]);
		}

		if (sample_overlap(event, array, sizeof(u32)))
			return -EFAULT;

		data->raw_size = u.val32[0];
		pdata = (void *) array + sizeof(u32);

		if (sample_overlap(event, pdata, data->raw_size))
			return -EFAULT;

		data->raw_data = (void *) pdata;
	}

	return 0;
}
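/*
 * A rough usage sketch for the parser above, not part of this file's API:
 * it assumes "event" was pulled from the mmap ring buffer, that the caller
 * (normally the evlist/session code) already resolved which evsel the event
 * belongs to, and that swapped is false because the data and the host share
 * the same endianness:
 *
 *	struct perf_sample sample;
 *	u64 type = evsel->attr.sample_type;
 *	int err;
 *
 *	err = perf_event__parse_sample(event, type,
 *				       __perf_evsel__sample_size(type),
 *				       evsel->attr.sample_id_all,
 *				       &sample, false);
 *	if (err)
 *		return err;
 */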