1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 10 #include <byteswap.h> 11 #include "asm/bug.h" 12 #include "evsel.h" 13 #include "evlist.h" 14 #include "util.h" 15 #include "cpumap.h" 16 #include "thread_map.h" 17 18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 19 20 int __perf_evsel__sample_size(u64 sample_type) 21 { 22 u64 mask = sample_type & PERF_SAMPLE_MASK; 23 int size = 0; 24 int i; 25 26 for (i = 0; i < 64; i++) { 27 if (mask & (1ULL << i)) 28 size++; 29 } 30 31 size *= sizeof(u64); 32 33 return size; 34 } 35 36 void perf_evsel__init(struct perf_evsel *evsel, 37 struct perf_event_attr *attr, int idx) 38 { 39 evsel->idx = idx; 40 evsel->attr = *attr; 41 INIT_LIST_HEAD(&evsel->node); 42 hists__init(&evsel->hists); 43 } 44 45 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 46 { 47 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 48 49 if (evsel != NULL) 50 perf_evsel__init(evsel, attr, idx); 51 52 return evsel; 53 } 54 55 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 56 { 57 int cpu, thread; 58 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 59 60 if (evsel->fd) { 61 for (cpu = 0; cpu < ncpus; cpu++) { 62 for (thread = 0; thread < nthreads; thread++) { 63 FD(evsel, cpu, thread) = -1; 64 } 65 } 66 } 67 68 return evsel->fd != NULL ? 0 : -ENOMEM; 69 } 70 71 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) 72 { 73 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); 74 if (evsel->sample_id == NULL) 75 return -ENOMEM; 76 77 evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); 78 if (evsel->id == NULL) { 79 xyarray__delete(evsel->sample_id); 80 evsel->sample_id = NULL; 81 return -ENOMEM; 82 } 83 84 return 0; 85 } 86 87 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 88 { 89 evsel->counts = zalloc((sizeof(*evsel->counts) + 90 (ncpus * sizeof(struct perf_counts_values)))); 91 return evsel->counts != NULL ? 0 : -ENOMEM; 92 } 93 94 void perf_evsel__free_fd(struct perf_evsel *evsel) 95 { 96 xyarray__delete(evsel->fd); 97 evsel->fd = NULL; 98 } 99 100 void perf_evsel__free_id(struct perf_evsel *evsel) 101 { 102 xyarray__delete(evsel->sample_id); 103 evsel->sample_id = NULL; 104 free(evsel->id); 105 evsel->id = NULL; 106 } 107 108 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 109 { 110 int cpu, thread; 111 112 for (cpu = 0; cpu < ncpus; cpu++) 113 for (thread = 0; thread < nthreads; ++thread) { 114 close(FD(evsel, cpu, thread)); 115 FD(evsel, cpu, thread) = -1; 116 } 117 } 118 119 void perf_evsel__exit(struct perf_evsel *evsel) 120 { 121 assert(list_empty(&evsel->node)); 122 xyarray__delete(evsel->fd); 123 xyarray__delete(evsel->sample_id); 124 free(evsel->id); 125 } 126 127 void perf_evsel__delete(struct perf_evsel *evsel) 128 { 129 perf_evsel__exit(evsel); 130 close_cgroup(evsel->cgrp); 131 free(evsel->name); 132 free(evsel); 133 } 134 135 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 136 int cpu, int thread, bool scale) 137 { 138 struct perf_counts_values count; 139 size_t nv = scale ? 3 : 1; 140 141 if (FD(evsel, cpu, thread) < 0) 142 return -EINVAL; 143 144 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 145 return -ENOMEM; 146 147 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 148 return -errno; 149 150 if (scale) { 151 if (count.run == 0) 152 count.val = 0; 153 else if (count.run < count.ena) 154 count.val = (u64)((double)count.val * count.ena / count.run + 0.5); 155 } else 156 count.ena = count.run = 0; 157 158 evsel->counts->cpu[cpu] = count; 159 return 0; 160 } 161 162 int __perf_evsel__read(struct perf_evsel *evsel, 163 int ncpus, int nthreads, bool scale) 164 { 165 size_t nv = scale ? 3 : 1; 166 int cpu, thread; 167 struct perf_counts_values *aggr = &evsel->counts->aggr, count; 168 169 aggr->val = aggr->ena = aggr->run = 0; 170 171 for (cpu = 0; cpu < ncpus; cpu++) { 172 for (thread = 0; thread < nthreads; thread++) { 173 if (FD(evsel, cpu, thread) < 0) 174 continue; 175 176 if (readn(FD(evsel, cpu, thread), 177 &count, nv * sizeof(u64)) < 0) 178 return -errno; 179 180 aggr->val += count.val; 181 if (scale) { 182 aggr->ena += count.ena; 183 aggr->run += count.run; 184 } 185 } 186 } 187 188 evsel->counts->scaled = 0; 189 if (scale) { 190 if (aggr->run == 0) { 191 evsel->counts->scaled = -1; 192 aggr->val = 0; 193 return 0; 194 } 195 196 if (aggr->run < aggr->ena) { 197 evsel->counts->scaled = 1; 198 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); 199 } 200 } else 201 aggr->ena = aggr->run = 0; 202 203 return 0; 204 } 205 206 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 207 struct thread_map *threads, bool group) 208 { 209 int cpu, thread; 210 unsigned long flags = 0; 211 int pid = -1; 212 213 if (evsel->fd == NULL && 214 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 215 return -1; 216 217 if (evsel->cgrp) { 218 flags = PERF_FLAG_PID_CGROUP; 219 pid = evsel->cgrp->fd; 220 } 221 222 for (cpu = 0; cpu < cpus->nr; cpu++) { 223 int group_fd = -1; 224 225 for (thread = 0; thread < threads->nr; thread++) { 226 227 if (!evsel->cgrp) 228 pid = threads->map[thread]; 229 230 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 231 pid, 232 cpus->map[cpu], 233 group_fd, flags); 234 if (FD(evsel, cpu, thread) < 0) 235 goto out_close; 236 237 if (group && group_fd == -1) 238 group_fd = FD(evsel, cpu, thread); 239 } 240 } 241 242 return 0; 243 244 out_close: 245 do { 246 while (--thread >= 0) { 247 close(FD(evsel, cpu, thread)); 248 FD(evsel, cpu, thread) = -1; 249 } 250 thread = threads->nr; 251 } while (--cpu >= 0); 252 return -1; 253 } 254 255 static struct { 256 struct cpu_map map; 257 int cpus[1]; 258 } empty_cpu_map = { 259 .map.nr = 1, 260 .cpus = { -1, }, 261 }; 262 263 static struct { 264 struct thread_map map; 265 int threads[1]; 266 } empty_thread_map = { 267 .map.nr = 1, 268 .threads = { -1, }, 269 }; 270 271 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 272 struct thread_map *threads, bool group) 273 { 274 if (cpus == NULL) { 275 /* Work around old compiler warnings about strict aliasing */ 276 cpus = &empty_cpu_map.map; 277 } 278 279 if (threads == NULL) 280 threads = &empty_thread_map.map; 281 282 return __perf_evsel__open(evsel, cpus, threads, group); 283 } 284 285 int perf_evsel__open_per_cpu(struct perf_evsel *evsel, 286 struct cpu_map *cpus, bool group) 287 { 288 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); 289 } 290 291 int perf_evsel__open_per_thread(struct perf_evsel *evsel, 292 struct thread_map *threads, bool group) 293 { 294 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); 295 } 296 297 static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 298 struct perf_sample *sample) 299 { 300 const u64 *array = event->sample.array; 301 302 array += ((event->header.size - 303 sizeof(event->header)) / sizeof(u64)) - 1; 304 305 if (type & PERF_SAMPLE_CPU) { 306 u32 *p = (u32 *)array; 307 sample->cpu = *p; 308 array--; 309 } 310 311 if (type & PERF_SAMPLE_STREAM_ID) { 312 sample->stream_id = *array; 313 array--; 314 } 315 316 if (type & PERF_SAMPLE_ID) { 317 sample->id = *array; 318 array--; 319 } 320 321 if (type & PERF_SAMPLE_TIME) { 322 sample->time = *array; 323 array--; 324 } 325 326 if (type & PERF_SAMPLE_TID) { 327 u32 *p = (u32 *)array; 328 sample->pid = p[0]; 329 sample->tid = p[1]; 330 } 331 332 return 0; 333 } 334 335 static bool sample_overlap(const union perf_event *event, 336 const void *offset, u64 size) 337 { 338 const void *base = event; 339 340 if (offset + size > base + event->header.size) 341 return true; 342 343 return false; 344 } 345 346 int perf_event__parse_sample(const union perf_event *event, u64 type, 347 int sample_size, bool sample_id_all, 348 struct perf_sample *data, bool swapped) 349 { 350 const u64 *array; 351 352 /* 353 * used for cross-endian analysis. See git commit 65014ab3 354 * for why this goofiness is needed. 355 */ 356 union { 357 u64 val64; 358 u32 val32[2]; 359 } u; 360 361 362 data->cpu = data->pid = data->tid = -1; 363 data->stream_id = data->id = data->time = -1ULL; 364 365 if (event->header.type != PERF_RECORD_SAMPLE) { 366 if (!sample_id_all) 367 return 0; 368 return perf_event__parse_id_sample(event, type, data); 369 } 370 371 array = event->sample.array; 372 373 if (sample_size + sizeof(event->header) > event->header.size) 374 return -EFAULT; 375 376 if (type & PERF_SAMPLE_IP) { 377 data->ip = event->ip.ip; 378 array++; 379 } 380 381 if (type & PERF_SAMPLE_TID) { 382 u.val64 = *array; 383 if (swapped) { 384 /* undo swap of u64, then swap on individual u32s */ 385 u.val64 = bswap_64(u.val64); 386 u.val32[0] = bswap_32(u.val32[0]); 387 u.val32[1] = bswap_32(u.val32[1]); 388 } 389 390 data->pid = u.val32[0]; 391 data->tid = u.val32[1]; 392 array++; 393 } 394 395 if (type & PERF_SAMPLE_TIME) { 396 data->time = *array; 397 array++; 398 } 399 400 data->addr = 0; 401 if (type & PERF_SAMPLE_ADDR) { 402 data->addr = *array; 403 array++; 404 } 405 406 data->id = -1ULL; 407 if (type & PERF_SAMPLE_ID) { 408 data->id = *array; 409 array++; 410 } 411 412 if (type & PERF_SAMPLE_STREAM_ID) { 413 data->stream_id = *array; 414 array++; 415 } 416 417 if (type & PERF_SAMPLE_CPU) { 418 419 u.val64 = *array; 420 if (swapped) { 421 /* undo swap of u64, then swap on individual u32s */ 422 u.val64 = bswap_64(u.val64); 423 u.val32[0] = bswap_32(u.val32[0]); 424 } 425 426 data->cpu = u.val32[0]; 427 array++; 428 } 429 430 if (type & PERF_SAMPLE_PERIOD) { 431 data->period = *array; 432 array++; 433 } 434 435 if (type & PERF_SAMPLE_READ) { 436 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); 437 return -1; 438 } 439 440 if (type & PERF_SAMPLE_CALLCHAIN) { 441 if (sample_overlap(event, array, sizeof(data->callchain->nr))) 442 return -EFAULT; 443 444 data->callchain = (struct ip_callchain *)array; 445 446 if (sample_overlap(event, array, data->callchain->nr)) 447 return -EFAULT; 448 449 array += 1 + data->callchain->nr; 450 } 451 452 if (type & PERF_SAMPLE_RAW) { 453 const u64 *pdata; 454 455 u.val64 = *array; 456 if (WARN_ONCE(swapped, 457 "Endianness of raw data not corrected!\n")) { 458 /* undo swap of u64, then swap on individual u32s */ 459 u.val64 = bswap_64(u.val64); 460 u.val32[0] = bswap_32(u.val32[0]); 461 u.val32[1] = bswap_32(u.val32[1]); 462 } 463 464 if (sample_overlap(event, array, sizeof(u32))) 465 return -EFAULT; 466 467 data->raw_size = u.val32[0]; 468 pdata = (void *) array + sizeof(u32); 469 470 if (sample_overlap(event, pdata, data->raw_size)) 471 return -EFAULT; 472 473 data->raw_data = (void *) pdata; 474 } 475 476 return 0; 477 } 478