1 /* 2 * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> 3 * 4 * Parts came from builtin-{top,stat,record}.c, see those files for further 5 * copyright notes. 6 * 7 * Released under the GPL v2. (and only v2, not any later version) 8 */ 9 10 #include <byteswap.h> 11 #include "asm/bug.h" 12 #include "evsel.h" 13 #include "evlist.h" 14 #include "util.h" 15 #include "cpumap.h" 16 #include "thread_map.h" 17 18 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 19 20 int __perf_evsel__sample_size(u64 sample_type) 21 { 22 u64 mask = sample_type & PERF_SAMPLE_MASK; 23 int size = 0; 24 int i; 25 26 for (i = 0; i < 64; i++) { 27 if (mask & (1ULL << i)) 28 size++; 29 } 30 31 size *= sizeof(u64); 32 33 return size; 34 } 35 36 void perf_evsel__init(struct perf_evsel *evsel, 37 struct perf_event_attr *attr, int idx) 38 { 39 evsel->idx = idx; 40 evsel->attr = *attr; 41 INIT_LIST_HEAD(&evsel->node); 42 } 43 44 struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 45 { 46 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 47 48 if (evsel != NULL) 49 perf_evsel__init(evsel, attr, idx); 50 51 return evsel; 52 } 53 54 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 55 { 56 int cpu, thread; 57 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 58 59 if (evsel->fd) { 60 for (cpu = 0; cpu < ncpus; cpu++) { 61 for (thread = 0; thread < nthreads; thread++) { 62 FD(evsel, cpu, thread) = -1; 63 } 64 } 65 } 66 67 return evsel->fd != NULL ? 0 : -ENOMEM; 68 } 69 70 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) 71 { 72 evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id)); 73 if (evsel->sample_id == NULL) 74 return -ENOMEM; 75 76 evsel->id = zalloc(ncpus * nthreads * sizeof(u64)); 77 if (evsel->id == NULL) { 78 xyarray__delete(evsel->sample_id); 79 evsel->sample_id = NULL; 80 return -ENOMEM; 81 } 82 83 return 0; 84 } 85 86 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 87 { 88 evsel->counts = zalloc((sizeof(*evsel->counts) + 89 (ncpus * sizeof(struct perf_counts_values)))); 90 return evsel->counts != NULL ? 0 : -ENOMEM; 91 } 92 93 void perf_evsel__free_fd(struct perf_evsel *evsel) 94 { 95 xyarray__delete(evsel->fd); 96 evsel->fd = NULL; 97 } 98 99 void perf_evsel__free_id(struct perf_evsel *evsel) 100 { 101 xyarray__delete(evsel->sample_id); 102 evsel->sample_id = NULL; 103 free(evsel->id); 104 evsel->id = NULL; 105 } 106 107 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 108 { 109 int cpu, thread; 110 111 for (cpu = 0; cpu < ncpus; cpu++) 112 for (thread = 0; thread < nthreads; ++thread) { 113 close(FD(evsel, cpu, thread)); 114 FD(evsel, cpu, thread) = -1; 115 } 116 } 117 118 void perf_evsel__exit(struct perf_evsel *evsel) 119 { 120 assert(list_empty(&evsel->node)); 121 xyarray__delete(evsel->fd); 122 xyarray__delete(evsel->sample_id); 123 free(evsel->id); 124 } 125 126 void perf_evsel__delete(struct perf_evsel *evsel) 127 { 128 perf_evsel__exit(evsel); 129 close_cgroup(evsel->cgrp); 130 free(evsel->name); 131 free(evsel); 132 } 133 134 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, 135 int cpu, int thread, bool scale) 136 { 137 struct perf_counts_values count; 138 size_t nv = scale ? 3 : 1; 139 140 if (FD(evsel, cpu, thread) < 0) 141 return -EINVAL; 142 143 if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 144 return -ENOMEM; 145 146 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 147 return -errno; 148 149 if (scale) { 150 if (count.run == 0) 151 count.val = 0; 152 else if (count.run < count.ena) 153 count.val = (u64)((double)count.val * count.ena / count.run + 0.5); 154 } else 155 count.ena = count.run = 0; 156 157 evsel->counts->cpu[cpu] = count; 158 return 0; 159 } 160 161 int __perf_evsel__read(struct perf_evsel *evsel, 162 int ncpus, int nthreads, bool scale) 163 { 164 size_t nv = scale ? 3 : 1; 165 int cpu, thread; 166 struct perf_counts_values *aggr = &evsel->counts->aggr, count; 167 168 aggr->val = aggr->ena = aggr->run = 0; 169 170 for (cpu = 0; cpu < ncpus; cpu++) { 171 for (thread = 0; thread < nthreads; thread++) { 172 if (FD(evsel, cpu, thread) < 0) 173 continue; 174 175 if (readn(FD(evsel, cpu, thread), 176 &count, nv * sizeof(u64)) < 0) 177 return -errno; 178 179 aggr->val += count.val; 180 if (scale) { 181 aggr->ena += count.ena; 182 aggr->run += count.run; 183 } 184 } 185 } 186 187 evsel->counts->scaled = 0; 188 if (scale) { 189 if (aggr->run == 0) { 190 evsel->counts->scaled = -1; 191 aggr->val = 0; 192 return 0; 193 } 194 195 if (aggr->run < aggr->ena) { 196 evsel->counts->scaled = 1; 197 aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5); 198 } 199 } else 200 aggr->ena = aggr->run = 0; 201 202 return 0; 203 } 204 205 static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 206 struct thread_map *threads, bool group) 207 { 208 int cpu, thread; 209 unsigned long flags = 0; 210 int pid = -1; 211 212 if (evsel->fd == NULL && 213 perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 214 return -1; 215 216 if (evsel->cgrp) { 217 flags = PERF_FLAG_PID_CGROUP; 218 pid = evsel->cgrp->fd; 219 } 220 221 for (cpu = 0; cpu < cpus->nr; cpu++) { 222 int group_fd = -1; 223 224 for (thread = 0; thread < threads->nr; thread++) { 225 226 if (!evsel->cgrp) 227 pid = threads->map[thread]; 228 229 FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 230 pid, 231 cpus->map[cpu], 232 group_fd, flags); 233 if (FD(evsel, cpu, thread) < 0) 234 goto out_close; 235 236 if (group && group_fd == -1) 237 group_fd = FD(evsel, cpu, thread); 238 } 239 } 240 241 return 0; 242 243 out_close: 244 do { 245 while (--thread >= 0) { 246 close(FD(evsel, cpu, thread)); 247 FD(evsel, cpu, thread) = -1; 248 } 249 thread = threads->nr; 250 } while (--cpu >= 0); 251 return -1; 252 } 253 254 static struct { 255 struct cpu_map map; 256 int cpus[1]; 257 } empty_cpu_map = { 258 .map.nr = 1, 259 .cpus = { -1, }, 260 }; 261 262 static struct { 263 struct thread_map map; 264 int threads[1]; 265 } empty_thread_map = { 266 .map.nr = 1, 267 .threads = { -1, }, 268 }; 269 270 int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 271 struct thread_map *threads, bool group) 272 { 273 if (cpus == NULL) { 274 /* Work around old compiler warnings about strict aliasing */ 275 cpus = &empty_cpu_map.map; 276 } 277 278 if (threads == NULL) 279 threads = &empty_thread_map.map; 280 281 return __perf_evsel__open(evsel, cpus, threads, group); 282 } 283 284 int perf_evsel__open_per_cpu(struct perf_evsel *evsel, 285 struct cpu_map *cpus, bool group) 286 { 287 return __perf_evsel__open(evsel, cpus, &empty_thread_map.map, group); 288 } 289 290 int perf_evsel__open_per_thread(struct perf_evsel *evsel, 291 struct thread_map *threads, bool group) 292 { 293 return __perf_evsel__open(evsel, &empty_cpu_map.map, threads, group); 294 } 295 296 static int perf_event__parse_id_sample(const union perf_event *event, u64 type, 297 struct perf_sample *sample) 298 { 299 const u64 *array = event->sample.array; 300 301 array += ((event->header.size - 302 sizeof(event->header)) / sizeof(u64)) - 1; 303 304 if (type & PERF_SAMPLE_CPU) { 305 u32 *p = (u32 *)array; 306 sample->cpu = *p; 307 array--; 308 } 309 310 if (type & PERF_SAMPLE_STREAM_ID) { 311 sample->stream_id = *array; 312 array--; 313 } 314 315 if (type & PERF_SAMPLE_ID) { 316 sample->id = *array; 317 array--; 318 } 319 320 if (type & PERF_SAMPLE_TIME) { 321 sample->time = *array; 322 array--; 323 } 324 325 if (type & PERF_SAMPLE_TID) { 326 u32 *p = (u32 *)array; 327 sample->pid = p[0]; 328 sample->tid = p[1]; 329 } 330 331 return 0; 332 } 333 334 static bool sample_overlap(const union perf_event *event, 335 const void *offset, u64 size) 336 { 337 const void *base = event; 338 339 if (offset + size > base + event->header.size) 340 return true; 341 342 return false; 343 } 344 345 int perf_event__parse_sample(const union perf_event *event, u64 type, 346 int sample_size, bool sample_id_all, 347 struct perf_sample *data, bool swapped) 348 { 349 const u64 *array; 350 351 /* 352 * used for cross-endian analysis. See git commit 65014ab3 353 * for why this goofiness is needed. 354 */ 355 union { 356 u64 val64; 357 u32 val32[2]; 358 } u; 359 360 361 data->cpu = data->pid = data->tid = -1; 362 data->stream_id = data->id = data->time = -1ULL; 363 364 if (event->header.type != PERF_RECORD_SAMPLE) { 365 if (!sample_id_all) 366 return 0; 367 return perf_event__parse_id_sample(event, type, data); 368 } 369 370 array = event->sample.array; 371 372 if (sample_size + sizeof(event->header) > event->header.size) 373 return -EFAULT; 374 375 if (type & PERF_SAMPLE_IP) { 376 data->ip = event->ip.ip; 377 array++; 378 } 379 380 if (type & PERF_SAMPLE_TID) { 381 u.val64 = *array; 382 if (swapped) { 383 /* undo swap of u64, then swap on individual u32s */ 384 u.val64 = bswap_64(u.val64); 385 u.val32[0] = bswap_32(u.val32[0]); 386 u.val32[1] = bswap_32(u.val32[1]); 387 } 388 389 data->pid = u.val32[0]; 390 data->tid = u.val32[1]; 391 array++; 392 } 393 394 if (type & PERF_SAMPLE_TIME) { 395 data->time = *array; 396 array++; 397 } 398 399 data->addr = 0; 400 if (type & PERF_SAMPLE_ADDR) { 401 data->addr = *array; 402 array++; 403 } 404 405 data->id = -1ULL; 406 if (type & PERF_SAMPLE_ID) { 407 data->id = *array; 408 array++; 409 } 410 411 if (type & PERF_SAMPLE_STREAM_ID) { 412 data->stream_id = *array; 413 array++; 414 } 415 416 if (type & PERF_SAMPLE_CPU) { 417 418 u.val64 = *array; 419 if (swapped) { 420 /* undo swap of u64, then swap on individual u32s */ 421 u.val64 = bswap_64(u.val64); 422 u.val32[0] = bswap_32(u.val32[0]); 423 } 424 425 data->cpu = u.val32[0]; 426 array++; 427 } 428 429 if (type & PERF_SAMPLE_PERIOD) { 430 data->period = *array; 431 array++; 432 } 433 434 if (type & PERF_SAMPLE_READ) { 435 fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n"); 436 return -1; 437 } 438 439 if (type & PERF_SAMPLE_CALLCHAIN) { 440 if (sample_overlap(event, array, sizeof(data->callchain->nr))) 441 return -EFAULT; 442 443 data->callchain = (struct ip_callchain *)array; 444 445 if (sample_overlap(event, array, data->callchain->nr)) 446 return -EFAULT; 447 448 array += 1 + data->callchain->nr; 449 } 450 451 if (type & PERF_SAMPLE_RAW) { 452 const u64 *pdata; 453 454 u.val64 = *array; 455 if (WARN_ONCE(swapped, 456 "Endianness of raw data not corrected!\n")) { 457 /* undo swap of u64, then swap on individual u32s */ 458 u.val64 = bswap_64(u.val64); 459 u.val32[0] = bswap_32(u.val32[0]); 460 u.val32[1] = bswap_32(u.val32[1]); 461 } 462 463 if (sample_overlap(event, array, sizeof(u32))) 464 return -EFAULT; 465 466 data->raw_size = u.val32[0]; 467 pdata = (void *) array + sizeof(u32); 468 469 if (sample_overlap(event, pdata, data->raw_size)) 470 return -EFAULT; 471 472 data->raw_data = (void *) pdata; 473 } 474 475 return 0; 476 } 477