1 // SPDX-License-Identifier: GPL-2.0 2 #include <api/fs/fs.h> 3 #include "cpumap.h" 4 #include "debug.h" 5 #include "event.h" 6 #include <assert.h> 7 #include <dirent.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <linux/bitmap.h> 11 #include "asm/bug.h" 12 13 #include <linux/ctype.h> 14 #include <linux/zalloc.h> 15 16 static struct perf_cpu max_cpu_num; 17 static struct perf_cpu max_present_cpu_num; 18 static int max_node_num; 19 /** 20 * The numa node X as read from /sys/devices/system/node/nodeX indexed by the 21 * CPU number. 22 */ 23 static int *cpunode_map; 24 25 bool perf_record_cpu_map_data__test_bit(int i, 26 const struct perf_record_cpu_map_data *data) 27 { 28 int bit_word32 = i / 32; 29 __u32 bit_mask32 = 1U << (i & 31); 30 int bit_word64 = i / 64; 31 __u64 bit_mask64 = ((__u64)1) << (i & 63); 32 33 return (data->mask32_data.long_size == 4) 34 ? (bit_word32 < data->mask32_data.nr) && 35 (data->mask32_data.mask[bit_word32] & bit_mask32) != 0 36 : (bit_word64 < data->mask64_data.nr) && 37 (data->mask64_data.mask[bit_word64] & bit_mask64) != 0; 38 } 39 40 /* Read ith mask value from data into the given 64-bit sized bitmap */ 41 static void perf_record_cpu_map_data__read_one_mask(const struct perf_record_cpu_map_data *data, 42 int i, unsigned long *bitmap) 43 { 44 #if __SIZEOF_LONG__ == 8 45 if (data->mask32_data.long_size == 4) 46 bitmap[0] = data->mask32_data.mask[i]; 47 else 48 bitmap[0] = data->mask64_data.mask[i]; 49 #else 50 if (data->mask32_data.long_size == 4) { 51 bitmap[0] = data->mask32_data.mask[i]; 52 bitmap[1] = 0; 53 } else { 54 #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 55 bitmap[0] = (unsigned long)(data->mask64_data.mask[i] >> 32); 56 bitmap[1] = (unsigned long)data->mask64_data.mask[i]; 57 #else 58 bitmap[0] = (unsigned long)data->mask64_data.mask[i]; 59 bitmap[1] = (unsigned long)(data->mask64_data.mask[i] >> 32); 60 #endif 61 } 62 #endif 63 } 64 static struct perf_cpu_map *cpu_map__from_entries(const struct perf_record_cpu_map_data *data) 65 { 66 struct perf_cpu_map *map; 67 68 map = perf_cpu_map__empty_new(data->cpus_data.nr); 69 if (map) { 70 unsigned i; 71 72 for (i = 0; i < data->cpus_data.nr; i++) { 73 /* 74 * Special treatment for -1, which is not real cpu number, 75 * and we need to use (int) -1 to initialize map[i], 76 * otherwise it would become 65535. 77 */ 78 if (data->cpus_data.cpu[i] == (u16) -1) 79 map->map[i].cpu = -1; 80 else 81 map->map[i].cpu = (int) data->cpus_data.cpu[i]; 82 } 83 } 84 85 return map; 86 } 87 88 static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_data *data) 89 { 90 DECLARE_BITMAP(local_copy, 64); 91 int weight = 0, mask_nr = data->mask32_data.nr; 92 struct perf_cpu_map *map; 93 94 for (int i = 0; i < mask_nr; i++) { 95 perf_record_cpu_map_data__read_one_mask(data, i, local_copy); 96 weight += bitmap_weight(local_copy, 64); 97 } 98 99 map = perf_cpu_map__empty_new(weight); 100 if (!map) 101 return NULL; 102 103 for (int i = 0, j = 0; i < mask_nr; i++) { 104 int cpus_per_i = (i * data->mask32_data.long_size * BITS_PER_BYTE); 105 int cpu; 106 107 perf_record_cpu_map_data__read_one_mask(data, i, local_copy); 108 for_each_set_bit(cpu, local_copy, 64) 109 map->map[j++].cpu = cpu + cpus_per_i; 110 } 111 return map; 112 113 } 114 115 struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data) 116 { 117 if (data->type == PERF_CPU_MAP__CPUS) 118 return cpu_map__from_entries(data); 119 else 120 return cpu_map__from_mask(data); 121 } 122 123 size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp) 124 { 125 #define BUFSIZE 1024 126 char buf[BUFSIZE]; 127 128 cpu_map__snprint(map, buf, sizeof(buf)); 129 return fprintf(fp, "%s\n", buf); 130 #undef BUFSIZE 131 } 132 133 struct perf_cpu_map *perf_cpu_map__empty_new(int nr) 134 { 135 struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr); 136 137 if (cpus != NULL) { 138 int i; 139 140 cpus->nr = nr; 141 for (i = 0; i < nr; i++) 142 cpus->map[i].cpu = -1; 143 144 refcount_set(&cpus->refcnt, 1); 145 } 146 147 return cpus; 148 } 149 150 struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr) 151 { 152 struct cpu_aggr_map *cpus = malloc(sizeof(*cpus) + sizeof(struct aggr_cpu_id) * nr); 153 154 if (cpus != NULL) { 155 int i; 156 157 cpus->nr = nr; 158 for (i = 0; i < nr; i++) 159 cpus->map[i] = aggr_cpu_id__empty(); 160 161 refcount_set(&cpus->refcnt, 1); 162 } 163 164 return cpus; 165 } 166 167 static int cpu__get_topology_int(int cpu, const char *name, int *value) 168 { 169 char path[PATH_MAX]; 170 171 snprintf(path, PATH_MAX, 172 "devices/system/cpu/cpu%d/topology/%s", cpu, name); 173 174 return sysfs__read_int(path, value); 175 } 176 177 int cpu__get_socket_id(struct perf_cpu cpu) 178 { 179 int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value); 180 return ret ?: value; 181 } 182 183 struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused) 184 { 185 struct aggr_cpu_id id = aggr_cpu_id__empty(); 186 187 id.socket = cpu__get_socket_id(cpu); 188 return id; 189 } 190 191 static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) 192 { 193 struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer; 194 struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer; 195 196 if (a->node != b->node) 197 return a->node - b->node; 198 else if (a->socket != b->socket) 199 return a->socket - b->socket; 200 else if (a->die != b->die) 201 return a->die - b->die; 202 else if (a->core != b->core) 203 return a->core - b->core; 204 else 205 return a->thread - b->thread; 206 } 207 208 struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, 209 aggr_cpu_id_get_t get_id, 210 void *data) 211 { 212 int idx; 213 struct perf_cpu cpu; 214 struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr); 215 216 if (!c) 217 return NULL; 218 219 /* Reset size as it may only be partially filled */ 220 c->nr = 0; 221 222 perf_cpu_map__for_each_cpu(cpu, idx, cpus) { 223 bool duplicate = false; 224 struct aggr_cpu_id cpu_id = get_id(cpu, data); 225 226 for (int j = 0; j < c->nr; j++) { 227 if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) { 228 duplicate = true; 229 break; 230 } 231 } 232 if (!duplicate) { 233 c->map[c->nr] = cpu_id; 234 c->nr++; 235 } 236 } 237 /* Trim. */ 238 if (c->nr != cpus->nr) { 239 struct cpu_aggr_map *trimmed_c = 240 realloc(c, 241 sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr); 242 243 if (trimmed_c) 244 c = trimmed_c; 245 } 246 /* ensure we process id in increasing order */ 247 qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); 248 249 return c; 250 251 } 252 253 int cpu__get_die_id(struct perf_cpu cpu) 254 { 255 int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value); 256 257 return ret ?: value; 258 } 259 260 struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data) 261 { 262 struct aggr_cpu_id id; 263 int die; 264 265 die = cpu__get_die_id(cpu); 266 /* There is no die_id on legacy system. */ 267 if (die == -1) 268 die = 0; 269 270 /* 271 * die_id is relative to socket, so start 272 * with the socket ID and then add die to 273 * make a unique ID. 274 */ 275 id = aggr_cpu_id__socket(cpu, data); 276 if (aggr_cpu_id__is_empty(&id)) 277 return id; 278 279 id.die = die; 280 return id; 281 } 282 283 int cpu__get_core_id(struct perf_cpu cpu) 284 { 285 int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value); 286 return ret ?: value; 287 } 288 289 struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data) 290 { 291 struct aggr_cpu_id id; 292 int core = cpu__get_core_id(cpu); 293 294 /* aggr_cpu_id__die returns a struct with socket and die set. */ 295 id = aggr_cpu_id__die(cpu, data); 296 if (aggr_cpu_id__is_empty(&id)) 297 return id; 298 299 /* 300 * core_id is relative to socket and die, we need a global id. 301 * So we combine the result from cpu_map__get_die with the core id 302 */ 303 id.core = core; 304 return id; 305 306 } 307 308 struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data) 309 { 310 struct aggr_cpu_id id; 311 312 /* aggr_cpu_id__core returns a struct with socket, die and core set. */ 313 id = aggr_cpu_id__core(cpu, data); 314 if (aggr_cpu_id__is_empty(&id)) 315 return id; 316 317 id.cpu = cpu; 318 return id; 319 320 } 321 322 struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused) 323 { 324 struct aggr_cpu_id id = aggr_cpu_id__empty(); 325 326 id.node = cpu__get_node(cpu); 327 return id; 328 } 329 330 /* setup simple routines to easily access node numbers given a cpu number */ 331 static int get_max_num(char *path, int *max) 332 { 333 size_t num; 334 char *buf; 335 int err = 0; 336 337 if (filename__read_str(path, &buf, &num)) 338 return -1; 339 340 buf[num] = '\0'; 341 342 /* start on the right, to find highest node num */ 343 while (--num) { 344 if ((buf[num] == ',') || (buf[num] == '-')) { 345 num++; 346 break; 347 } 348 } 349 if (sscanf(&buf[num], "%d", max) < 1) { 350 err = -1; 351 goto out; 352 } 353 354 /* convert from 0-based to 1-based */ 355 (*max)++; 356 357 out: 358 free(buf); 359 return err; 360 } 361 362 /* Determine highest possible cpu in the system for sparse allocation */ 363 static void set_max_cpu_num(void) 364 { 365 const char *mnt; 366 char path[PATH_MAX]; 367 int ret = -1; 368 369 /* set up default */ 370 max_cpu_num.cpu = 4096; 371 max_present_cpu_num.cpu = 4096; 372 373 mnt = sysfs__mountpoint(); 374 if (!mnt) 375 goto out; 376 377 /* get the highest possible cpu number for a sparse allocation */ 378 ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt); 379 if (ret >= PATH_MAX) { 380 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 381 goto out; 382 } 383 384 ret = get_max_num(path, &max_cpu_num.cpu); 385 if (ret) 386 goto out; 387 388 /* get the highest present cpu number for a sparse allocation */ 389 ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt); 390 if (ret >= PATH_MAX) { 391 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 392 goto out; 393 } 394 395 ret = get_max_num(path, &max_present_cpu_num.cpu); 396 397 out: 398 if (ret) 399 pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu); 400 } 401 402 /* Determine highest possible node in the system for sparse allocation */ 403 static void set_max_node_num(void) 404 { 405 const char *mnt; 406 char path[PATH_MAX]; 407 int ret = -1; 408 409 /* set up default */ 410 max_node_num = 8; 411 412 mnt = sysfs__mountpoint(); 413 if (!mnt) 414 goto out; 415 416 /* get the highest possible cpu number for a sparse allocation */ 417 ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt); 418 if (ret >= PATH_MAX) { 419 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 420 goto out; 421 } 422 423 ret = get_max_num(path, &max_node_num); 424 425 out: 426 if (ret) 427 pr_err("Failed to read max nodes, using default of %d\n", max_node_num); 428 } 429 430 int cpu__max_node(void) 431 { 432 if (unlikely(!max_node_num)) 433 set_max_node_num(); 434 435 return max_node_num; 436 } 437 438 struct perf_cpu cpu__max_cpu(void) 439 { 440 if (unlikely(!max_cpu_num.cpu)) 441 set_max_cpu_num(); 442 443 return max_cpu_num; 444 } 445 446 struct perf_cpu cpu__max_present_cpu(void) 447 { 448 if (unlikely(!max_present_cpu_num.cpu)) 449 set_max_cpu_num(); 450 451 return max_present_cpu_num; 452 } 453 454 455 int cpu__get_node(struct perf_cpu cpu) 456 { 457 if (unlikely(cpunode_map == NULL)) { 458 pr_debug("cpu_map not initialized\n"); 459 return -1; 460 } 461 462 return cpunode_map[cpu.cpu]; 463 } 464 465 static int init_cpunode_map(void) 466 { 467 int i; 468 469 set_max_cpu_num(); 470 set_max_node_num(); 471 472 cpunode_map = calloc(max_cpu_num.cpu, sizeof(int)); 473 if (!cpunode_map) { 474 pr_err("%s: calloc failed\n", __func__); 475 return -1; 476 } 477 478 for (i = 0; i < max_cpu_num.cpu; i++) 479 cpunode_map[i] = -1; 480 481 return 0; 482 } 483 484 int cpu__setup_cpunode_map(void) 485 { 486 struct dirent *dent1, *dent2; 487 DIR *dir1, *dir2; 488 unsigned int cpu, mem; 489 char buf[PATH_MAX]; 490 char path[PATH_MAX]; 491 const char *mnt; 492 int n; 493 494 /* initialize globals */ 495 if (init_cpunode_map()) 496 return -1; 497 498 mnt = sysfs__mountpoint(); 499 if (!mnt) 500 return 0; 501 502 n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt); 503 if (n >= PATH_MAX) { 504 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 505 return -1; 506 } 507 508 dir1 = opendir(path); 509 if (!dir1) 510 return 0; 511 512 /* walk tree and setup map */ 513 while ((dent1 = readdir(dir1)) != NULL) { 514 if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1) 515 continue; 516 517 n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name); 518 if (n >= PATH_MAX) { 519 pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX); 520 continue; 521 } 522 523 dir2 = opendir(buf); 524 if (!dir2) 525 continue; 526 while ((dent2 = readdir(dir2)) != NULL) { 527 if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1) 528 continue; 529 cpunode_map[cpu] = mem; 530 } 531 closedir(dir2); 532 } 533 closedir(dir1); 534 return 0; 535 } 536 537 size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size) 538 { 539 int i, start = -1; 540 bool first = true; 541 size_t ret = 0; 542 543 #define COMMA first ? "" : "," 544 545 for (i = 0; i < map->nr + 1; i++) { 546 struct perf_cpu cpu = { .cpu = INT_MAX }; 547 bool last = i == map->nr; 548 549 if (!last) 550 cpu = map->map[i]; 551 552 if (start == -1) { 553 start = i; 554 if (last) { 555 ret += snprintf(buf + ret, size - ret, 556 "%s%d", COMMA, 557 map->map[i].cpu); 558 } 559 } else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) { 560 int end = i - 1; 561 562 if (start == end) { 563 ret += snprintf(buf + ret, size - ret, 564 "%s%d", COMMA, 565 map->map[start].cpu); 566 } else { 567 ret += snprintf(buf + ret, size - ret, 568 "%s%d-%d", COMMA, 569 map->map[start].cpu, map->map[end].cpu); 570 } 571 first = false; 572 start = i; 573 } 574 } 575 576 #undef COMMA 577 578 pr_debug2("cpumask list: %s\n", buf); 579 return ret; 580 } 581 582 static char hex_char(unsigned char val) 583 { 584 if (val < 10) 585 return val + '0'; 586 if (val < 16) 587 return val - 10 + 'a'; 588 return '?'; 589 } 590 591 size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) 592 { 593 int i, cpu; 594 char *ptr = buf; 595 unsigned char *bitmap; 596 struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1); 597 598 if (buf == NULL) 599 return 0; 600 601 bitmap = zalloc(last_cpu.cpu / 8 + 1); 602 if (bitmap == NULL) { 603 buf[0] = '\0'; 604 return 0; 605 } 606 607 for (i = 0; i < map->nr; i++) { 608 cpu = perf_cpu_map__cpu(map, i).cpu; 609 bitmap[cpu / 8] |= 1 << (cpu % 8); 610 } 611 612 for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { 613 unsigned char bits = bitmap[cpu / 8]; 614 615 if (cpu % 8) 616 bits >>= 4; 617 else 618 bits &= 0xf; 619 620 *ptr++ = hex_char(bits); 621 if ((cpu % 32) == 0 && cpu > 0) 622 *ptr++ = ','; 623 } 624 *ptr = '\0'; 625 free(bitmap); 626 627 buf[size - 1] = '\0'; 628 return ptr - buf; 629 } 630 631 const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */ 632 { 633 static const struct perf_cpu_map *online = NULL; 634 635 if (!online) 636 online = perf_cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ 637 638 return online; 639 } 640 641 bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b) 642 { 643 return a->thread == b->thread && 644 a->node == b->node && 645 a->socket == b->socket && 646 a->die == b->die && 647 a->core == b->core && 648 a->cpu.cpu == b->cpu.cpu; 649 } 650 651 bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a) 652 { 653 return a->thread == -1 && 654 a->node == -1 && 655 a->socket == -1 && 656 a->die == -1 && 657 a->core == -1 && 658 a->cpu.cpu == -1; 659 } 660 661 struct aggr_cpu_id aggr_cpu_id__empty(void) 662 { 663 struct aggr_cpu_id ret = { 664 .thread = -1, 665 .node = -1, 666 .socket = -1, 667 .die = -1, 668 .core = -1, 669 .cpu = (struct perf_cpu){ .cpu = -1 }, 670 }; 671 return ret; 672 } 673