1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory bandwidth monitoring and allocation library 4 * 5 * Copyright (C) 2018 Intel Corporation 6 * 7 * Authors: 8 * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, 9 * Fenghua Yu <fenghua.yu@intel.com> 10 */ 11 #include "resctrl.h" 12 13 #define UNCORE_IMC "uncore_imc" 14 #define READ_FILE_NAME "events/cas_count_read" 15 #define DYN_PMU_PATH "/sys/bus/event_source/devices" 16 #define SCALE 0.00006103515625 17 #define MAX_IMCS 20 18 #define MAX_TOKENS 5 19 20 #define CON_MBM_LOCAL_BYTES_PATH \ 21 "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" 22 23 struct membw_read_format { 24 __u64 value; /* The value of the event */ 25 __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */ 26 __u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */ 27 __u64 id; /* if PERF_FORMAT_ID */ 28 }; 29 30 struct imc_counter_config { 31 __u32 type; 32 __u64 event; 33 __u64 umask; 34 struct perf_event_attr pe; 35 struct membw_read_format return_value; 36 int fd; 37 }; 38 39 static char mbm_total_path[1024]; 40 static int imcs; 41 static struct imc_counter_config imc_counters_config[MAX_IMCS]; 42 static const struct resctrl_test *current_test; 43 44 static void read_mem_bw_initialize_perf_event_attr(int i) 45 { 46 memset(&imc_counters_config[i].pe, 0, 47 sizeof(struct perf_event_attr)); 48 imc_counters_config[i].pe.type = imc_counters_config[i].type; 49 imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); 50 imc_counters_config[i].pe.disabled = 1; 51 imc_counters_config[i].pe.inherit = 1; 52 imc_counters_config[i].pe.exclude_guest = 0; 53 imc_counters_config[i].pe.config = 54 imc_counters_config[i].umask << 8 | 55 imc_counters_config[i].event; 56 imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; 57 imc_counters_config[i].pe.read_format = 58 PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; 59 } 60 61 static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) 62 { 63 
ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); 64 ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); 65 } 66 67 static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) 68 { 69 ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); 70 } 71 72 /* 73 * get_read_event_and_umask: Parse config into event and umask 74 * @cas_count_cfg: Config 75 * @count: iMC number 76 */ 77 static void get_read_event_and_umask(char *cas_count_cfg, int count) 78 { 79 char *token[MAX_TOKENS]; 80 int i = 0; 81 82 token[0] = strtok(cas_count_cfg, "=,"); 83 84 for (i = 1; i < MAX_TOKENS; i++) 85 token[i] = strtok(NULL, "=,"); 86 87 for (i = 0; i < MAX_TOKENS - 1; i++) { 88 if (!token[i]) 89 break; 90 if (strcmp(token[i], "event") == 0) 91 imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); 92 if (strcmp(token[i], "umask") == 0) 93 imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); 94 } 95 } 96 97 static int open_perf_read_event(int i, int cpu_no) 98 { 99 imc_counters_config[i].fd = 100 perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, 101 PERF_FLAG_FD_CLOEXEC); 102 103 if (imc_counters_config[i].fd == -1) { 104 fprintf(stderr, "Error opening leader %llx\n", 105 imc_counters_config[i].pe.config); 106 107 return -1; 108 } 109 110 return 0; 111 } 112 113 /* Get type and config of an iMC counter's read event. 
*/ 114 static int read_from_imc_dir(char *imc_dir, int count) 115 { 116 char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; 117 FILE *fp; 118 119 /* Get type of iMC counter */ 120 sprintf(imc_counter_type, "%s%s", imc_dir, "type"); 121 fp = fopen(imc_counter_type, "r"); 122 if (!fp) { 123 ksft_perror("Failed to open iMC counter type file"); 124 125 return -1; 126 } 127 if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { 128 ksft_perror("Could not get iMC type"); 129 fclose(fp); 130 131 return -1; 132 } 133 fclose(fp); 134 135 /* Get read config */ 136 sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); 137 fp = fopen(imc_counter_cfg, "r"); 138 if (!fp) { 139 ksft_perror("Failed to open iMC config file"); 140 141 return -1; 142 } 143 if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { 144 ksft_perror("Could not get iMC cas count read"); 145 fclose(fp); 146 147 return -1; 148 } 149 fclose(fp); 150 151 get_read_event_and_umask(cas_count_cfg, count); 152 153 return 0; 154 } 155 156 /* 157 * A system can have 'n' number of iMC (Integrated Memory Controller) 158 * counters, get that 'n'. Discover the properties of the available 159 * counters in support of needed performance measurement via perf. 160 * For each iMC counter get it's type and config. Also obtain each 161 * counter's event and umask for the memory read events that will be 162 * measured. 163 * 164 * Enumerate all these details into an array of structures. 165 * 166 * Return: >= 0 on success. < 0 on failure. 167 */ 168 static int num_of_imcs(void) 169 { 170 char imc_dir[512], *temp; 171 unsigned int count = 0; 172 struct dirent *ep; 173 int ret; 174 DIR *dp; 175 176 dp = opendir(DYN_PMU_PATH); 177 if (dp) { 178 while ((ep = readdir(dp))) { 179 temp = strstr(ep->d_name, UNCORE_IMC); 180 if (!temp) 181 continue; 182 183 /* 184 * imc counters are named as "uncore_imc_<n>", hence 185 * increment the pointer to point to <n>. 
Note that 186 * sizeof(UNCORE_IMC) would count for null character as 187 * well and hence the last underscore character in 188 * uncore_imc'_' need not be counted. 189 */ 190 temp = temp + sizeof(UNCORE_IMC); 191 192 /* 193 * Some directories under "DYN_PMU_PATH" could have 194 * names like "uncore_imc_free_running", hence, check if 195 * first character is a numerical digit or not. 196 */ 197 if (temp[0] >= '0' && temp[0] <= '9') { 198 sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH, 199 ep->d_name); 200 ret = read_from_imc_dir(imc_dir, count); 201 if (ret) { 202 closedir(dp); 203 204 return ret; 205 } 206 count++; 207 } 208 } 209 closedir(dp); 210 if (count == 0) { 211 ksft_print_msg("Unable to find iMC counters\n"); 212 213 return -1; 214 } 215 } else { 216 ksft_perror("Unable to open PMU directory"); 217 218 return -1; 219 } 220 221 return count; 222 } 223 224 int initialize_read_mem_bw_imc(void) 225 { 226 int imc; 227 228 imcs = num_of_imcs(); 229 if (imcs <= 0) 230 return imcs; 231 232 /* Initialize perf_event_attr structures for all iMC's */ 233 for (imc = 0; imc < imcs; imc++) 234 read_mem_bw_initialize_perf_event_attr(imc); 235 236 return 0; 237 } 238 239 static void perf_close_imc_read_mem_bw(void) 240 { 241 int mc; 242 243 for (mc = 0; mc < imcs; mc++) { 244 if (imc_counters_config[mc].fd != -1) 245 close(imc_counters_config[mc].fd); 246 } 247 } 248 249 /* 250 * perf_open_imc_read_mem_bw - Open perf fds for IMCs 251 * @cpu_no: CPU number that the benchmark PID is bound to 252 * 253 * Return: = 0 on success. < 0 on failure. 
 */
static int perf_open_imc_read_mem_bw(int cpu_no)
{
	int imc, ret;

	/* Mark every fd invalid first so cleanup can tell which were opened. */
	for (imc = 0; imc < imcs; imc++)
		imc_counters_config[imc].fd = -1;

	for (imc = 0; imc < imcs; imc++) {
		ret = open_perf_read_event(imc, cpu_no);
		if (ret)
			goto close_fds;
	}

	return 0;

close_fds:
	/* Close the fds opened before the failure. */
	perf_close_imc_read_mem_bw();
	return -1;
}

/*
 * do_imc_read_mem_bw_test - Perform memory bandwidth test
 *
 * Runs memory bandwidth test over one second period. Also, handles starting
 * and stopping of the IMC perf counters around the test.
 */
static void do_imc_read_mem_bw_test(void)
{
	int imc;

	for (imc = 0; imc < imcs; imc++)
		read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc);

	/* The benchmark runs in a separate process; just let it run for 1s. */
	sleep(1);

	/* Stop counters after a second to get results. */
	for (imc = 0; imc < imcs; imc++)
		read_mem_bw_ioctl_perf_event_ioc_disable(imc);
}

/*
 * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters
 *
 * Memory read bandwidth utilized by a process on a socket can be calculated
 * using iMC counters' read events. Perf events are used to read these
 * counters.
 *
 * Return: = 0 on success. < 0 on failure.
 */
static int get_read_mem_bw_imc(float *bw_imc)
{
	float reads = 0, of_mul_read = 1;
	int imc;

	/*
	 * Log read event values from all iMC counters into
	 * struct imc_counter_config.
	 * Take overflow into consideration before calculating total bandwidth.
	 */
	for (imc = 0; imc < imcs; imc++) {
		struct imc_counter_config *r =
			&imc_counters_config[imc];

		if (read(r->fd, &r->return_value,
			 sizeof(struct membw_read_format)) == -1) {
			ksft_perror("Couldn't get read bandwidth through iMC");
			return -1;
		}

		__u64 r_time_enabled = r->return_value.time_enabled;
		__u64 r_time_running = r->return_value.time_running;

		/*
		 * time_enabled != time_running means the event was not on the
		 * PMU the whole time (counter multiplexing); scale the raw
		 * count by enabled/running to estimate the full-period value.
		 */
		if (r_time_enabled != r_time_running)
			of_mul_read = (float)r_time_enabled /
					(float)r_time_running;

		/*
		 * NOTE(review): SCALE == 64 / 2^20; appears to convert
		 * 64-byte-granularity counts to MB — confirm against the
		 * event's sysfs scale file.
		 */
		reads += r->return_value.value * of_mul_read * SCALE;
	}

	*bw_imc = reads;
	return 0;
}

/*
 * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
 * @param:	Parameters passed to resctrl_val()
 * @domain_id:	Domain ID (cache ID; for MB, L3 cache ID)
 */
void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
			       int domain_id)
{
	sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
		param->ctrlgrp, domain_id);
}

/*
 * Open file to read MBM local bytes from resctrl FS
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *fp;

	fp = fopen(mbm_bw_file, "r");
	if (!fp)
		ksft_perror("Failed to open total memory bandwidth file");

	return fp;
}

/*
 * Get MBM Local bytes as reported by resctrl FS
 */
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	if (fscanf(fp, "%lu\n", mbm_total) <= 0) {
		ksft_perror("Could not get MBM local bytes");
		return -1;
	}
	return 0;
}

/* PID of the forked benchmark child; 0 until fork() in resctrl_val(). */
static pid_t bm_pid;

/* SIGINT/SIGTERM/SIGHUP handler: kill the child, unmount, cleanup, exit. */
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
	/* Only kill child after bm_pid is set after fork() */
	if (bm_pid)
		kill(bm_pid, SIGKILL);
	umount_resctrlfs();
	if (current_test && current_test->cleanup)
		current_test->cleanup();
	ksft_print_msg("Ending\n\n");

	exit(EXIT_SUCCESS);
}

/*
* Register CTRL-C handler for parent, as it has to kill 393 * child process before exiting. 394 */ 395 int signal_handler_register(const struct resctrl_test *test) 396 { 397 struct sigaction sigact = {}; 398 int ret = 0; 399 400 bm_pid = 0; 401 402 current_test = test; 403 sigact.sa_sigaction = ctrlc_handler; 404 sigemptyset(&sigact.sa_mask); 405 sigact.sa_flags = SA_SIGINFO; 406 if (sigaction(SIGINT, &sigact, NULL) || 407 sigaction(SIGTERM, &sigact, NULL) || 408 sigaction(SIGHUP, &sigact, NULL)) { 409 ksft_perror("sigaction"); 410 ret = -1; 411 } 412 return ret; 413 } 414 415 /* 416 * Reset signal handler to SIG_DFL. 417 * Non-Value return because the caller should keep 418 * the error code of other path even if sigaction fails. 419 */ 420 void signal_handler_unregister(void) 421 { 422 struct sigaction sigact = {}; 423 424 current_test = NULL; 425 sigact.sa_handler = SIG_DFL; 426 sigemptyset(&sigact.sa_mask); 427 if (sigaction(SIGINT, &sigact, NULL) || 428 sigaction(SIGTERM, &sigact, NULL) || 429 sigaction(SIGHUP, &sigact, NULL)) { 430 ksft_perror("sigaction"); 431 } 432 } 433 434 /* 435 * print_results_bw: the memory bandwidth results are stored in a file 436 * @filename: file that stores the results 437 * @bm_pid: child pid that runs benchmark 438 * @bw_imc: perf imc counter value 439 * @bw_resc: memory bandwidth value 440 * 441 * Return: 0 on success, < 0 on error. 
442 */ 443 static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, 444 unsigned long bw_resc) 445 { 446 unsigned long diff = fabs(bw_imc - bw_resc); 447 FILE *fp; 448 449 if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) { 450 printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc); 451 printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff); 452 } else { 453 fp = fopen(filename, "a"); 454 if (!fp) { 455 ksft_perror("Cannot open results file"); 456 457 return -1; 458 } 459 if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n", 460 (int)bm_pid, bw_imc, bw_resc, diff) <= 0) { 461 ksft_print_msg("Could not log results\n"); 462 fclose(fp); 463 464 return -1; 465 } 466 fclose(fp); 467 } 468 469 return 0; 470 } 471 472 /* 473 * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs 474 * @uparams: User supplied parameters 475 * @param: Parameters passed to resctrl_val() 476 * @bm_pid: PID that runs the benchmark 477 * 478 * Measure memory bandwidth from resctrl and from another source which is 479 * perf imc value or could be something else if perf imc event is not 480 * available. Compare the two values to validate resctrl value. It takes 481 * 1 sec to measure the data. 482 * resctrl does not distinguish between read and write operations so 483 * its data includes all memory operations. 
 */
int measure_read_mem_bw(const struct user_params *uparams,
			struct resctrl_val_param *param, pid_t bm_pid)
{
	unsigned long bw_resc, bw_resc_start, bw_resc_end;
	FILE *mem_bw_fp;
	float bw_imc;
	int ret;

	mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
	if (!mem_bw_fp)
		return -1;

	ret = perf_open_imc_read_mem_bw(uparams->cpu);
	if (ret < 0)
		goto close_fp;

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
	if (ret < 0)
		goto close_imc;

	/* Re-read the same resctrl file after the measurement window. */
	rewind(mem_bw_fp);

	/* 1s window with the iMC counters enabled around it. */
	do_imc_read_mem_bw_test();

	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
	if (ret < 0)
		goto close_imc;

	ret = get_read_mem_bw_imc(&bw_imc);
	if (ret < 0)
		goto close_imc;

	perf_close_imc_read_mem_bw();
	fclose(mem_bw_fp);

	/* Byte delta over the 1s window, converted to MB (i.e. MB/s). */
	bw_resc = (bw_resc_end - bw_resc_start) / MB;

	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);

close_imc:
	perf_close_imc_read_mem_bw();
close_fp:
	fclose(mem_bw_fp);
	return ret;
}

/*
 * resctrl_val:	execute benchmark and measure memory bandwidth on
 *		the benchmark
 * @test:	test information structure
 * @uparams:	user supplied parameters
 * @param:	parameters passed to resctrl_val()
 *
 * Return: 0 when the test was run, < 0 on error.
 */
int resctrl_val(const struct resctrl_test *test,
		const struct user_params *uparams,
		struct resctrl_val_param *param)
{
	unsigned char *buf = NULL;
	cpu_set_t old_affinity;
	int domain_id;
	int ret = 0;
	pid_t ppid;

	/* Default to console output when no results file was given. */
	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
	if (ret < 0) {
		ksft_print_msg("Could not get domain ID\n");
		return ret;
	}

	ppid = getpid();

	/* Taskset test to specified CPU. */
	ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity);
	if (ret)
		return ret;

	/* Write test to specified control & monitoring group in resctrl FS. */
	ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp);
	if (ret)
		goto reset_affinity;

	if (param->init) {
		ret = param->init(param, domain_id);
		if (ret)
			goto reset_affinity;
	}

	/*
	 * If not running user provided benchmark, run the default
	 * "fill_buf". First phase of "fill_buf" is to prepare the
	 * buffer that the benchmark will operate on. No measurements
	 * are needed during this phase and prepared memory will be
	 * passed to next part of benchmark via copy-on-write thus
	 * no impact on the benchmark that relies on reading from
	 * memory only.
	 */
	if (param->fill_buf) {
		buf = alloc_buffer(param->fill_buf->buf_size,
				   param->fill_buf->memflush);
		if (!buf) {
			ret = -ENOMEM;
			goto reset_affinity;
		}
	}

	/* Flush before fork() so buffered output is not duplicated. */
	fflush(stdout);
	bm_pid = fork();
	if (bm_pid == -1) {
		ret = -errno;
		ksft_perror("Unable to fork");
		goto free_buf;
	}

	/*
	 * What needs to be measured runs in separate process until
	 * terminated.
	 */
	if (bm_pid == 0) {
		if (param->fill_buf)
			fill_cache_read(buf, param->fill_buf->buf_size, false);
		else if (uparams->benchmark_cmd[0])
			execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd);
		exit(EXIT_SUCCESS);
	}

	ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);

	/* Give benchmark enough time to fully run. */
	sleep(1);

	/* Test runs until the callback setup() tells the test to stop. */
	while (1) {
		ret = param->setup(test, uparams, param);
		if (ret == END_OF_TESTS) {
			ret = 0;
			break;
		}
		if (ret < 0)
			break;

		ret = param->measure(uparams, param, bm_pid);
		if (ret)
			break;
	}

	kill(bm_pid, SIGKILL);
free_buf:
	free(buf);
reset_affinity:
	taskset_restore(ppid, &old_affinity);
	return ret;
}