xref: /linux/tools/testing/selftests/resctrl/resctrl_val.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Memory bandwidth monitoring and allocation library
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include "resctrl.h"
12 
/* Substring searched for in PMU directory names to find iMC counters. */
#define UNCORE_IMC		"uncore_imc"
/* File, relative to an iMC PMU directory, holding the read event config. */
#define READ_FILE_NAME		"events/cas_count_read"
/* Directory whose entries are scanned for iMC PMUs. */
#define DYN_PMU_PATH		"/sys/bus/event_source/devices"
/*
 * Factor applied to raw iMC counter values when summing reads. Numerically
 * equal to 64 / (1024 * 1024); presumably 64 bytes per counted event
 * converted to MB - TODO confirm.
 */
#define SCALE			0.00006103515625
/* Capacity of the imc_counters_config[] array. */
#define MAX_IMCS		20
/* Maximum number of tokens extracted from an event config string. */
#define MAX_TOKENS		5

/*
 * printf format of the resctrl mbm_local_bytes file; the arguments are the
 * resctrl root path, the control group name and the domain ID.
 */
#define CON_MBM_LOCAL_BYTES_PATH		\
	"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
22 
/*
 * Buffer handed to read() on an iMC perf event fd in get_read_mem_bw_imc().
 *
 * The read_format set up in read_mem_bw_initialize_perf_event_attr()
 * requests only the enabled and running times, not PERF_FORMAT_ID, and no
 * code in this file reads @id.
 */
struct membw_read_format {
	__u64 value;         /* The value of the event */
	__u64 time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	__u64 time_running;  /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	__u64 id;            /* if PERF_FORMAT_ID */
};
29 
/*
 * State kept for one iMC counter.
 *
 * @type, @event and @umask are filled in from sysfs by read_from_imc_dir(),
 * @pe is derived from them by read_mem_bw_initialize_perf_event_attr(),
 * @fd is set by open_perf_read_event() and @return_value receives the
 * counter reading in get_read_mem_bw_imc().
 */
struct imc_counter_config {
	__u32 type;		/* PMU type read from the "type" file */
	__u64 event;		/* "event" value of the read event config */
	__u64 umask;		/* "umask" value of the read event config */
	struct perf_event_attr pe;
	struct membw_read_format return_value;
	int fd;			/* perf event fd, -1 when not open */
};
38 
/* Path of the resctrl file read for bandwidth; set by initialize_mem_bw_resctrl(). */
static char mbm_total_path[1024];
/* Result of num_of_imcs(), stored by initialize_read_mem_bw_imc(). */
static int imcs;
/* Per-iMC counter state; entries [0, imcs) are in use. */
static struct imc_counter_config imc_counters_config[MAX_IMCS];
/* Test whose cleanup() the signal handler calls; NULL when none is registered. */
static const struct resctrl_test *current_test;
43 
44 static void read_mem_bw_initialize_perf_event_attr(int i)
45 {
46 	memset(&imc_counters_config[i].pe, 0,
47 	       sizeof(struct perf_event_attr));
48 	imc_counters_config[i].pe.type = imc_counters_config[i].type;
49 	imc_counters_config[i].pe.size = sizeof(struct perf_event_attr);
50 	imc_counters_config[i].pe.disabled = 1;
51 	imc_counters_config[i].pe.inherit = 1;
52 	imc_counters_config[i].pe.exclude_guest = 0;
53 	imc_counters_config[i].pe.config =
54 		imc_counters_config[i].umask << 8 |
55 		imc_counters_config[i].event;
56 	imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
57 	imc_counters_config[i].pe.read_format =
58 		PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
59 }
60 
61 static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i)
62 {
63 	ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0);
64 	ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0);
65 }
66 
67 static void read_mem_bw_ioctl_perf_event_ioc_disable(int i)
68 {
69 	ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0);
70 }
71 
72 /*
73  * get_read_event_and_umask:	Parse config into event and umask
74  * @cas_count_cfg:	Config
75  * @count:		iMC number
76  */
77 static void get_read_event_and_umask(char *cas_count_cfg, int count)
78 {
79 	char *token[MAX_TOKENS];
80 	int i = 0;
81 
82 	token[0] = strtok(cas_count_cfg, "=,");
83 
84 	for (i = 1; i < MAX_TOKENS; i++)
85 		token[i] = strtok(NULL, "=,");
86 
87 	for (i = 0; i < MAX_TOKENS - 1; i++) {
88 		if (!token[i])
89 			break;
90 		if (strcmp(token[i], "event") == 0)
91 			imc_counters_config[count].event = strtol(token[i + 1], NULL, 16);
92 		if (strcmp(token[i], "umask") == 0)
93 			imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16);
94 	}
95 }
96 
97 static int open_perf_read_event(int i, int cpu_no)
98 {
99 	imc_counters_config[i].fd =
100 		perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1,
101 				PERF_FLAG_FD_CLOEXEC);
102 
103 	if (imc_counters_config[i].fd == -1) {
104 		fprintf(stderr, "Error opening leader %llx\n",
105 			imc_counters_config[i].pe.config);
106 
107 		return -1;
108 	}
109 
110 	return 0;
111 }
112 
113 /* Get type and config of an iMC counter's read event. */
114 static int read_from_imc_dir(char *imc_dir, int count)
115 {
116 	char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
117 	FILE *fp;
118 
119 	/* Get type of iMC counter */
120 	sprintf(imc_counter_type, "%s%s", imc_dir, "type");
121 	fp = fopen(imc_counter_type, "r");
122 	if (!fp) {
123 		ksft_perror("Failed to open iMC counter type file");
124 
125 		return -1;
126 	}
127 	if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) {
128 		ksft_perror("Could not get iMC type");
129 		fclose(fp);
130 
131 		return -1;
132 	}
133 	fclose(fp);
134 
135 	/* Get read config */
136 	sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
137 	fp = fopen(imc_counter_cfg, "r");
138 	if (!fp) {
139 		ksft_perror("Failed to open iMC config file");
140 
141 		return -1;
142 	}
143 	if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) {
144 		ksft_perror("Could not get iMC cas count read");
145 		fclose(fp);
146 
147 		return -1;
148 	}
149 	fclose(fp);
150 
151 	get_read_event_and_umask(cas_count_cfg, count);
152 
153 	return 0;
154 }
155 
156 /*
157  * A system can have 'n' number of iMC (Integrated Memory Controller)
158  * counters, get that 'n'. Discover the properties of the available
159  * counters in support of needed performance measurement via perf.
160  * For each iMC counter get it's type and config. Also obtain each
161  * counter's event and umask for the memory read events that will be
162  * measured.
163  *
164  * Enumerate all these details into an array of structures.
165  *
166  * Return: >= 0 on success. < 0 on failure.
167  */
168 static int num_of_imcs(void)
169 {
170 	char imc_dir[512], *temp;
171 	unsigned int count = 0;
172 	struct dirent *ep;
173 	int ret;
174 	DIR *dp;
175 
176 	dp = opendir(DYN_PMU_PATH);
177 	if (dp) {
178 		while ((ep = readdir(dp))) {
179 			temp = strstr(ep->d_name, UNCORE_IMC);
180 			if (!temp)
181 				continue;
182 
183 			/*
184 			 * imc counters are named as "uncore_imc_<n>", hence
185 			 * increment the pointer to point to <n>. Note that
186 			 * sizeof(UNCORE_IMC) would count for null character as
187 			 * well and hence the last underscore character in
188 			 * uncore_imc'_' need not be counted.
189 			 */
190 			temp = temp + sizeof(UNCORE_IMC);
191 
192 			/*
193 			 * Some directories under "DYN_PMU_PATH" could have
194 			 * names like "uncore_imc_free_running", hence, check if
195 			 * first character is a numerical digit or not.
196 			 */
197 			if (temp[0] >= '0' && temp[0] <= '9') {
198 				sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
199 					ep->d_name);
200 				ret = read_from_imc_dir(imc_dir, count);
201 				if (ret) {
202 					closedir(dp);
203 
204 					return ret;
205 				}
206 				count++;
207 			}
208 		}
209 		closedir(dp);
210 		if (count == 0) {
211 			ksft_print_msg("Unable to find iMC counters\n");
212 
213 			return -1;
214 		}
215 	} else {
216 		ksft_perror("Unable to open PMU directory");
217 
218 		return -1;
219 	}
220 
221 	return count;
222 }
223 
224 int initialize_read_mem_bw_imc(void)
225 {
226 	int imc;
227 
228 	imcs = num_of_imcs();
229 	if (imcs <= 0)
230 		return imcs;
231 
232 	/* Initialize perf_event_attr structures for all iMC's */
233 	for (imc = 0; imc < imcs; imc++)
234 		read_mem_bw_initialize_perf_event_attr(imc);
235 
236 	return 0;
237 }
238 
239 static void perf_close_imc_read_mem_bw(void)
240 {
241 	int mc;
242 
243 	for (mc = 0; mc < imcs; mc++) {
244 		if (imc_counters_config[mc].fd != -1)
245 			close(imc_counters_config[mc].fd);
246 	}
247 }
248 
249 /*
250  * perf_open_imc_read_mem_bw - Open perf fds for IMCs
251  * @cpu_no: CPU number that the benchmark PID is bound to
252  *
253  * Return: = 0 on success. < 0 on failure.
254  */
255 static int perf_open_imc_read_mem_bw(int cpu_no)
256 {
257 	int imc, ret;
258 
259 	for (imc = 0; imc < imcs; imc++)
260 		imc_counters_config[imc].fd = -1;
261 
262 	for (imc = 0; imc < imcs; imc++) {
263 		ret = open_perf_read_event(imc, cpu_no);
264 		if (ret)
265 			goto close_fds;
266 	}
267 
268 	return 0;
269 
270 close_fds:
271 	perf_close_imc_read_mem_bw();
272 	return -1;
273 }
274 
275 /*
276  * do_imc_read_mem_bw_test - Perform memory bandwidth test
277  *
278  * Runs memory bandwidth test over one second period. Also, handles starting
279  * and stopping of the IMC perf counters around the test.
280  */
281 static void do_imc_read_mem_bw_test(void)
282 {
283 	int imc;
284 
285 	for (imc = 0; imc < imcs; imc++)
286 		read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc);
287 
288 	sleep(1);
289 
290 	/* Stop counters after a second to get results. */
291 	for (imc = 0; imc < imcs; imc++)
292 		read_mem_bw_ioctl_perf_event_ioc_disable(imc);
293 }
294 
295 /*
296  * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters
297  *
298  * Memory read bandwidth utilized by a process on a socket can be calculated
299  * using iMC counters' read events. Perf events are used to read these
300  * counters.
301  *
302  * Return: = 0 on success. < 0 on failure.
303  */
304 static int get_read_mem_bw_imc(float *bw_imc)
305 {
306 	float reads = 0, of_mul_read = 1;
307 	int imc;
308 
309 	/*
310 	 * Log read event values from all iMC counters into
311 	 * struct imc_counter_config.
312 	 * Take overflow into consideration before calculating total bandwidth.
313 	 */
314 	for (imc = 0; imc < imcs; imc++) {
315 		struct imc_counter_config *r =
316 			&imc_counters_config[imc];
317 
318 		if (read(r->fd, &r->return_value,
319 			 sizeof(struct membw_read_format)) == -1) {
320 			ksft_perror("Couldn't get read bandwidth through iMC");
321 			return -1;
322 		}
323 
324 		__u64 r_time_enabled = r->return_value.time_enabled;
325 		__u64 r_time_running = r->return_value.time_running;
326 
327 		if (r_time_enabled != r_time_running)
328 			of_mul_read = (float)r_time_enabled /
329 					(float)r_time_running;
330 
331 		reads += r->return_value.value * of_mul_read * SCALE;
332 	}
333 
334 	*bw_imc = reads;
335 	return 0;
336 }
337 
338 /*
339  * initialize_mem_bw_resctrl:	Appropriately populate "mbm_total_path"
340  * @param:	Parameters passed to resctrl_val()
341  * @domain_id:	Domain ID (cache ID; for MB, L3 cache ID)
342  */
343 void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
344 			       int domain_id)
345 {
346 	sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
347 		param->ctrlgrp, domain_id);
348 }
349 
/*
 * Open file to read MBM local bytes from resctrl FS
 * @mbm_bw_file:	path of the file to open for reading
 *
 * Return: the opened stream, or NULL after reporting the error.
 */
static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
	FILE *stream = fopen(mbm_bw_file, "r");

	if (stream)
		return stream;

	ksft_perror("Failed to open total memory bandwidth file");

	return NULL;
}
363 
/*
 * Get MBM Local bytes as reported by resctrl FS
 * @fp:		stream to parse one unsigned number from
 * @mbm_total:	receives the parsed number
 *
 * Return: 0 on success, -1 if no number could be parsed.
 */
static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
	int matched = fscanf(fp, "%lu\n", mbm_total);

	if (matched > 0)
		return 0;

	ksft_perror("Could not get MBM local bytes");
	return -1;
}
375 
376 static pid_t bm_pid;
377 
378 void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
379 {
380 	/* Only kill child after bm_pid is set after fork() */
381 	if (bm_pid)
382 		kill(bm_pid, SIGKILL);
383 	umount_resctrlfs();
384 	if (current_test && current_test->cleanup)
385 		current_test->cleanup();
386 	ksft_print_msg("Ending\n\n");
387 
388 	exit(EXIT_SUCCESS);
389 }
390 
391 /*
392  * Register CTRL-C handler for parent, as it has to kill
393  * child process before exiting.
394  */
395 int signal_handler_register(const struct resctrl_test *test)
396 {
397 	struct sigaction sigact = {};
398 	int ret = 0;
399 
400 	bm_pid = 0;
401 
402 	current_test = test;
403 	sigact.sa_sigaction = ctrlc_handler;
404 	sigemptyset(&sigact.sa_mask);
405 	sigact.sa_flags = SA_SIGINFO;
406 	if (sigaction(SIGINT, &sigact, NULL) ||
407 	    sigaction(SIGTERM, &sigact, NULL) ||
408 	    sigaction(SIGHUP, &sigact, NULL)) {
409 		ksft_perror("sigaction");
410 		ret = -1;
411 	}
412 	return ret;
413 }
414 
415 /*
416  * Reset signal handler to SIG_DFL.
417  * Non-Value return because the caller should keep
418  * the error code of other path even if sigaction fails.
419  */
420 void signal_handler_unregister(void)
421 {
422 	struct sigaction sigact = {};
423 
424 	current_test = NULL;
425 	sigact.sa_handler = SIG_DFL;
426 	sigemptyset(&sigact.sa_mask);
427 	if (sigaction(SIGINT, &sigact, NULL) ||
428 	    sigaction(SIGTERM, &sigact, NULL) ||
429 	    sigaction(SIGHUP, &sigact, NULL)) {
430 		ksft_perror("sigaction");
431 	}
432 }
433 
/*
 * print_results_bw:	the memory bandwidth results are stored in a file
 * @filename:		file that stores the results; "stdio" and "stderr"
 *			both make the results go to standard output instead
 * @bm_pid:		child pid that runs benchmark
 * @bw_imc:		perf imc counter value
 * @bw_resc:		memory bandwidth value
 *
 * Besides the two values their absolute difference is logged as well.
 * A results file is opened in append mode.
 *
 * Return:		0 on success, < 0 on error.
 */
static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
			    unsigned long bw_resc)
{
	unsigned long diff = fabs(bw_imc - bw_resc);
	FILE *log;
	int written;

	if (!strcmp(filename, "stdio") || !strcmp(filename, "stderr")) {
		printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
		printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);

		return 0;
	}

	log = fopen(filename, "a");
	if (!log) {
		ksft_perror("Cannot open results file");

		return -1;
	}

	written = fprintf(log,
			  "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
			  (int)bm_pid, bw_imc, bw_resc, diff);
	if (written <= 0) {
		ksft_print_msg("Could not log results\n");
		fclose(log);

		return -1;
	}
	fclose(log);

	return 0;
}
471 
472 /*
473  * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs
474  * @uparams:		User supplied parameters
475  * @param:		Parameters passed to resctrl_val()
476  * @bm_pid:		PID that runs the benchmark
477  *
478  * Measure memory bandwidth from resctrl and from another source which is
479  * perf imc value or could be something else if perf imc event is not
480  * available. Compare the two values to validate resctrl value. It takes
481  * 1 sec to measure the data.
482  * resctrl does not distinguish between read and write operations so
483  * its data includes all memory operations.
484  */
485 int measure_read_mem_bw(const struct user_params *uparams,
486 			struct resctrl_val_param *param, pid_t bm_pid)
487 {
488 	unsigned long bw_resc, bw_resc_start, bw_resc_end;
489 	FILE *mem_bw_fp;
490 	float bw_imc;
491 	int ret;
492 
493 	mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
494 	if (!mem_bw_fp)
495 		return -1;
496 
497 	ret = perf_open_imc_read_mem_bw(uparams->cpu);
498 	if (ret < 0)
499 		goto close_fp;
500 
501 	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
502 	if (ret < 0)
503 		goto close_imc;
504 
505 	rewind(mem_bw_fp);
506 
507 	do_imc_read_mem_bw_test();
508 
509 	ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
510 	if (ret < 0)
511 		goto close_imc;
512 
513 	ret = get_read_mem_bw_imc(&bw_imc);
514 	if (ret < 0)
515 		goto close_imc;
516 
517 	perf_close_imc_read_mem_bw();
518 	fclose(mem_bw_fp);
519 
520 	bw_resc = (bw_resc_end - bw_resc_start) / MB;
521 
522 	return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
523 
524 close_imc:
525 	perf_close_imc_read_mem_bw();
526 close_fp:
527 	fclose(mem_bw_fp);
528 	return ret;
529 }
530 
531 /*
532  * resctrl_val:	execute benchmark and measure memory bandwidth on
533  *			the benchmark
534  * @test:		test information structure
535  * @uparams:		user supplied parameters
536  * @param:		parameters passed to resctrl_val()
537  *
538  * Return:		0 when the test was run, < 0 on error.
539  */
540 int resctrl_val(const struct resctrl_test *test,
541 		const struct user_params *uparams,
542 		struct resctrl_val_param *param)
543 {
544 	unsigned char *buf = NULL;
545 	cpu_set_t old_affinity;
546 	int domain_id;
547 	int ret = 0;
548 	pid_t ppid;
549 
550 	if (strcmp(param->filename, "") == 0)
551 		sprintf(param->filename, "stdio");
552 
553 	ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
554 	if (ret < 0) {
555 		ksft_print_msg("Could not get domain ID\n");
556 		return ret;
557 	}
558 
559 	ppid = getpid();
560 
561 	/* Taskset test to specified CPU. */
562 	ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity);
563 	if (ret)
564 		return ret;
565 
566 	/* Write test to specified control & monitoring group in resctrl FS. */
567 	ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp);
568 	if (ret)
569 		goto reset_affinity;
570 
571 	if (param->init) {
572 		ret = param->init(param, domain_id);
573 		if (ret)
574 			goto reset_affinity;
575 	}
576 
577 	/*
578 	 * If not running user provided benchmark, run the default
579 	 * "fill_buf". First phase of "fill_buf" is to prepare the
580 	 * buffer that the benchmark will operate on. No measurements
581 	 * are needed during this phase and prepared memory will be
582 	 * passed to next part of benchmark via copy-on-write thus
583 	 * no impact on the benchmark that relies on reading from
584 	 * memory only.
585 	 */
586 	if (param->fill_buf) {
587 		buf = alloc_buffer(param->fill_buf->buf_size,
588 				   param->fill_buf->memflush);
589 		if (!buf) {
590 			ret = -ENOMEM;
591 			goto reset_affinity;
592 		}
593 	}
594 
595 	fflush(stdout);
596 	bm_pid = fork();
597 	if (bm_pid == -1) {
598 		ret = -errno;
599 		ksft_perror("Unable to fork");
600 		goto free_buf;
601 	}
602 
603 	/*
604 	 * What needs to be measured runs in separate process until
605 	 * terminated.
606 	 */
607 	if (bm_pid == 0) {
608 		if (param->fill_buf)
609 			fill_cache_read(buf, param->fill_buf->buf_size, false);
610 		else if (uparams->benchmark_cmd[0])
611 			execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd);
612 		exit(EXIT_SUCCESS);
613 	}
614 
615 	ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);
616 
617 	/* Give benchmark enough time to fully run. */
618 	sleep(1);
619 
620 	/* Test runs until the callback setup() tells the test to stop. */
621 	while (1) {
622 		ret = param->setup(test, uparams, param);
623 		if (ret == END_OF_TESTS) {
624 			ret = 0;
625 			break;
626 		}
627 		if (ret < 0)
628 			break;
629 
630 		ret = param->measure(uparams, param, bm_pid);
631 		if (ret)
632 			break;
633 	}
634 
635 	kill(bm_pid, SIGKILL);
636 free_buf:
637 	free(buf);
638 reset_affinity:
639 	taskset_restore(ppid, &old_affinity);
640 	return ret;
641 }
642