xref: /linux/tools/testing/selftests/resctrl/cat_test.c (revision 8e1bb4a41aa78d6105e59186af3dcd545fc66e70)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Cache Allocation Technology (CAT) test
4  *
5  * Copyright (C) 2018 Intel Corporation
6  *
7  * Authors:
8  *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
9  *    Fenghua Yu <fenghua.yu@intel.com>
10  */
11 #include "resctrl.h"
12 #include <unistd.h>
13 
/* Name of the file the per-run measurements are written to and read back from */
#define RESULT_FILE_NAME	"result_cat"
/* Number of measurement runs per cache mask; one average is built per group */
#define NUM_OF_RUNS		5
16 
/*
 * The minimum difference in LLC misses between a test with an (n+1)-bit CBM
 * and the test with an n-bit CBM is MIN_DIFF_PERCENT_PER_BIT * (n - 1)
 * percent. With e.g. 5 vs 4 bits in the CBM mask, the minimum difference
 * must be at least MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
 *
 * The relationship between the number of used CBM bits and the difference
 * in LLC misses is not expected to be linear. With a small number of bits,
 * the margin is smaller than with a larger number of bits. For selftest
 * purposes, however, a linear approach is enough because ultimately only a
 * pass/fail decision has to be made and the distinction between a strong
 * and a stronger signal is irrelevant.
 */
#define MIN_DIFF_PERCENT_PER_BIT	1UL
31 
32 static int show_results_info(__u64 sum_llc_val, int no_of_bits,
33 			     unsigned long cache_span,
34 			     unsigned long min_diff_percent,
35 			     unsigned long num_of_runs, bool platform,
36 			     __s64 *prev_avg_llc_val)
37 {
38 	__u64 avg_llc_val = 0;
39 	float avg_diff;
40 	int ret = 0;
41 
42 	avg_llc_val = sum_llc_val / num_of_runs;
43 	if (*prev_avg_llc_val) {
44 		float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);
45 
46 		avg_diff = delta / *prev_avg_llc_val;
47 		ret = platform && (avg_diff * 100) < (float)min_diff_percent;
48 
49 		ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
50 			       ret ? "Fail:" : "Pass:", (float)min_diff_percent);
51 
52 		ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
53 	}
54 	*prev_avg_llc_val = avg_llc_val;
55 
56 	show_cache_info(no_of_bits, avg_llc_val, cache_span, true);
57 
58 	return ret;
59 }
60 
/*
 * Remove the highest bit from CBM: ANDing the mask with itself shifted
 * right by one clears the topmost bit of every run of set bits.
 */
static unsigned long next_mask(unsigned long current_mask)
{
	unsigned long shifted = current_mask >> 1;

	return shifted & current_mask;
}
66 
67 static int check_results(struct resctrl_val_param *param, const char *cache_type,
68 			 unsigned long cache_total_size, unsigned long full_cache_mask,
69 			 unsigned long current_mask)
70 {
71 	char *token_array[8], temp[512];
72 	__u64 sum_llc_perf_miss = 0;
73 	__s64 prev_avg_llc_val = 0;
74 	unsigned long alloc_size;
75 	int runs = 0;
76 	int fail = 0;
77 	int ret;
78 	FILE *fp;
79 
80 	ksft_print_msg("Checking for pass/fail\n");
81 	fp = fopen(param->filename, "r");
82 	if (!fp) {
83 		ksft_perror("Cannot open file");
84 
85 		return -1;
86 	}
87 
88 	while (fgets(temp, sizeof(temp), fp)) {
89 		char *token = strtok(temp, ":\t");
90 		int fields = 0;
91 		int bits;
92 
93 		while (token) {
94 			token_array[fields++] = token;
95 			token = strtok(NULL, ":\t");
96 		}
97 
98 		sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
99 		runs++;
100 
101 		if (runs < NUM_OF_RUNS)
102 			continue;
103 
104 		if (!current_mask) {
105 			ksft_print_msg("Unexpected empty cache mask\n");
106 			break;
107 		}
108 
109 		alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);
110 
111 		bits = count_bits(current_mask);
112 
113 		ret = show_results_info(sum_llc_perf_miss, bits,
114 					alloc_size / 64,
115 					MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
116 					runs, get_vendor() == ARCH_INTEL,
117 					&prev_avg_llc_val);
118 		if (ret)
119 			fail = 1;
120 
121 		runs = 0;
122 		sum_llc_perf_miss = 0;
123 		current_mask = next_mask(current_mask);
124 	}
125 
126 	fclose(fp);
127 
128 	return fail;
129 }
130 
131 static void cat_test_cleanup(void)
132 {
133 	remove(RESULT_FILE_NAME);
134 }
135 
136 /*
137  * cat_test - Execute CAT benchmark and measure cache misses
138  * @test:		Test information structure
139  * @uparams:		User supplied parameters
140  * @param:		Parameters passed to cat_test()
141  * @span:		Buffer size for the benchmark
142  * @current_mask	Start mask for the first iteration
143  *
144  * Run CAT selftest by varying the allocated cache portion and comparing the
145  * impact on cache misses (the result analysis is done in check_results()
146  * and show_results_info(), not in this function).
147  *
148  * One bit is removed from the CAT allocation bit mask (in current_mask) for
149  * each subsequent test which keeps reducing the size of the allocated cache
150  * portion. A single test flushes the buffer, reads it to warm up the cache,
151  * and reads the buffer again. The cache misses are measured during the last
152  * read pass.
153  *
154  * Return:		0 when the test was run, < 0 on error.
155  */
156 static int cat_test(const struct resctrl_test *test,
157 		    const struct user_params *uparams,
158 		    struct resctrl_val_param *param,
159 		    size_t span, unsigned long current_mask)
160 {
161 	struct perf_event_read pe_read;
162 	struct perf_event_attr pea;
163 	cpu_set_t old_affinity;
164 	unsigned char *buf;
165 	char schemata[64];
166 	int ret, i, pe_fd;
167 	pid_t bm_pid;
168 
169 	if (strcmp(param->filename, "") == 0)
170 		sprintf(param->filename, "stdio");
171 
172 	bm_pid = getpid();
173 
174 	/* Taskset benchmark to specified cpu */
175 	ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
176 	if (ret)
177 		return ret;
178 
179 	/* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
180 	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
181 	if (ret)
182 		goto reset_affinity;
183 
184 	perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
185 	perf_event_initialize_read_format(&pe_read);
186 	pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
187 	if (pe_fd < 0) {
188 		ret = -1;
189 		goto reset_affinity;
190 	}
191 
192 	buf = alloc_buffer(span, 1);
193 	if (!buf) {
194 		ret = -1;
195 		goto pe_close;
196 	}
197 
198 	while (current_mask) {
199 		snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
200 		ret = write_schemata("", schemata, uparams->cpu, test->resource);
201 		if (ret)
202 			goto free_buf;
203 		snprintf(schemata, sizeof(schemata), "%lx", current_mask);
204 		ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
205 		if (ret)
206 			goto free_buf;
207 
208 		for (i = 0; i < NUM_OF_RUNS; i++) {
209 			mem_flush(buf, span);
210 			fill_cache_read(buf, span, true);
211 
212 			ret = perf_event_reset_enable(pe_fd);
213 			if (ret)
214 				goto free_buf;
215 
216 			fill_cache_read(buf, span, true);
217 
218 			ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
219 			if (ret)
220 				goto free_buf;
221 		}
222 		current_mask = next_mask(current_mask);
223 	}
224 
225 free_buf:
226 	free(buf);
227 pe_close:
228 	close(pe_fd);
229 reset_affinity:
230 	taskset_restore(bm_pid, &old_affinity);
231 
232 	return ret;
233 }
234 
235 static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
236 {
237 	unsigned long long_mask, start_mask, full_cache_mask;
238 	unsigned long cache_total_size = 0;
239 	int n = uparams->bits;
240 	unsigned int start;
241 	int count_of_bits;
242 	size_t span;
243 	int ret;
244 
245 	ret = get_full_cbm(test->resource, &full_cache_mask);
246 	if (ret)
247 		return ret;
248 	/* Get the largest contiguous exclusive portion of the cache */
249 	ret = get_mask_no_shareable(test->resource, &long_mask);
250 	if (ret)
251 		return ret;
252 
253 	/* Get L3/L2 cache size */
254 	ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
255 	if (ret)
256 		return ret;
257 	ksft_print_msg("Cache size :%lu\n", cache_total_size);
258 
259 	count_of_bits = count_contiguous_bits(long_mask, &start);
260 
261 	if (!n)
262 		n = count_of_bits / 2;
263 
264 	if (n > count_of_bits - 1) {
265 		ksft_print_msg("Invalid input value for no_of_bits n!\n");
266 		ksft_print_msg("Please enter value in range 1 to %d\n",
267 			       count_of_bits - 1);
268 		return -1;
269 	}
270 	start_mask = create_bit_mask(start, n);
271 
272 	struct resctrl_val_param param = {
273 		.ctrlgrp	= "c1",
274 		.filename	= RESULT_FILE_NAME,
275 		.num_of_runs	= 0,
276 	};
277 	param.mask = long_mask;
278 	span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);
279 
280 	remove(param.filename);
281 
282 	ret = cat_test(test, uparams, &param, span, start_mask);
283 	if (ret)
284 		return ret;
285 
286 	ret = check_results(&param, test->resource,
287 			    cache_total_size, full_cache_mask, start_mask);
288 	return ret;
289 }
290 
291 static bool arch_supports_noncont_cat(const struct resctrl_test *test)
292 {
293 	unsigned int eax, ebx, ecx, edx;
294 
295 	/* AMD always supports non-contiguous CBM. */
296 	if (get_vendor() == ARCH_AMD)
297 		return true;
298 
299 	/* Intel support for non-contiguous CBM needs to be discovered. */
300 	if (!strcmp(test->resource, "L3"))
301 		__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
302 	else if (!strcmp(test->resource, "L2"))
303 		__cpuid_count(0x10, 2, eax, ebx, ecx, edx);
304 	else
305 		return false;
306 
307 	return ((ecx >> 3) & 1);
308 }
309 
310 static int noncont_cat_run_test(const struct resctrl_test *test,
311 				const struct user_params *uparams)
312 {
313 	unsigned long full_cache_mask, cont_mask, noncont_mask;
314 	unsigned int sparse_masks;
315 	int bit_center, ret;
316 	char schemata[64];
317 
318 	/* Check to compare sparse_masks content to CPUID output. */
319 	ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
320 	if (ret)
321 		return ret;
322 
323 	if (arch_supports_noncont_cat(test) != sparse_masks) {
324 		ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
325 		return 1;
326 	}
327 
328 	/* Write checks initialization. */
329 	ret = get_full_cbm(test->resource, &full_cache_mask);
330 	if (ret < 0)
331 		return ret;
332 	bit_center = count_bits(full_cache_mask) / 2;
333 
334 	/*
335 	 * The bit_center needs to be at least 3 to properly calculate the CBM
336 	 * hole in the noncont_mask. If it's smaller return an error since the
337 	 * cache mask is too short and that shouldn't happen.
338 	 */
339 	if (bit_center < 3)
340 		return -EINVAL;
341 	cont_mask = full_cache_mask >> bit_center;
342 
343 	/* Contiguous mask write check. */
344 	snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
345 	ret = write_schemata("", schemata, uparams->cpu, test->resource);
346 	if (ret) {
347 		ksft_print_msg("Write of contiguous CBM failed\n");
348 		return 1;
349 	}
350 
351 	/*
352 	 * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
353 	 * Output is compared with support information to catch any edge case errors.
354 	 */
355 	noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
356 	snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
357 	ret = write_schemata("", schemata, uparams->cpu, test->resource);
358 	if (ret && sparse_masks)
359 		ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
360 	else if (ret && !sparse_masks)
361 		ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
362 	else if (!ret && !sparse_masks)
363 		ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");
364 
365 	return !ret == !sparse_masks;
366 }
367 
368 static bool noncont_cat_feature_check(const struct resctrl_test *test)
369 {
370 	if (!resctrl_resource_exists(test->resource))
371 		return false;
372 
373 	return resource_info_file_exists(test->resource, "sparse_masks");
374 }
375 
376 struct resctrl_test l3_cat_test = {
377 	.name = "L3_CAT",
378 	.group = "CAT",
379 	.resource = "L3",
380 	.feature_check = test_resource_feature_check,
381 	.run_test = cat_run_test,
382 	.cleanup = cat_test_cleanup,
383 };
384 
385 struct resctrl_test l3_noncont_cat_test = {
386 	.name = "L3_NONCONT_CAT",
387 	.group = "CAT",
388 	.resource = "L3",
389 	.feature_check = noncont_cat_feature_check,
390 	.run_test = noncont_cat_run_test,
391 };
392 
393 struct resctrl_test l2_noncont_cat_test = {
394 	.name = "L2_NONCONT_CAT",
395 	.group = "CAT",
396 	.resource = "L2",
397 	.feature_check = noncont_cat_feature_check,
398 	.run_test = noncont_cat_run_test,
399 };
400