// SPDX-License-Identifier: GPL-2.0
/*
 * Cache Allocation Technology (CAT) test
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include "resctrl.h"
#include <unistd.h>

#define RESULT_FILE_NAME	"result_cat"
#define NUM_OF_RUNS		5

/*
 * Minimum difference in LLC misses between a test with n+1 bits CBM to the
 * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
 * bits in the CBM mask, the minimum difference must be at least
 * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
 *
 * The relationship between the number of used CBM bits and the difference in
 * LLC misses is not expected to be linear. With a small number of bits, the
 * margin is smaller than with a larger number of bits. For selftest purposes,
 * however, the linear approach is enough because ultimately only a pass/fail
 * decision has to be made and the distinction between a strong and a stronger
 * signal is irrelevant.
 */
#define MIN_DIFF_PERCENT_PER_BIT	1UL

static int show_results_info(__u64 sum_llc_val, int no_of_bits,
			     unsigned long cache_span,
			     unsigned long min_diff_percent,
			     unsigned long num_of_runs, bool platform,
			     __s64 *prev_avg_llc_val)
{
	__u64 avg_llc_val = 0;
	float avg_diff;
	int ret = 0;

	avg_llc_val = sum_llc_val / num_of_runs;
	if (*prev_avg_llc_val) {
		float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);

		avg_diff = delta / *prev_avg_llc_val;
		ret = platform && (avg_diff * 100) < (float)min_diff_percent;

		ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
			       ret ? "Fail:" : "Pass:", (float)min_diff_percent);

		ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
	}
	*prev_avg_llc_val = avg_llc_val;

	show_cache_info(no_of_bits, avg_llc_val, cache_span, true);

	return ret;
}

/* Remove the highest bit from CBM */
static unsigned long next_mask(unsigned long current_mask)
{
	return current_mask & (current_mask >> 1);
}

static int check_results(struct resctrl_val_param *param, const char *cache_type,
			 unsigned long cache_total_size, unsigned long full_cache_mask,
			 unsigned long current_mask)
{
	char *token_array[8], temp[512];
	__u64 sum_llc_perf_miss = 0;
	__s64 prev_avg_llc_val = 0;
	unsigned long alloc_size;
	int runs = 0;
	int fail = 0;
	int ret;
	FILE *fp;

	ksft_print_msg("Checking for pass/fail\n");
	fp = fopen(param->filename, "r");
	if (!fp) {
		ksft_perror("Cannot open file");

		return -1;
	}

	while (fgets(temp, sizeof(temp), fp)) {
		char *token = strtok(temp, ":\t");
		int fields = 0;
		int bits;

		while (token) {
			token_array[fields++] = token;
			token = strtok(NULL, ":\t");
		}

		sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
		runs++;

		if (runs < NUM_OF_RUNS)
			continue;

		if (!current_mask) {
			ksft_print_msg("Unexpected empty cache mask\n");
			break;
		}

		alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);

		bits = count_bits(current_mask);

		ret = show_results_info(sum_llc_perf_miss, bits,
					alloc_size / 64,
					MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
					runs, get_vendor() == ARCH_INTEL,
					&prev_avg_llc_val);
		if (ret)
			fail = 1;

		runs = 0;
		sum_llc_perf_miss = 0;
		current_mask = next_mask(current_mask);
	}

	fclose(fp);

	return fail;
}
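
/* Remove the result file created by the test runs */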
static void cat_test_cleanup(void)
{
	remove(RESULT_FILE_NAME);
}

/*
 * cat_test - Execute CAT benchmark and measure cache misses
 * @test: Test information structure
 * @uparams: User supplied parameters
 * @param: Parameters passed to cat_test()
 * @span: Buffer size for the benchmark
 * @current_mask: Start mask for the first iteration
 *
 * Run CAT selftest by varying the allocated cache portion and comparing the
 * impact on cache misses (the result analysis is done in check_results()
 * and show_results_info(), not in this function).
 *
 * One bit is removed from the CAT allocation bit mask (in current_mask) for
 * each subsequent test which keeps reducing the size of the allocated cache
 * portion. A single test flushes the buffer, reads it to warm up the cache,
 * and reads the buffer again. The cache misses are measured during the last
 * read pass.
 *
 * Return: 0 when the test was run, < 0 on error.
 */
static int cat_test(const struct resctrl_test *test,
		    const struct user_params *uparams,
		    struct resctrl_val_param *param,
		    size_t span, unsigned long current_mask)
{
	struct perf_event_read pe_read;
	struct perf_event_attr pea;
	cpu_set_t old_affinity;
	unsigned char *buf;
	char schemata[64];
	int ret, i, pe_fd;
	pid_t bm_pid;

	if (strcmp(param->filename, "") == 0)
		sprintf(param->filename, "stdio");

	bm_pid = getpid();

	/* Taskset benchmark to specified cpu */
	ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
	if (ret)
		return ret;

	/* Write benchmark to specified con_mon grp, mon_grp in resctrl FS */
	ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
	if (ret)
		goto reset_affinity;

	perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
	perf_event_initialize_read_format(&pe_read);
	pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
	if (pe_fd < 0) {
		ret = -1;
		goto reset_affinity;
	}

	buf = alloc_buffer(span, 1);
	if (!buf) {
		ret = -1;
		goto pe_close;
	}

	while (current_mask) {
		snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
		ret = write_schemata("", schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;
		snprintf(schemata, sizeof(schemata), "%lx", current_mask);
		ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
		if (ret)
			goto free_buf;

		for (i = 0; i < NUM_OF_RUNS; i++) {
			mem_flush(buf, span);
			fill_cache_read(buf, span, true);

			ret = perf_event_reset_enable(pe_fd);
			if (ret)
				goto free_buf;

			fill_cache_read(buf, span, true);

			ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
			if (ret)
				goto free_buf;
		}
		current_mask = next_mask(current_mask);
	}

free_buf:
	free(buf);
pe_close:
	close(pe_fd);
reset_affinity:
	taskset_restore(bm_pid, &old_affinity);

	return ret;
}

static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
	unsigned long long_mask, start_mask, full_cache_mask;
	unsigned long cache_total_size = 0;
	int n = uparams->bits;
	unsigned int start;
	int count_of_bits;
	size_t span;
	int ret;

	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret)
		return ret;
	/* Get the largest contiguous exclusive portion of the cache */
	ret = get_mask_no_shareable(test->resource, &long_mask);
	if (ret)
		return ret;

	/* Get L3/L2 cache size */
	ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
	if (ret)
		return ret;
	ksft_print_msg("Cache size :%lu\n", cache_total_size);

	count_of_bits = count_contiguous_bits(long_mask, &start);

	if (!n)
		n = count_of_bits / 2;

	if (n > count_of_bits - 1) {
		ksft_print_msg("Invalid input value for no_of_bits n!\n");
		ksft_print_msg("Please enter value in range 1 to %d\n",
			       count_of_bits - 1);
		return -1;
	}
	start_mask = create_bit_mask(start, n);

	struct resctrl_val_param param = {
		.ctrlgrp = "c1",
		.filename = RESULT_FILE_NAME,
		.num_of_runs = 0,
	};
	param.mask = long_mask;
	span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);

	remove(param.filename);

	ret = cat_test(test, uparams, &param, span, start_mask);
	if (ret)
		return ret;

	ret = check_results(&param, test->resource,
			    cache_total_size, full_cache_mask, start_mask);
	return ret;
}

static bool arch_supports_noncont_cat(const struct resctrl_test *test)
{
	/* AMD always supports non-contiguous CBM. */
	if (get_vendor() == ARCH_AMD)
		return true;

#if defined(__i386__) || defined(__x86_64__) /* arch */
	unsigned int eax, ebx, ecx, edx;

	/* Intel support for non-contiguous CBM needs to be discovered. */
	if (!strcmp(test->resource, "L3"))
		__cpuid_count(0x10, 1, eax, ebx, ecx, edx);
	else if (!strcmp(test->resource, "L2"))
		__cpuid_count(0x10, 2, eax, ebx, ecx, edx);
	else
		return false;

	return ((ecx >> 3) & 1);
#endif /* end arch */

	return false;
}

static int noncont_cat_run_test(const struct resctrl_test *test,
				const struct user_params *uparams)
{
	unsigned long full_cache_mask, cont_mask, noncont_mask;
	unsigned int sparse_masks;
	int bit_center, ret;
	char schemata[64];

	/* Check to compare sparse_masks content to CPUID output. */
	ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
	if (ret)
		return ret;

	if (arch_supports_noncont_cat(test) != sparse_masks) {
		ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
		return 1;
	}

	/* Write checks initialization. */
	ret = get_full_cbm(test->resource, &full_cache_mask);
	if (ret < 0)
		return ret;
	bit_center = count_bits(full_cache_mask) / 2;

	/*
	 * The bit_center needs to be at least 3 to properly calculate the CBM
	 * hole in the noncont_mask. If it's smaller, return an error since the
	 * cache mask is too short and that shouldn't happen.
	 */
	if (bit_center < 3)
		return -EINVAL;
	cont_mask = full_cache_mask >> bit_center;

	/* Contiguous mask write check. */
	snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret) {
		ksft_print_msg("Write of contiguous CBM failed\n");
		return 1;
	}

	/*
	 * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
	 * Output is compared with support information to catch any edge case errors.
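	 *
	 * For example, with a full CBM of 0xffff, bit_center is 8 and the mask
	 * written below is ~(0xf << 6) & 0xffff = 0xfc3f, i.e. a four bit hole
	 * in the middle of the otherwise contiguous CBM.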
	 */
	noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
	snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
	ret = write_schemata("", schemata, uparams->cpu, test->resource);
	if (ret && sparse_masks)
		ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
	else if (ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
	else if (!ret && !sparse_masks)
		ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");

	return !ret == !sparse_masks;
}

static bool noncont_cat_feature_check(const struct resctrl_test *test)
{
	if (!resctrl_resource_exists(test->resource))
		return false;

	return resource_info_file_exists(test->resource, "sparse_masks");
}

struct resctrl_test l3_cat_test = {
	.name = "L3_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = test_resource_feature_check,
	.run_test = cat_run_test,
	.cleanup = cat_test_cleanup,
};

struct resctrl_test l3_noncont_cat_test = {
	.name = "L3_NONCONT_CAT",
	.group = "CAT",
	.resource = "L3",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};

struct resctrl_test l2_noncont_cat_test = {
	.name = "L2_NONCONT_CAT",
	.group = "CAT",
	.resource = "L2",
	.feature_check = noncont_cat_feature_check,
	.run_test = noncont_cat_run_test,
};