1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Cache Allocation Technology (CAT) test 4 * 5 * Copyright (C) 2018 Intel Corporation 6 * 7 * Authors: 8 * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>, 9 * Fenghua Yu <fenghua.yu@intel.com> 10 */ 11 #include "resctrl.h" 12 #include <unistd.h> 13 14 #define RESULT_FILE_NAME "result_cat" 15 #define NUM_OF_RUNS 5 16 17 static int show_results_info(__u64 sum_llc_val, int no_of_bits, 18 unsigned long cache_span, 19 unsigned long num_of_runs, bool platform, 20 __s64 *prev_avg_llc_val) 21 { 22 __u64 avg_llc_val = 0; 23 int ret = 0; 24 25 avg_llc_val = sum_llc_val / num_of_runs; 26 if (*prev_avg_llc_val) { 27 ret = platform && (avg_llc_val < *prev_avg_llc_val); 28 29 ksft_print_msg("%s Check cache miss rate increased\n", 30 ret ? "Fail:" : "Pass:"); 31 } 32 *prev_avg_llc_val = avg_llc_val; 33 34 show_cache_info(no_of_bits, avg_llc_val, cache_span, true); 35 36 return ret; 37 } 38 39 /* Remove the highest bits from CBM */ 40 static unsigned long next_mask(unsigned long current_mask) 41 { 42 return current_mask & (current_mask >> 2); 43 } 44 45 static int check_results(struct resctrl_val_param *param, const char *cache_type, 46 unsigned long cache_total_size, unsigned long full_cache_mask, 47 unsigned long current_mask) 48 { 49 char *token_array[8], temp[512]; 50 __u64 sum_llc_perf_miss = 0; 51 __s64 prev_avg_llc_val = 0; 52 unsigned long alloc_size; 53 int runs = 0; 54 int fail = 0; 55 int ret; 56 FILE *fp; 57 58 ksft_print_msg("Checking for pass/fail\n"); 59 fp = fopen(param->filename, "r"); 60 if (!fp) { 61 ksft_perror("Cannot open file"); 62 63 return -1; 64 } 65 66 while (fgets(temp, sizeof(temp), fp)) { 67 char *token = strtok(temp, ":\t"); 68 int fields = 0; 69 int bits; 70 71 while (token) { 72 token_array[fields++] = token; 73 token = strtok(NULL, ":\t"); 74 } 75 76 sum_llc_perf_miss += strtoull(token_array[3], NULL, 0); 77 runs++; 78 79 if (runs < NUM_OF_RUNS) 80 continue; 81 82 if (!current_mask) { 83 ksft_print_msg("Unexpected empty cache mask\n"); 84 break; 85 } 86 87 alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask); 88 89 bits = count_bits(current_mask); 90 91 ret = show_results_info(sum_llc_perf_miss, bits, 92 alloc_size / 64, 93 runs, get_vendor() == ARCH_INTEL, 94 &prev_avg_llc_val); 95 if (ret) 96 fail = 1; 97 98 runs = 0; 99 sum_llc_perf_miss = 0; 100 current_mask = next_mask(current_mask); 101 } 102 103 fclose(fp); 104 105 return fail; 106 } 107 108 static void cat_test_cleanup(void) 109 { 110 remove(RESULT_FILE_NAME); 111 } 112 113 /* 114 * cat_test - Execute CAT benchmark and measure cache misses 115 * @test: Test information structure 116 * @uparams: User supplied parameters 117 * @param: Parameters passed to cat_test() 118 * @span: Buffer size for the benchmark 119 * @current_mask Start mask for the first iteration 120 * 121 * Run CAT selftest by varying the allocated cache portion and comparing the 122 * impact on cache misses (the result analysis is done in check_results() 123 * and show_results_info(), not in this function). 124 * 125 * One bit is removed from the CAT allocation bit mask (in current_mask) for 126 * each subsequent test which keeps reducing the size of the allocated cache 127 * portion. A single test flushes the buffer, reads it to warm up the cache, 128 * and reads the buffer again. The cache misses are measured during the last 129 * read pass. 130 * 131 * Return: 0 when the test was run, < 0 on error. 132 */ 133 static int cat_test(const struct resctrl_test *test, 134 const struct user_params *uparams, 135 struct resctrl_val_param *param, 136 size_t span, unsigned long current_mask) 137 { 138 struct perf_event_attr pea; 139 cpu_set_t old_affinity; 140 unsigned char *buf; 141 char schemata[64]; 142 int ret, i, pe_fd; 143 pid_t bm_pid; 144 145 if (strcmp(param->filename, "") == 0) 146 sprintf(param->filename, "stdio"); 147 148 bm_pid = getpid(); 149 150 /* Taskset benchmark to specified cpu */ 151 ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity); 152 if (ret) 153 return ret; 154 155 /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/ 156 ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); 157 if (ret) 158 goto reset_affinity; 159 160 ret = minimize_l2_occupancy(test, uparams, param); 161 if (ret) 162 goto reset_affinity; 163 164 perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES); 165 pe_fd = perf_open(&pea, bm_pid, uparams->cpu); 166 if (pe_fd < 0) { 167 ret = -1; 168 goto reset_affinity; 169 } 170 171 buf = alloc_buffer(span, 1); 172 if (!buf) { 173 ret = -1; 174 goto pe_close; 175 } 176 177 while (current_mask) { 178 snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask); 179 ret = write_schemata("", schemata, uparams->cpu, test->resource); 180 if (ret) 181 goto free_buf; 182 snprintf(schemata, sizeof(schemata), "%lx", current_mask); 183 ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource); 184 if (ret) 185 goto free_buf; 186 187 for (i = 0; i < NUM_OF_RUNS; i++) { 188 mem_flush(buf, span); 189 fill_cache_read(buf, span, true); 190 191 ret = perf_event_reset_enable(pe_fd); 192 if (ret) 193 goto free_buf; 194 195 fill_cache_read(buf, span, true); 196 197 ret = perf_event_measure(pe_fd, param->filename, bm_pid); 198 if (ret) 199 goto free_buf; 200 } 201 current_mask = next_mask(current_mask); 202 } 203 204 free_buf: 205 free(buf); 206 pe_close: 207 close(pe_fd); 208 reset_affinity: 209 taskset_restore(bm_pid, &old_affinity); 210 211 return ret; 212 } 213 214 static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams) 215 { 216 unsigned long long_mask, start_mask, full_cache_mask; 217 unsigned long cache_total_size = 0; 218 int n = uparams->bits; 219 unsigned int start; 220 int count_of_bits; 221 size_t span; 222 int ret; 223 224 ret = get_full_cbm(test->resource, &full_cache_mask); 225 if (ret) 226 return ret; 227 /* Get the largest contiguous exclusive portion of the cache */ 228 ret = get_mask_no_shareable(test->resource, &long_mask); 229 if (ret) 230 return ret; 231 232 /* Get L3/L2 cache size */ 233 ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size); 234 if (ret) 235 return ret; 236 ksft_print_msg("Cache size :%lu\n", cache_total_size); 237 238 count_of_bits = count_contiguous_bits(long_mask, &start); 239 240 if (!n) 241 n = count_of_bits / 2; 242 243 if (n > count_of_bits - 1) { 244 ksft_print_msg("Invalid input value for no_of_bits n!\n"); 245 ksft_print_msg("Please enter value in range 1 to %d\n", 246 count_of_bits - 1); 247 return -1; 248 } 249 start_mask = create_bit_mask(start, n); 250 251 struct resctrl_val_param param = { 252 .ctrlgrp = "c1", 253 .filename = RESULT_FILE_NAME, 254 .num_of_runs = 0, 255 }; 256 param.mask = long_mask; 257 span = cache_portion_size(cache_total_size, start_mask, full_cache_mask); 258 259 remove(param.filename); 260 261 ret = cat_test(test, uparams, ¶m, span, start_mask); 262 if (ret) 263 return ret; 264 265 ret = check_results(¶m, test->resource, 266 cache_total_size, full_cache_mask, start_mask); 267 return ret; 268 } 269 270 static bool arch_supports_noncont_cat(const struct resctrl_test *test) 271 { 272 unsigned int vendor_id = get_vendor(); 273 274 /* AMD and Hygon always support non-contiguous CBM. */ 275 if (vendor_id == ARCH_AMD || vendor_id == ARCH_HYGON) 276 return true; 277 278 #if defined(__i386__) || defined(__x86_64__) /* arch */ 279 unsigned int eax, ebx, ecx, edx; 280 /* Intel support for non-contiguous CBM needs to be discovered. */ 281 if (!strcmp(test->resource, "L3")) 282 __cpuid_count(0x10, 1, eax, ebx, ecx, edx); 283 else if (!strcmp(test->resource, "L2")) 284 __cpuid_count(0x10, 2, eax, ebx, ecx, edx); 285 else 286 return false; 287 288 return ((ecx >> 3) & 1); 289 #endif /* end arch */ 290 291 return false; 292 } 293 294 static int noncont_cat_run_test(const struct resctrl_test *test, 295 const struct user_params *uparams) 296 { 297 unsigned long full_cache_mask, cont_mask, noncont_mask; 298 unsigned int sparse_masks; 299 int bit_center, ret; 300 char schemata[64]; 301 302 /* Check to compare sparse_masks content to CPUID output. */ 303 ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks); 304 if (ret) 305 return ret; 306 307 if (arch_supports_noncont_cat(test) != sparse_masks) { 308 ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n"); 309 return 1; 310 } 311 312 /* Write checks initialization. */ 313 ret = get_full_cbm(test->resource, &full_cache_mask); 314 if (ret < 0) 315 return ret; 316 bit_center = count_bits(full_cache_mask) / 2; 317 318 /* 319 * The bit_center needs to be at least 3 to properly calculate the CBM 320 * hole in the noncont_mask. If it's smaller return an error since the 321 * cache mask is too short and that shouldn't happen. 322 */ 323 if (bit_center < 3) 324 return -EINVAL; 325 cont_mask = full_cache_mask >> bit_center; 326 327 /* Contiguous mask write check. */ 328 snprintf(schemata, sizeof(schemata), "%lx", cont_mask); 329 ret = write_schemata("", schemata, uparams->cpu, test->resource); 330 if (ret) { 331 ksft_print_msg("Write of contiguous CBM failed\n"); 332 return 1; 333 } 334 335 /* 336 * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle. 337 * Output is compared with support information to catch any edge case errors. 338 */ 339 noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask; 340 snprintf(schemata, sizeof(schemata), "%lx", noncont_mask); 341 ret = write_schemata("", schemata, uparams->cpu, test->resource); 342 if (ret && sparse_masks) 343 ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n"); 344 else if (ret && !sparse_masks) 345 ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n"); 346 else if (!ret && !sparse_masks) 347 ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n"); 348 349 return !ret == !sparse_masks; 350 } 351 352 static bool noncont_cat_feature_check(const struct resctrl_test *test) 353 { 354 if (!resctrl_resource_exists(test->resource)) 355 return false; 356 357 return resource_info_file_exists(test->resource, "sparse_masks"); 358 } 359 360 struct resctrl_test l3_cat_test = { 361 .name = "L3_CAT", 362 .group = "CAT", 363 .resource = "L3", 364 .feature_check = test_resource_feature_check, 365 .run_test = cat_run_test, 366 .cleanup = cat_test_cleanup, 367 }; 368 369 struct resctrl_test l3_noncont_cat_test = { 370 .name = "L3_NONCONT_CAT", 371 .group = "CAT", 372 .resource = "L3", 373 .feature_check = noncont_cat_feature_check, 374 .run_test = noncont_cat_run_test, 375 }; 376 377 struct resctrl_test l2_noncont_cat_test = { 378 .name = "L2_NONCONT_CAT", 379 .group = "CAT", 380 .resource = "L2", 381 .feature_check = noncont_cat_feature_check, 382 .run_test = noncont_cat_run_test, 383 }; 384