// SPDX-License-Identifier: GPL-2.0
/*
 * fill_buf benchmark
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>

#include "resctrl.h"

#define CL_SIZE		(64)
#define PAGE_SIZE	(4 * 1024)
#define MB		(1024 * 1024)

/* Store barrier (x86 sfence); no-op on other architectures. */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#endif
}

/* Flush the cache line containing @p (x86 clflush); no-op on other architectures. */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}

void mem_flush(unsigned char *buf, size_t buf_size)
{
	unsigned char *cp = buf;
	size_t i = 0;

	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

	for (i = 0; i < buf_size; i++)
		cl_flush(&cp[i * CL_SIZE]);

	sb();
}

/*
 * Buffer index step advance to work around HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT	23

static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int size = buf_size / (CL_SIZE / 2);
	unsigned int i, idx = 0;
	unsigned char sum = 0;

	/*
	 * Read the buffer in an order that is unexpected by HW prefetching
	 * optimizations to prevent them from interfering with the caching
	 * pattern.
	 *
	 * The read order is (in terms of halves of cachelines):
	 *	i * FILL_IDX_MULT % size
	 * The formula is open-coded below to avoid modulo inside the loop
	 * as it improves MBA/MBM result stability on some architectures.
	 */
	for (i = 0; i < size; i++) {
		sum += buf[idx * (CL_SIZE / 2)];

		idx += FILL_IDX_MULT;
		while (idx >= size)
			idx -= size;
	}

	return sum;
}

void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int ret = 0;

	while (1) {
		ret = fill_one_span_read(buf, buf_size);
		if (once)
			break;
	}

	/* Consume read result so that reading memory is not optimized out. */
	*value_sink = ret;
}

unsigned char *alloc_buffer(size_t buf_size, bool memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	ssize_t s64;
	int ret;

	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	/* posix_memalign() returns a positive errno value on failure */
	if (ret)
		return NULL;

	/* Initialize the buffer: write one random word per cache line */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}

	/* Flush the memory before use to avoid the "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}

/* Span twice the cache size, but never less than MINIMUM_SPAN. */
ssize_t get_fill_buf_size(int cpu_no, const char *cache_type)
{
	unsigned long cache_total_size = 0;
	int ret;

	ret = get_cache_size(cpu_no, cache_type, &cache_total_size);
	if (ret)
		return ret;

	return cache_total_size * 2 > MINIMUM_SPAN ?
			cache_total_size * 2 : MINIMUM_SPAN;
}
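
/*
 * A minimal usage sketch (not part of the original file): one way these
 * helpers might be driven from a test, assuming the resctrl.h declarations
 * used above (value_sink, MINIMUM_SPAN, get_cache_size()) are linked in and
 * value_sink points at valid storage.  The function name run_fill_buf_once()
 * and the "L3" cache type argument are illustrative assumptions only.
 */
static int run_fill_buf_once(int cpu_no)
{
	unsigned char *buf;
	ssize_t buf_size;

	/* Size the working set from the cache on the chosen CPU */
	buf_size = get_fill_buf_size(cpu_no, "L3");
	if (buf_size <= 0)
		return -1;

	/* memflush=true so the pages start out cache-cold */
	buf = alloc_buffer(buf_size, true);
	if (!buf)
		return -1;

	/* once=true: a single read pass over the buffer */
	fill_cache_read(buf, buf_size, true);

	free(buf);
	return 0;
}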