// SPDX-License-Identifier: GPL-2.0
/*
 * fill_buf benchmark
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Authors:
 *    Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
 *    Fenghua Yu <fenghua.yu@intel.com>
 */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
#include <string.h>

#include "resctrl.h"

#define CL_SIZE		(64)
#define PAGE_SIZE	(4 * 1024)
#define MB		(1024 * 1024)

/* Store barrier: make sure preceding cache line flushes have completed (x86 only). */
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("sfence\n\t"
		     : : : "memory");
#endif
}

/* Flush the cache line containing @p from all cache levels (x86 only). */
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
	asm volatile("clflush (%0)\n\t"
		     : : "r"(p) : "memory");
#endif
}

void mem_flush(unsigned char *buf, size_t buf_size)
{
	unsigned char *cp = buf;
	size_t i = 0;

	buf_size = buf_size / CL_SIZE; /* mem size in cache lines */

	for (i = 0; i < buf_size; i++)
		cl_flush(&cp[i * CL_SIZE]);

	sb();
}

/*
 * Buffer index step advance to workaround HW prefetching interfering with
 * the measurements.
 *
 * Must be a prime to step through all indexes of the buffer.
 *
 * Some primes work better than others on some architectures (from MBA/MBM
 * result stability point of view).
 */
#define FILL_IDX_MULT	23

static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
	unsigned int size = buf_size / (CL_SIZE / 2);
	unsigned int i, idx = 0;
	unsigned char sum = 0;

	/*
	 * Read the buffer in an order that is unexpected by HW prefetching
	 * optimizations to prevent them interfering with the caching pattern.
	 *
	 * The read order is (in terms of halves of cachelines):
	 *	i * FILL_IDX_MULT % size
	 * The formula is open-coded below to avoid a modulo inside the loop
	 * as it improves MBA/MBM result stability on some architectures.
	 */
	for (i = 0; i < size; i++) {
		sum += buf[idx * (CL_SIZE / 2)];

		idx += FILL_IDX_MULT;
		while (idx >= size)
			idx -= size;
	}

	return sum;
}

static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
	unsigned char *end_ptr = buf + buf_size;
	unsigned char *p;

	p = buf;
	while (p < end_ptr) {
		*p = '1';
		p += (CL_SIZE / 2);
	}
}

void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
	int ret = 0;

	while (1) {
		ret = fill_one_span_read(buf, buf_size);
		if (once)
			break;
	}

	/* Consume read result so that reading memory is not optimized out. */
	*value_sink = ret;
}

static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
	while (1) {
		fill_one_span_write(buf, buf_size);
		if (once)
			break;
	}
}

unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
	void *buf = NULL;
	uint64_t *p64;
	size_t s64;
	int ret;

	ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
	if (ret)	/* posix_memalign() returns a positive errno on failure, not a negative value */
		return NULL;

	/* Initialize the buffer: write one random word per cache line */
	p64 = buf;
	s64 = buf_size / sizeof(uint64_t);

	while (s64 > 0) {
		*p64 = (uint64_t)rand();
		p64 += (CL_SIZE / sizeof(uint64_t));
		s64 -= (CL_SIZE / sizeof(uint64_t));
	}

	/* Flush the memory before using to avoid "cache hot pages" effect */
	if (memflush)
		mem_flush(buf, buf_size);

	return buf;
}

int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
	unsigned char *buf;

	buf = alloc_buffer(buf_size, memflush);
	if (!buf)
		return -1;

	if (op == 0)
		fill_cache_read(buf, buf_size, once);
	else
		fill_cache_write(buf, buf_size, once);
	free(buf);

	return 0;
}