xref: /linux/tools/perf/bench/mem-functions.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * mem-functions.c
3  *
4  * Simple memcpy() and memset() benchmarks
5  *
6  * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
7  */
8 
9 #include "debug.h"
10 #include "../perf.h"
11 #include "../util/util.h"
12 #include <subcmd/parse-options.h>
13 #include "../util/header.h"
14 #include "../util/cloexec.h"
15 #include "bench.h"
16 #include "mem-memcpy-arch.h"
17 #include "mem-memset-arch.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/time.h>
23 #include <errno.h>
24 
/* Unit step (1024) used by print_bps() to scale bytes/sec into KB, MB and GB. */
#define K 1024

/*
 * Benchmark settings. The values below are the defaults; the option table
 * in this file points the command-line parser at each of them.
 */
static const char *size_str = "1MB";		/* -s, --size */
static const char *function_str = "all";	/* -f, --function */
static int nr_loops = 1;			/* -l, --nr_loops */
static bool use_cycles = false;			/* -c, --cycles */

/* Descriptor assigned by init_cycles() and read from by get_cycles(). */
static int cycles_fd = 0;
32 
33 static const struct option options[] = {
34 	OPT_STRING('s', "size", &size_str, "1MB",
35 		    "Specify the size of the memory buffers. "
36 		    "Available units: B, KB, MB, GB and TB (case insensitive)"),
37 
38 	OPT_STRING('f', "function", &function_str, "all",
39 		    "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
40 
41 	OPT_INTEGER('l', "nr_loops", &nr_loops,
42 		    "Specify the number of loops to run. (default: 1)"),
43 
44 	OPT_BOOLEAN('c', "cycles", &use_cycles,
45 		    "Use a cycles event instead of gettimeofday() to measure performance"),
46 
47 	OPT_END()
48 };
49 
/* Pointer types for the two kinds of routine this file benchmarks. */
typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);
typedef void *(*memset_t)(void *dst, int value, size_t len);

/*
 * One benchmarkable routine.
 *
 * @name: identifier matched against the --function argument
 * @desc: human-readable description printed next to the name
 * @fn:   the routine itself; the memcpy tables fill in .memcpy and the
 *        memset tables fill in .memset
 */
struct function {
	const char *name;
	const char *desc;
	union {
		memcpy_t memcpy;
		memset_t memset;
	} fn;
};
61 
62 static struct perf_event_attr cycle_attr = {
63 	.type		= PERF_TYPE_HARDWARE,
64 	.config		= PERF_COUNT_HW_CPU_CYCLES
65 };
66 
67 static int init_cycles(void)
68 {
69 	cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
70 
71 	if (cycles_fd < 0 && errno == ENOSYS) {
72 		pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
73 		return -1;
74 	}
75 
76 	return cycles_fd;
77 }
78 
79 static u64 get_cycles(void)
80 {
81 	int ret;
82 	u64 clk;
83 
84 	ret = read(cycles_fd, &clk, sizeof(u64));
85 	BUG_ON(ret != sizeof(u64));
86 
87 	return clk;
88 }
89 
/* Convert a struct timeval into seconds, keeping the microsecond fraction. */
static double timeval2double(struct timeval *ts)
{
	const double usec_per_sec = 1000000;
	double whole = (double)ts->tv_sec;
	double fraction = (double)ts->tv_usec / usec_per_sec;

	return whole + fraction;
}
94 
/*
 * Print a throughput value given in bytes per second, scaled to the largest
 * fitting unit (bytes, KB, MB or GB, stepping by K) and followed by a newline.
 *
 * The argument is evaluated exactly once and converted to double, so it may
 * be an arbitrary expression.
 */
#define print_bps(x) do {						\
		double bps_ = (x);					\
									\
		if (bps_ < K)						\
			printf(" %14lf bytes/sec\n", bps_);		\
		else if (bps_ < K * K)					\
			printf(" %14lf KB/sec\n", bps_ / K);		\
		else if (bps_ < K * K * K)				\
			printf(" %14lf MB/sec\n", bps_ / K / K);	\
		else							\
			printf(" %14lf GB/sec\n", bps_ / K / K / K);	\
	} while (0)
105 
106 struct bench_mem_info {
107 	const struct function *functions;
108 	u64 (*do_cycles)(const struct function *r, size_t size);
109 	double (*do_gettimeofday)(const struct function *r, size_t size);
110 	const char *const *usage;
111 };
112 
113 static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
114 {
115 	const struct function *r = &info->functions[r_idx];
116 	double result_bps = 0.0;
117 	u64 result_cycles = 0;
118 
119 	printf("# function '%s' (%s)\n", r->name, r->desc);
120 
121 	if (bench_format == BENCH_FORMAT_DEFAULT)
122 		printf("# Copying %s bytes ...\n\n", size_str);
123 
124 	if (use_cycles) {
125 		result_cycles = info->do_cycles(r, size);
126 	} else {
127 		result_bps = info->do_gettimeofday(r, size);
128 	}
129 
130 	switch (bench_format) {
131 	case BENCH_FORMAT_DEFAULT:
132 		if (use_cycles) {
133 			printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
134 		} else {
135 			print_bps(result_bps);
136 		}
137 		break;
138 
139 	case BENCH_FORMAT_SIMPLE:
140 		if (use_cycles) {
141 			printf("%lf\n", (double)result_cycles/size_total);
142 		} else {
143 			printf("%lf\n", result_bps);
144 		}
145 		break;
146 
147 	default:
148 		BUG_ON(1);
149 		break;
150 	}
151 }
152 
153 static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
154 {
155 	int i;
156 	size_t size;
157 	double size_total;
158 
159 	argc = parse_options(argc, argv, options, info->usage, 0);
160 
161 	if (use_cycles) {
162 		i = init_cycles();
163 		if (i < 0) {
164 			fprintf(stderr, "Failed to open cycles counter\n");
165 			return i;
166 		}
167 	}
168 
169 	size = (size_t)perf_atoll((char *)size_str);
170 	size_total = (double)size * nr_loops;
171 
172 	if ((s64)size <= 0) {
173 		fprintf(stderr, "Invalid size:%s\n", size_str);
174 		return 1;
175 	}
176 
177 	if (!strncmp(function_str, "all", 3)) {
178 		for (i = 0; info->functions[i].name; i++)
179 			__bench_mem_function(info, i, size, size_total);
180 		return 0;
181 	}
182 
183 	for (i = 0; info->functions[i].name; i++) {
184 		if (!strcmp(info->functions[i].name, function_str))
185 			break;
186 	}
187 	if (!info->functions[i].name) {
188 		if (strcmp(function_str, "help") && strcmp(function_str, "h"))
189 			printf("Unknown function: %s\n", function_str);
190 		printf("Available functions:\n");
191 		for (i = 0; info->functions[i].name; i++) {
192 			printf("\t%s ... %s\n",
193 			       info->functions[i].name, info->functions[i].desc);
194 		}
195 		return 1;
196 	}
197 
198 	__bench_mem_function(info, i, size, size_total);
199 
200 	return 0;
201 }
202 
/*
 * Allocate the two zeroed buffers used by the memcpy benchmarks.
 *
 * @dst:  receives the destination buffer
 * @src:  receives the source buffer
 * @size: size of each buffer in bytes
 *
 * Calls die() if either allocation fails.
 */
static void memcpy_alloc_mem(void **dst, void **src, size_t size)
{
	void *buf;

	buf = zalloc(size);
	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");

	buf = zalloc(size);
	*src = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");

	/*
	 * Write to the source buffer explicitly so that its zero pages are
	 * always prefaulted, even if MMAP_THRESH is crossed.
	 */
	memset(buf, 0, size);
}
216 
217 static u64 do_memcpy_cycles(const struct function *r, size_t size)
218 {
219 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
220 	void *src = NULL, *dst = NULL;
221 	memcpy_t fn = r->fn.memcpy;
222 	int i;
223 
224 	memcpy_alloc_mem(&dst, &src, size);
225 
226 	/*
227 	 * We prefault the freshly allocated memory range here,
228 	 * to not measure page fault overhead:
229 	 */
230 	fn(dst, src, size);
231 
232 	cycle_start = get_cycles();
233 	for (i = 0; i < nr_loops; ++i)
234 		fn(dst, src, size);
235 	cycle_end = get_cycles();
236 
237 	free(src);
238 	free(dst);
239 	return cycle_end - cycle_start;
240 }
241 
242 static double do_memcpy_gettimeofday(const struct function *r, size_t size)
243 {
244 	struct timeval tv_start, tv_end, tv_diff;
245 	memcpy_t fn = r->fn.memcpy;
246 	void *src = NULL, *dst = NULL;
247 	int i;
248 
249 	memcpy_alloc_mem(&dst, &src, size);
250 
251 	/*
252 	 * We prefault the freshly allocated memory range here,
253 	 * to not measure page fault overhead:
254 	 */
255 	fn(dst, src, size);
256 
257 	BUG_ON(gettimeofday(&tv_start, NULL));
258 	for (i = 0; i < nr_loops; ++i)
259 		fn(dst, src, size);
260 	BUG_ON(gettimeofday(&tv_end, NULL));
261 
262 	timersub(&tv_end, &tv_start, &tv_diff);
263 
264 	free(src);
265 	free(dst);
266 
267 	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
268 }
269 
270 struct function memcpy_functions[] = {
271 	{ .name		= "default",
272 	  .desc		= "Default memcpy() provided by glibc",
273 	  .fn.memcpy	= memcpy },
274 
275 #ifdef HAVE_ARCH_X86_64_SUPPORT
276 # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
277 # include "mem-memcpy-x86-64-asm-def.h"
278 # undef MEMCPY_FN
279 #endif
280 
281 	{ .name = NULL, }
282 };
283 
/* Usage text for the memcpy benchmark; NULL marks the end of the list. */
static const char * const bench_mem_memcpy_usage[] = {
	[0] = "perf bench mem memcpy <options>",
	[1] = NULL,
};
288 
289 int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
290 {
291 	struct bench_mem_info info = {
292 		.functions		= memcpy_functions,
293 		.do_cycles		= do_memcpy_cycles,
294 		.do_gettimeofday	= do_memcpy_gettimeofday,
295 		.usage			= bench_mem_memcpy_usage,
296 	};
297 
298 	return bench_mem_common(argc, argv, &info);
299 }
300 
/*
 * Allocate the zeroed buffer used by the memset benchmarks.
 *
 * @dst:  receives the buffer
 * @size: buffer size in bytes
 *
 * Calls die() if the allocation fails.
 */
static void memset_alloc_mem(void **dst, size_t size)
{
	void *buf = zalloc(size);

	*dst = buf;
	if (buf == NULL)
		die("memory allocation failed - maybe size is too large?\n");
}
307 
308 static u64 do_memset_cycles(const struct function *r, size_t size)
309 {
310 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
311 	memset_t fn = r->fn.memset;
312 	void *dst = NULL;
313 	int i;
314 
315 	memset_alloc_mem(&dst, size);
316 
317 	/*
318 	 * We prefault the freshly allocated memory range here,
319 	 * to not measure page fault overhead:
320 	 */
321 	fn(dst, -1, size);
322 
323 	cycle_start = get_cycles();
324 	for (i = 0; i < nr_loops; ++i)
325 		fn(dst, i, size);
326 	cycle_end = get_cycles();
327 
328 	free(dst);
329 	return cycle_end - cycle_start;
330 }
331 
332 static double do_memset_gettimeofday(const struct function *r, size_t size)
333 {
334 	struct timeval tv_start, tv_end, tv_diff;
335 	memset_t fn = r->fn.memset;
336 	void *dst = NULL;
337 	int i;
338 
339 	memset_alloc_mem(&dst, size);
340 
341 	/*
342 	 * We prefault the freshly allocated memory range here,
343 	 * to not measure page fault overhead:
344 	 */
345 	fn(dst, -1, size);
346 
347 	BUG_ON(gettimeofday(&tv_start, NULL));
348 	for (i = 0; i < nr_loops; ++i)
349 		fn(dst, i, size);
350 	BUG_ON(gettimeofday(&tv_end, NULL));
351 
352 	timersub(&tv_end, &tv_start, &tv_diff);
353 
354 	free(dst);
355 	return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
356 }
357 
/* Usage text for the memset benchmark; NULL marks the end of the list. */
static const char * const bench_mem_memset_usage[] = {
	[0] = "perf bench mem memset <options>",
	[1] = NULL,
};
362 
363 static const struct function memset_functions[] = {
364 	{ .name		= "default",
365 	  .desc		= "Default memset() provided by glibc",
366 	  .fn.memset	= memset },
367 
368 #ifdef HAVE_ARCH_X86_64_SUPPORT
369 # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
370 # include "mem-memset-x86-64-asm-def.h"
371 # undef MEMSET_FN
372 #endif
373 
374 	{ .name = NULL, }
375 };
376 
377 int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
378 {
379 	struct bench_mem_info info = {
380 		.functions		= memset_functions,
381 		.do_cycles		= do_memset_cycles,
382 		.do_gettimeofday	= do_memset_gettimeofday,
383 		.usage			= bench_mem_memset_usage,
384 	};
385 
386 	return bench_mem_common(argc, argv, &info);
387 }
388