xref: /linux/samples/bpf/test_lru_dist.c (revision 15a1fbdcfb519c2bd291ed01c6c94e0b89537a77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2016 Facebook
4  */
5 #define _GNU_SOURCE
6 #include <linux/types.h>
7 #include <stdio.h>
8 #include <unistd.h>
9 #include <linux/bpf.h>
10 #include <errno.h>
11 #include <string.h>
12 #include <assert.h>
13 #include <sched.h>
14 #include <sys/wait.h>
15 #include <sys/stat.h>
16 #include <sys/resource.h>
17 #include <fcntl.h>
18 #include <stdlib.h>
19 #include <time.h>
20 
21 #include <bpf/bpf.h>
22 #include "bpf_util.h"
23 
24 #define min(a, b) ((a) < (b) ? (a) : (b))
25 #ifndef offsetof
26 # define offsetof(TYPE, MEMBER)	((size_t)&((TYPE *)0)->MEMBER)
27 #endif
28 #define container_of(ptr, type, member) ({			\
29 	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
30 	(type *)( (char *)__mptr - offsetof(type,member) );})
31 
32 static int nr_cpus;
33 static unsigned long long *dist_keys;
34 static unsigned int dist_key_counts;
35 
36 struct list_head {
37 	struct list_head *next, *prev;
38 };
39 
40 static inline void INIT_LIST_HEAD(struct list_head *list)
41 {
42 	list->next = list;
43 	list->prev = list;
44 }
45 
46 static inline int list_empty(const struct list_head *head)
47 {
48 	return head->next == head;
49 }
50 
51 static inline void __list_add(struct list_head *new,
52 			      struct list_head *prev,
53 			      struct list_head *next)
54 {
55 	next->prev = new;
56 	new->next = next;
57 	new->prev = prev;
58 	prev->next = new;
59 }
60 
61 static inline void list_add(struct list_head *new, struct list_head *head)
62 {
63 	__list_add(new, head, head->next);
64 }
65 
66 static inline void __list_del(struct list_head *prev, struct list_head *next)
67 {
68 	next->prev = prev;
69 	prev->next = next;
70 }
71 
72 static inline void __list_del_entry(struct list_head *entry)
73 {
74 	__list_del(entry->prev, entry->next);
75 }
76 
77 static inline void list_move(struct list_head *list, struct list_head *head)
78 {
79 	__list_del_entry(list);
80 	list_add(list, head);
81 }
82 
83 #define list_entry(ptr, type, member) \
84 	container_of(ptr, type, member)
85 
86 #define list_last_entry(ptr, type, member) \
87 	list_entry((ptr)->prev, type, member)
88 
89 struct pfect_lru_node {
90 	struct list_head list;
91 	unsigned long long key;
92 };
93 
94 struct pfect_lru {
95 	struct list_head list;
96 	struct pfect_lru_node *free_nodes;
97 	unsigned int cur_size;
98 	unsigned int lru_size;
99 	unsigned int nr_unique;
100 	unsigned int nr_misses;
101 	unsigned int total;
102 	int map_fd;
103 };
104 
105 static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size,
106 			   unsigned int nr_possible_elems)
107 {
108 	lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
109 				     sizeof(unsigned long long),
110 				     sizeof(struct pfect_lru_node *),
111 				     nr_possible_elems, 0);
112 	assert(lru->map_fd != -1);
113 
114 	lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node));
115 	assert(lru->free_nodes);
116 
117 	INIT_LIST_HEAD(&lru->list);
118 	lru->cur_size = 0;
119 	lru->lru_size = lru_size;
120 	lru->nr_unique = lru->nr_misses = lru->total = 0;
121 }
122 
123 static void pfect_lru_destroy(struct pfect_lru *lru)
124 {
125 	close(lru->map_fd);
126 	free(lru->free_nodes);
127 }
128 
129 static int pfect_lru_lookup_or_insert(struct pfect_lru *lru,
130 				      unsigned long long key)
131 {
132 	struct pfect_lru_node *node = NULL;
133 	int seen = 0;
134 
135 	lru->total++;
136 	if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) {
137 		if (node) {
138 			list_move(&node->list, &lru->list);
139 			return 1;
140 		}
141 		seen = 1;
142 	}
143 
144 	if (lru->cur_size < lru->lru_size) {
145 		node =  &lru->free_nodes[lru->cur_size++];
146 		INIT_LIST_HEAD(&node->list);
147 	} else {
148 		struct pfect_lru_node *null_node = NULL;
149 
150 		node = list_last_entry(&lru->list,
151 				       struct pfect_lru_node,
152 				       list);
153 		bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST);
154 	}
155 
156 	node->key = key;
157 	list_move(&node->list, &lru->list);
158 
159 	lru->nr_misses++;
160 	if (seen) {
161 		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST));
162 	} else {
163 		lru->nr_unique++;
164 		assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST));
165 	}
166 
167 	return seen;
168 }
169 
170 static unsigned int read_keys(const char *dist_file,
171 			      unsigned long long **keys)
172 {
173 	struct stat fst;
174 	unsigned long long *retkeys;
175 	unsigned int counts = 0;
176 	int dist_fd;
177 	char *b, *l;
178 	int i;
179 
180 	dist_fd = open(dist_file, 0);
181 	assert(dist_fd != -1);
182 
183 	assert(fstat(dist_fd, &fst) == 0);
184 	b = malloc(fst.st_size);
185 	assert(b);
186 
187 	assert(read(dist_fd, b, fst.st_size) == fst.st_size);
188 	close(dist_fd);
189 	for (i = 0; i < fst.st_size; i++) {
190 		if (b[i] == '\n')
191 			counts++;
192 	}
193 	counts++; /* in case the last line has no \n */
194 
195 	retkeys = malloc(counts * sizeof(unsigned long long));
196 	assert(retkeys);
197 
198 	counts = 0;
199 	for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
200 		retkeys[counts++] = strtoull(l, NULL, 10);
201 	free(b);
202 
203 	*keys = retkeys;
204 
205 	return counts;
206 }
207 
208 static int create_map(int map_type, int map_flags, unsigned int size)
209 {
210 	int map_fd;
211 
212 	map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
213 				sizeof(unsigned long long), size, map_flags);
214 
215 	if (map_fd == -1)
216 		perror("bpf_create_map");
217 
218 	return map_fd;
219 }
220 
221 static int sched_next_online(int pid, int next_to_try)
222 {
223 	cpu_set_t cpuset;
224 
225 	if (next_to_try == nr_cpus)
226 		return -1;
227 
228 	while (next_to_try < nr_cpus) {
229 		CPU_ZERO(&cpuset);
230 		CPU_SET(next_to_try++, &cpuset);
231 		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
232 			break;
233 	}
234 
235 	return next_to_try;
236 }
237 
238 static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
239 			 void *data)
240 {
241 	int next_sched_cpu = 0;
242 	pid_t pid[tasks];
243 	int i;
244 
245 	for (i = 0; i < tasks; i++) {
246 		pid[i] = fork();
247 		if (pid[i] == 0) {
248 			next_sched_cpu = sched_next_online(0, next_sched_cpu);
249 			fn(i, data);
250 			exit(0);
251 		} else if (pid[i] == -1) {
252 			printf("couldn't spawn #%d process\n", i);
253 			exit(1);
254 		}
255 		/* It is mostly redundant and just allow the parent
256 		 * process to update next_shced_cpu for the next child
257 		 * process
258 		 */
259 		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
260 	}
261 	for (i = 0; i < tasks; i++) {
262 		int status;
263 
264 		assert(waitpid(pid[i], &status, 0) == pid[i]);
265 		assert(status == 0);
266 	}
267 }
268 
269 static void do_test_lru_dist(int task, void *data)
270 {
271 	unsigned int nr_misses = 0;
272 	struct pfect_lru pfect_lru;
273 	unsigned long long key, value = 1234;
274 	unsigned int i;
275 
276 	unsigned int lru_map_fd = ((unsigned int *)data)[0];
277 	unsigned int lru_size = ((unsigned int *)data)[1];
278 	unsigned long long key_offset = task * dist_key_counts;
279 
280 	pfect_lru_init(&pfect_lru, lru_size, dist_key_counts);
281 
282 	for (i = 0; i < dist_key_counts; i++) {
283 		key = dist_keys[i] + key_offset;
284 
285 		pfect_lru_lookup_or_insert(&pfect_lru, key);
286 
287 		if (!bpf_map_lookup_elem(lru_map_fd, &key, &value))
288 			continue;
289 
290 		if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) {
291 			printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n",
292 			       key, errno);
293 			assert(0);
294 		}
295 
296 		nr_misses++;
297 	}
298 
299 	printf("    task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
300 	       task, pfect_lru.nr_unique, dist_key_counts, nr_misses,
301 	       dist_key_counts);
302 	printf("    task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n",
303 	       task, pfect_lru.nr_unique, pfect_lru.total,
304 	       pfect_lru.nr_misses, pfect_lru.total);
305 
306 	pfect_lru_destroy(&pfect_lru);
307 	close(lru_map_fd);
308 }
309 
310 static void test_parallel_lru_dist(int map_type, int map_flags,
311 				   int nr_tasks, unsigned int lru_size)
312 {
313 	int child_data[2];
314 	int lru_map_fd;
315 
316 	printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
317 	       map_flags);
318 
319 	if (map_flags & BPF_F_NO_COMMON_LRU)
320 		lru_map_fd = create_map(map_type, map_flags,
321 					nr_cpus * lru_size);
322 	else
323 		lru_map_fd = create_map(map_type, map_flags,
324 					nr_tasks * lru_size);
325 	assert(lru_map_fd != -1);
326 
327 	child_data[0] = lru_map_fd;
328 	child_data[1] = lru_size;
329 
330 	run_parallel(nr_tasks, do_test_lru_dist, child_data);
331 
332 	close(lru_map_fd);
333 }
334 
335 static void test_lru_loss0(int map_type, int map_flags)
336 {
337 	unsigned long long key, value[nr_cpus];
338 	unsigned int old_unused_losses = 0;
339 	unsigned int new_unused_losses = 0;
340 	unsigned int used_losses = 0;
341 	int map_fd;
342 
343 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
344 	       map_flags);
345 
346 	assert(sched_next_online(0, 0) != -1);
347 
348 	if (map_flags & BPF_F_NO_COMMON_LRU)
349 		map_fd = create_map(map_type, map_flags, 900 * nr_cpus);
350 	else
351 		map_fd = create_map(map_type, map_flags, 900);
352 
353 	assert(map_fd != -1);
354 
355 	value[0] = 1234;
356 
357 	for (key = 1; key <= 1000; key++) {
358 		int start_key, end_key;
359 
360 		assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0);
361 
362 		start_key = 101;
363 		end_key = min(key, 900);
364 
365 		while (start_key <= end_key) {
366 			bpf_map_lookup_elem(map_fd, &start_key, value);
367 			start_key++;
368 		}
369 	}
370 
371 	for (key = 1; key <= 1000; key++) {
372 		if (bpf_map_lookup_elem(map_fd, &key, value)) {
373 			if (key <= 100)
374 				old_unused_losses++;
375 			else if (key <= 900)
376 				used_losses++;
377 			else
378 				new_unused_losses++;
379 		}
380 	}
381 
382 	close(map_fd);
383 
384 	printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) "
385 	       "newer-elem-losses:%d(/100)\n",
386 	       old_unused_losses, used_losses, new_unused_losses);
387 }
388 
389 static void test_lru_loss1(int map_type, int map_flags)
390 {
391 	unsigned long long key, value[nr_cpus];
392 	int map_fd;
393 	unsigned int nr_losses = 0;
394 
395 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
396 	       map_flags);
397 
398 	assert(sched_next_online(0, 0) != -1);
399 
400 	if (map_flags & BPF_F_NO_COMMON_LRU)
401 		map_fd = create_map(map_type, map_flags, 1000 * nr_cpus);
402 	else
403 		map_fd = create_map(map_type, map_flags, 1000);
404 
405 	assert(map_fd != -1);
406 
407 	value[0] = 1234;
408 
409 	for (key = 1; key <= 1000; key++)
410 		assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
411 
412 	for (key = 1; key <= 1000; key++) {
413 		if (bpf_map_lookup_elem(map_fd, &key, value))
414 			nr_losses++;
415 	}
416 
417 	close(map_fd);
418 
419 	printf("nr_losses:%d(/1000)\n", nr_losses);
420 }
421 
422 static void do_test_parallel_lru_loss(int task, void *data)
423 {
424 	const unsigned int nr_stable_elems = 1000;
425 	const unsigned int nr_repeats = 100000;
426 
427 	int map_fd = *(int *)data;
428 	unsigned long long stable_base;
429 	unsigned long long key, value[nr_cpus];
430 	unsigned long long next_ins_key;
431 	unsigned int nr_losses = 0;
432 	unsigned int i;
433 
434 	stable_base = task * nr_repeats * 2 + 1;
435 	next_ins_key = stable_base;
436 	value[0] = 1234;
437 	for (i = 0; i < nr_stable_elems; i++) {
438 		assert(bpf_map_update_elem(map_fd, &next_ins_key, value,
439 				       BPF_NOEXIST) == 0);
440 		next_ins_key++;
441 	}
442 
443 	for (i = 0; i < nr_repeats; i++) {
444 		int rn;
445 
446 		rn = rand();
447 
448 		if (rn % 10) {
449 			key = rn % nr_stable_elems + stable_base;
450 			bpf_map_lookup_elem(map_fd, &key, value);
451 		} else {
452 			bpf_map_update_elem(map_fd, &next_ins_key, value,
453 					BPF_NOEXIST);
454 			next_ins_key++;
455 		}
456 	}
457 
458 	key = stable_base;
459 	for (i = 0; i < nr_stable_elems; i++) {
460 		if (bpf_map_lookup_elem(map_fd, &key, value))
461 			nr_losses++;
462 		key++;
463 	}
464 
465 	printf("    task:%d nr_losses:%u\n", task, nr_losses);
466 }
467 
468 static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks)
469 {
470 	int map_fd;
471 
472 	printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type,
473 	       map_flags);
474 
475 	/* Give 20% more than the active working set */
476 	if (map_flags & BPF_F_NO_COMMON_LRU)
477 		map_fd = create_map(map_type, map_flags,
478 				    nr_cpus * (1000 + 200));
479 	else
480 		map_fd = create_map(map_type, map_flags,
481 				    nr_tasks * (1000 + 200));
482 
483 	assert(map_fd != -1);
484 
485 	run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd);
486 
487 	close(map_fd);
488 }
489 
490 int main(int argc, char **argv)
491 {
492 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
493 	int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
494 	const char *dist_file;
495 	int nr_tasks = 1;
496 	int lru_size;
497 	int f;
498 
499 	if (argc < 4) {
500 		printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n",
501 		       argv[0]);
502 		return -1;
503 	}
504 
505 	dist_file = argv[1];
506 	lru_size = atoi(argv[2]);
507 	nr_tasks = atoi(argv[3]);
508 
509 	setbuf(stdout, NULL);
510 
511 	assert(!setrlimit(RLIMIT_MEMLOCK, &r));
512 
513 	srand(time(NULL));
514 
515 	nr_cpus = bpf_num_possible_cpus();
516 	assert(nr_cpus != -1);
517 	printf("nr_cpus:%d\n\n", nr_cpus);
518 
519 	nr_tasks = min(nr_tasks, nr_cpus);
520 
521 	dist_key_counts = read_keys(dist_file, &dist_keys);
522 	if (!dist_key_counts) {
523 		printf("%s has no key\n", dist_file);
524 		return -1;
525 	}
526 
527 	for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
528 		test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
529 		test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]);
530 		test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
531 				       nr_tasks);
532 		test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f],
533 				       nr_tasks, lru_size);
534 		printf("\n");
535 	}
536 
537 	free(dist_keys);
538 
539 	return 0;
540 }
541