// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */

#include <errno.h>
#include <unistd.h>
#include <pthread.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <bpf_util.h>
#include <test_maps.h>

#include "map_percpu_stats.skel.h"

#define MAX_ENTRIES			16384
#define MAX_ENTRIES_HASH_OF_MAPS	64
#define N_THREADS			8
#define MAX_MAP_KEY_SIZE		4
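/* Mirrors the kernel's PCPU_MIN_UNIT_SIZE limit on per-CPU allocations:
 * per-CPU maps with a larger value size are expected to be rejected with
 * -E2BIG (exercised by map_percpu_stats_map_value_size() below).
 */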
#define PCPU_MIN_UNIT_SIZE		32768

static void map_info(int map_fd, struct bpf_map_info *info)
{
	__u32 len = sizeof(*info);
	int ret;

	memset(info, 0, sizeof(*info));

	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
}

static const char *map_type_to_s(__u32 type)
{
	switch (type) {
	case BPF_MAP_TYPE_HASH:
		return "HASH";
	case BPF_MAP_TYPE_PERCPU_HASH:
		return "PERCPU_HASH";
	case BPF_MAP_TYPE_LRU_HASH:
		return "LRU_HASH";
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return "LRU_PERCPU_HASH";
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		return "HASH_OF_MAPS";
	default:
		return "<define-me>";
	}
}

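/*
 * Count elements the long way: walk every key with bpf_map_get_next_key(),
 * starting from a key that is not expected to exist in the map.
 */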
static __u32 map_count_elements(__u32 type, int map_fd)
{
	__u32 key = -1;
	int n = 0;

	while (!bpf_map_get_next_key(map_fd, &key, &key))
		n++;
	return n;
}

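/* Readability flag for delete_all_elements(): pass BATCH to exercise the
 * batched deletion path, !BATCH for per-element deletion.
 */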
#define BATCH	true

static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
{
	static __u8 values[(8 << 10) * MAX_ENTRIES];
	void *in_batch = NULL, *out_batch;
	__u32 save_count = count;
	int ret;

	ret = bpf_map_lookup_and_delete_batch(map_fd,
					      &in_batch, &out_batch,
					      keys, values, &count,
					      NULL);

	/*
	 * Despite what the uapi header says, lookup_and_delete_batch returns
	 * -ENOENT once all elements have been successfully deleted, so check
	 * for that separately.
	 */
	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
		       "error: %s\n", strerror(errno));

	CHECK(count != save_count,
			"bpf_map_lookup_and_delete_batch",
			"did not delete all elements: removed=%u expected=%u\n",
			count, save_count);
}

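/*
 * Remove every element from the map: collect all keys via
 * bpf_map_get_next_key() first, then delete them either with a single
 * lookup_and_delete_batch call or one element at a time.
 */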
static void delete_all_elements(__u32 type, int map_fd, bool batch)
{
	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
	__u32 key = -1;
	void *keys;
	__u32 i, n;
	int ret;

	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));

	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
		memcpy(keys + n * MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);

	if (batch) {
		/* Can't mix delete_batch and delete_and_lookup_batch because
		 * they have different semantics with respect to the keys
		 * argument. However, delete_batch utilizes map_delete_elem,
		 * so we actually test it in the non-batch scenario. */
		delete_and_lookup_batch(map_fd, keys, n);
	} else {
		/* Intentionally mix delete and lookup_and_delete so we can test both */
		for (i = 0; i < n; i++) {
			void *keyp = keys + i * MAX_MAP_KEY_SIZE;

			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				ret = bpf_map_delete_elem(map_fd, keyp);
				CHECK(ret < 0, "bpf_map_delete_elem",
					       "error: key %u: %s\n", i, strerror(errno));
			} else {
				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
					       "error: key %u: %s\n", i, strerror(errno));
			}
		}
	}

	free(keys);
}

static bool is_lru(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_LRU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool is_percpu(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

struct upsert_opts {
	__u32 map_type;
	int map_fd;
	__u32 n;
	bool retry_for_nomem;
};

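/*
 * A tiny 4-entry hash: used both as the value stored into
 * BPF_MAP_TYPE_HASH_OF_MAPS during updates and as the inner_map_fd template
 * when the outer map is created.
 */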
static int create_small_hash(void)
{
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
			strerror(errno), "small");

	return map_fd;
}

static bool retry_for_nomem_fn(int err)
{
	return err == ENOMEM;
}

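/*
 * Worker body: each of the N_THREADS threads upserts the same key range
 * [0, opts->n), so the updates race on identical keys. The value depends on
 * the map type: a freshly created small hash fd for hash-of-maps, a zeroed
 * per-CPU blob for per-CPU maps, or a random integer otherwise.
 */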
static void *patch_map_thread(void *arg)
{
	/* 8KB is enough for 1024 CPUs and is shared between the N_THREADS threads */
	static __u8 blob[8 << 10];
	struct upsert_opts *opts = arg;
	void *val_ptr;
	int val;
	int ret;
	int i;

	for (i = 0; i < opts->n; i++) {
		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
			val = create_small_hash();
			val_ptr = &val;
		} else if (is_percpu(opts->map_type)) {
			val_ptr = blob;
		} else {
			val = rand();
			val_ptr = &val;
		}

		/* Roughly two seconds of retrying should be enough */
		if (opts->retry_for_nomem)
			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
						   40, retry_for_nomem_fn);
		else
			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));

		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			close(val);
	}
	return NULL;
}

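/* Run patch_map_thread() in N_THREADS concurrent threads over the same opts */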
static void upsert_elements(struct upsert_opts *opts)
{
	pthread_t threads[N_THREADS];
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_create(&threads[i], NULL, patch_map_thread, opts);
		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
	}

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_join(threads[i], NULL);
		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
	}
}

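/* Parse the decimal element count reported by the map iterator through iter_fd */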
static __u32 read_cur_elements(int iter_fd)
{
	char buf[64];
	ssize_t n;
	__u32 ret;

	n = read(iter_fd, buf, sizeof(buf)-1);
	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
	buf[n] = '\0';

	errno = 0;
	ret = (__u32)strtol(buf, NULL, 10);
	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));

	return ret;
}

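/*
 * Load the map_percpu_stats skeleton, point it at map_id, attach it as a
 * bpf_map iterator and return the element count the iterator reports.
 */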
static __u32 get_cur_elements(int map_id)
{
	struct map_percpu_stats *skel;
	struct bpf_link *link;
	__u32 n_elements;
	int iter_fd;
	int ret;

	skel = map_percpu_stats__open();
	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));

	skel->bss->target_id = map_id;

	ret = map_percpu_stats__load(skel);
	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n_elements = read_cur_elements(iter_fd);

	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n_elements;
}

static void check_expected_number_elements(__u32 n_inserted, int map_fd,
					   struct bpf_map_info *info)
{
	__u32 n_real;
	__u32 n_iter;

	/* Count the current number of elements in the map by iterating through
	 * all the map keys via bpf_map_get_next_key
	 */
	n_real = map_count_elements(info->type, map_fd);

	/* The "real" number of elements should be the same as the inserted
	 * number of elements in all cases except LRU maps, where some elements
	 * may have been evicted
	 */
	if (n_inserted == 0 || !is_lru(info->type))
		CHECK(n_inserted != n_real, "map_count_elements",
		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);

	/* Count the current number of elements in the map using an iterator */
	n_iter = get_cur_elements(info->id);

	/* Both counts should be the same, as all updates are over */
	CHECK(n_iter != n_real, "get_cur_elements",
	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
}

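/*
 * Core test body: upsert keys under thread contention, then verify that the
 * element count obtained by walking the keys matches the count reported by
 * the map iterator, both after the updates and after deleting everything.
 * Done twice: once with per-element deletion and once with batched deletion.
 */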
static void __test(int map_fd)
{
	struct upsert_opts opts = {
		.map_fd = map_fd,
	};
	struct bpf_map_info info;

	map_info(map_fd, &info);
	opts.map_type = info.type;
	opts.n = info.max_entries;

	/* Reduce the number of elements we are updating so that we don't
	 * bump into -E2BIG from non-preallocated hash maps, while still
	 * getting some evictions for LRU maps */
	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
		opts.n -= 512;
	else
		opts.n /= 2;

	/* The per-cpu bpf memory allocator may fail to allocate a per-cpu
	 * pointer and may not refill its free llist in time, in which case
	 * bpf_map_update_elem() returns -ENOMEM, so just retry to mitigate
	 * the problem temporarily.
	 */
	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);

	/*
	 * Upsert keys [0, n) under some contention: with random values from
	 * N_THREADS threads. Check the element counts, then delete all
	 * elements and check the counts again.
	 */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, !BATCH);
	check_expected_number_elements(0, map_fd, &info);

	/* Now do the same, but using batch delete operations */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, BATCH);
	check_expected_number_elements(0, map_fd, &info);

	close(map_fd);
}

static int map_create_opts(__u32 type, const char *name,
			   struct bpf_map_create_opts *map_opts,
			   __u32 key_size, __u32 val_size)
{
	int max_entries;
	int map_fd;

	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
	else
		max_entries = MAX_ENTRIES;

	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
			strerror(errno), name);

	return map_fd;
}

static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
{
	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
}

static int create_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}

static int create_percpu_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}

static int create_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
}

static int create_percpu_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
}

static int create_lru_hash(__u32 type, __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);

	return map_create(type, "lru_hash", &map_opts);
}

static int create_hash_of_maps(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		.map_flags = BPF_F_NO_PREALLOC,
		.inner_map_fd = create_small_hash(),
	);
	int ret;

	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
			      &map_opts, sizeof(int), sizeof(int));
	close(map_opts.inner_map_fd);
	return ret;
}

static void map_percpu_stats_hash(void)
{
	__test(create_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash(void)
{
	__test(create_percpu_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_prealloc(void)
{
	__test(create_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash_prealloc(void)
{
	__test(create_percpu_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_of_maps(void)
{
	__test(create_hash_of_maps());
	printf("test_%s:PASS\n", __func__);
}

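/*
 * Per-CPU map values larger than PCPU_MIN_UNIT_SIZE are expected to be
 * rejected: creation must not fail with any error other than -E2BIG.
 */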
static void map_percpu_stats_map_value_size(void)
{
	int fd;
	int value_sz = PCPU_MIN_UNIT_SIZE + 1;
	struct bpf_map_create_opts opts = { .sz = sizeof(opts) };
	enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY,
					  BPF_MAP_TYPE_PERCPU_HASH,
					  BPF_MAP_TYPE_LRU_PERCPU_HASH };

	for (int i = 0; i < ARRAY_SIZE(map_types); i++) {
		fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts);
		CHECK(fd < 0 && errno != E2BIG, "percpu map value size",
			"error: %s\n", strerror(errno));
	}
	printf("test_%s:PASS\n", __func__);
}

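/* Entry point: run each map-type variant defined above */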
void test_map_percpu_stats(void)
{
	map_percpu_stats_hash();
	map_percpu_stats_percpu_hash();
	map_percpu_stats_hash_prealloc();
	map_percpu_stats_percpu_hash_prealloc();
	map_percpu_stats_lru_hash();
	map_percpu_stats_lru_hash_no_common();
	map_percpu_stats_percpu_lru_hash();
	map_percpu_stats_percpu_lru_hash_no_common();
	map_percpu_stats_hash_of_maps();
	map_percpu_stats_map_value_size();
}