xref: /linux/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2023 Isovalent */
3 
4 #include <errno.h>
5 #include <unistd.h>
6 #include <pthread.h>
7 
8 #include <bpf/bpf.h>
9 #include <bpf/libbpf.h>
10 
11 #include <bpf_util.h>
12 #include <test_maps.h>
13 
14 #include "map_percpu_stats.skel.h"
15 
16 #define MAX_ENTRIES			16384
17 #define MAX_ENTRIES_HASH_OF_MAPS	64
18 #define N_THREADS			8
19 #define MAX_MAP_KEY_SIZE		4
20 
21 static void map_info(int map_fd, struct bpf_map_info *info)
22 {
23 	__u32 len = sizeof(*info);
24 	int ret;
25 
26 	memset(info, 0, sizeof(*info));
27 
28 	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
29 	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
30 }
31 
32 static const char *map_type_to_s(__u32 type)
33 {
34 	switch (type) {
35 	case BPF_MAP_TYPE_HASH:
36 		return "HASH";
37 	case BPF_MAP_TYPE_PERCPU_HASH:
38 		return "PERCPU_HASH";
39 	case BPF_MAP_TYPE_LRU_HASH:
40 		return "LRU_HASH";
41 	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
42 		return "LRU_PERCPU_HASH";
43 	case BPF_MAP_TYPE_HASH_OF_MAPS:
44 		return "BPF_MAP_TYPE_HASH_OF_MAPS";
45 	default:
46 		return "<define-me>";
47 	}
48 }
49 
50 static __u32 map_count_elements(__u32 type, int map_fd)
51 {
52 	__u32 key = -1;
53 	int n = 0;
54 
55 	while (!bpf_map_get_next_key(map_fd, &key, &key))
56 		n++;
57 	return n;
58 }
59 
60 #define BATCH	true
61 
62 static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
63 {
64 	static __u8 values[(8 << 10) * MAX_ENTRIES];
65 	void *in_batch = NULL, *out_batch;
66 	__u32 save_count = count;
67 	int ret;
68 
69 	ret = bpf_map_lookup_and_delete_batch(map_fd,
70 					      &in_batch, &out_batch,
71 					      keys, values, &count,
72 					      NULL);
73 
74 	/*
75 	 * Despite what uapi header says, lookup_and_delete_batch will return
76 	 * -ENOENT in case we successfully have deleted all elements, so check
77 	 * this separately
78 	 */
79 	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
80 		       "error: %s\n", strerror(errno));
81 
82 	CHECK(count != save_count,
83 			"bpf_map_lookup_and_delete_batch",
84 			"deleted not all elements: removed=%u expected=%u\n",
85 			count, save_count);
86 }
87 
88 static void delete_all_elements(__u32 type, int map_fd, bool batch)
89 {
90 	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
91 	__u32 key = -1;
92 	void *keys;
93 	__u32 i, n;
94 	int ret;
95 
96 	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
97 	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
98 
99 	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
100 		memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);
101 
102 	if (batch) {
103 		/* Can't mix delete_batch and delete_and_lookup_batch because
104 		 * they have different semantics in relation to the keys
105 		 * argument. However, delete_batch utilize map_delete_elem,
106 		 * so we actually test it in non-batch scenario */
107 		delete_and_lookup_batch(map_fd, keys, n);
108 	} else {
109 		/* Intentionally mix delete and lookup_and_delete so we can test both */
110 		for (i = 0; i < n; i++) {
111 			void *keyp = keys + i*MAX_MAP_KEY_SIZE;
112 
113 			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
114 				ret = bpf_map_delete_elem(map_fd, keyp);
115 				CHECK(ret < 0, "bpf_map_delete_elem",
116 					       "error: key %u: %s\n", i, strerror(errno));
117 			} else {
118 				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
119 				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
120 					       "error: key %u: %s\n", i, strerror(errno));
121 			}
122 		}
123 	}
124 
125 	free(keys);
126 }
127 
128 static bool is_lru(__u32 map_type)
129 {
130 	return map_type == BPF_MAP_TYPE_LRU_HASH ||
131 	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
132 }
133 
134 static bool is_percpu(__u32 map_type)
135 {
136 	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
137 	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
138 }
139 
140 struct upsert_opts {
141 	__u32 map_type;
142 	int map_fd;
143 	__u32 n;
144 	bool retry_for_nomem;
145 };
146 
147 static int create_small_hash(void)
148 {
149 	int map_fd;
150 
151 	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
152 	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
153 			strerror(errno), "small");
154 
155 	return map_fd;
156 }
157 
158 static bool retry_for_nomem_fn(int err)
159 {
160 	return err == ENOMEM;
161 }
162 
163 static void *patch_map_thread(void *arg)
164 {
165 	/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
166 	static __u8 blob[8 << 10];
167 	struct upsert_opts *opts = arg;
168 	void *val_ptr;
169 	int val;
170 	int ret;
171 	int i;
172 
173 	for (i = 0; i < opts->n; i++) {
174 		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
175 			val = create_small_hash();
176 			val_ptr = &val;
177 		} else if (is_percpu(opts->map_type)) {
178 			val_ptr = blob;
179 		} else {
180 			val = rand();
181 			val_ptr = &val;
182 		}
183 
184 		/* 2 seconds may be enough ? */
185 		if (opts->retry_for_nomem)
186 			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
187 						   40, retry_for_nomem_fn);
188 		else
189 			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
190 		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
191 
192 		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
193 			close(val);
194 	}
195 	return NULL;
196 }
197 
198 static void upsert_elements(struct upsert_opts *opts)
199 {
200 	pthread_t threads[N_THREADS];
201 	int ret;
202 	int i;
203 
204 	for (i = 0; i < ARRAY_SIZE(threads); i++) {
205 		ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
206 		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
207 	}
208 
209 	for (i = 0; i < ARRAY_SIZE(threads); i++) {
210 		ret = pthread_join(i[threads], NULL);
211 		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
212 	}
213 }
214 
215 static __u32 read_cur_elements(int iter_fd)
216 {
217 	char buf[64];
218 	ssize_t n;
219 	__u32 ret;
220 
221 	n = read(iter_fd, buf, sizeof(buf)-1);
222 	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
223 	buf[n] = '\0';
224 
225 	errno = 0;
226 	ret = (__u32)strtol(buf, NULL, 10);
227 	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
228 
229 	return ret;
230 }
231 
232 static __u32 get_cur_elements(int map_id)
233 {
234 	struct map_percpu_stats *skel;
235 	struct bpf_link *link;
236 	__u32 n_elements;
237 	int iter_fd;
238 	int ret;
239 
240 	skel = map_percpu_stats__open();
241 	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
242 
243 	skel->bss->target_id = map_id;
244 
245 	ret = map_percpu_stats__load(skel);
246 	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
247 
248 	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
249 	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
250 
251 	iter_fd = bpf_iter_create(bpf_link__fd(link));
252 	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
253 
254 	n_elements = read_cur_elements(iter_fd);
255 
256 	close(iter_fd);
257 	bpf_link__destroy(link);
258 	map_percpu_stats__destroy(skel);
259 
260 	return n_elements;
261 }
262 
263 static void check_expected_number_elements(__u32 n_inserted, int map_fd,
264 					   struct bpf_map_info *info)
265 {
266 	__u32 n_real;
267 	__u32 n_iter;
268 
269 	/* Count the current number of elements in the map by iterating through
270 	 * all the map keys via bpf_get_next_key
271 	 */
272 	n_real = map_count_elements(info->type, map_fd);
273 
274 	/* The "real" number of elements should be the same as the inserted
275 	 * number of elements in all cases except LRU maps, where some elements
276 	 * may have been evicted
277 	 */
278 	if (n_inserted == 0 || !is_lru(info->type))
279 		CHECK(n_inserted != n_real, "map_count_elements",
280 		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);
281 
282 	/* Count the current number of elements in the map using an iterator */
283 	n_iter = get_cur_elements(info->id);
284 
285 	/* Both counts should be the same, as all updates are over */
286 	CHECK(n_iter != n_real, "get_cur_elements",
287 	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
288 	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
289 }
290 
291 static void __test(int map_fd)
292 {
293 	struct upsert_opts opts = {
294 		.map_fd = map_fd,
295 	};
296 	struct bpf_map_info info;
297 
298 	map_info(map_fd, &info);
299 	opts.map_type = info.type;
300 	opts.n = info.max_entries;
301 
302 	/* Reduce the number of elements we are updating such that we don't
303 	 * bump into -E2BIG from non-preallocated hash maps, but still will
304 	 * have some evictions for LRU maps  */
305 	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
306 		opts.n -= 512;
307 	else
308 		opts.n /= 2;
309 
310 	/* per-cpu bpf memory allocator may not be able to allocate per-cpu
311 	 * pointer successfully and it can not refill free llist timely, and
312 	 * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
313 	 * the problem temporarily.
314 	 */
315 	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
316 
317 	/*
318 	 * Upsert keys [0, n) under some competition: with random values from
319 	 * N_THREADS threads. Check values, then delete all elements and check
320 	 * values again.
321 	 */
322 	upsert_elements(&opts);
323 	check_expected_number_elements(opts.n, map_fd, &info);
324 	delete_all_elements(info.type, map_fd, !BATCH);
325 	check_expected_number_elements(0, map_fd, &info);
326 
327 	/* Now do the same, but using batch delete operations */
328 	upsert_elements(&opts);
329 	check_expected_number_elements(opts.n, map_fd, &info);
330 	delete_all_elements(info.type, map_fd, BATCH);
331 	check_expected_number_elements(0, map_fd, &info);
332 
333 	close(map_fd);
334 }
335 
336 static int map_create_opts(__u32 type, const char *name,
337 			   struct bpf_map_create_opts *map_opts,
338 			   __u32 key_size, __u32 val_size)
339 {
340 	int max_entries;
341 	int map_fd;
342 
343 	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
344 		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
345 	else
346 		max_entries = MAX_ENTRIES;
347 
348 	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
349 	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
350 			strerror(errno), name);
351 
352 	return map_fd;
353 }
354 
355 static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
356 {
357 	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
358 }
359 
360 static int create_hash(void)
361 {
362 	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
363 
364 	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
365 }
366 
367 static int create_percpu_hash(void)
368 {
369 	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
370 
371 	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
372 }
373 
374 static int create_hash_prealloc(void)
375 {
376 	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
377 }
378 
379 static int create_percpu_hash_prealloc(void)
380 {
381 	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
382 }
383 
384 static int create_lru_hash(__u32 type, __u32 map_flags)
385 {
386 	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
387 
388 	return map_create(type, "lru_hash", &map_opts);
389 }
390 
391 static int create_hash_of_maps(void)
392 {
393 	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
394 		.map_flags = BPF_F_NO_PREALLOC,
395 		.inner_map_fd = create_small_hash(),
396 	);
397 	int ret;
398 
399 	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
400 			      &map_opts, sizeof(int), sizeof(int));
401 	close(map_opts.inner_map_fd);
402 	return ret;
403 }
404 
405 static void map_percpu_stats_hash(void)
406 {
407 	__test(create_hash());
408 	printf("test_%s:PASS\n", __func__);
409 }
410 
411 static void map_percpu_stats_percpu_hash(void)
412 {
413 	__test(create_percpu_hash());
414 	printf("test_%s:PASS\n", __func__);
415 }
416 
417 static void map_percpu_stats_hash_prealloc(void)
418 {
419 	__test(create_hash_prealloc());
420 	printf("test_%s:PASS\n", __func__);
421 }
422 
423 static void map_percpu_stats_percpu_hash_prealloc(void)
424 {
425 	__test(create_percpu_hash_prealloc());
426 	printf("test_%s:PASS\n", __func__);
427 }
428 
429 static void map_percpu_stats_lru_hash(void)
430 {
431 	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
432 	printf("test_%s:PASS\n", __func__);
433 }
434 
435 static void map_percpu_stats_lru_hash_no_common(void)
436 {
437 	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
438 	printf("test_%s:PASS\n", __func__);
439 }
440 
441 static void map_percpu_stats_percpu_lru_hash(void)
442 {
443 	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
444 	printf("test_%s:PASS\n", __func__);
445 }
446 
447 static void map_percpu_stats_percpu_lru_hash_no_common(void)
448 {
449 	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
450 	printf("test_%s:PASS\n", __func__);
451 }
452 
453 static void map_percpu_stats_hash_of_maps(void)
454 {
455 	__test(create_hash_of_maps());
456 	printf("test_%s:PASS\n", __func__);
457 }
458 
459 void test_map_percpu_stats(void)
460 {
461 	map_percpu_stats_hash();
462 	map_percpu_stats_percpu_hash();
463 	map_percpu_stats_hash_prealloc();
464 	map_percpu_stats_percpu_hash_prealloc();
465 	map_percpu_stats_lru_hash();
466 	map_percpu_stats_lru_hash_no_common();
467 	map_percpu_stats_percpu_lru_hash();
468 	map_percpu_stats_percpu_lru_hash_no_common();
469 	map_percpu_stats_hash_of_maps();
470 }
471