1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2023 Isovalent */
3
4 #include <errno.h>
5 #include <unistd.h>
6 #include <pthread.h>
7
8 #include <bpf/bpf.h>
9 #include <bpf/libbpf.h>
10
11 #include <bpf_util.h>
12 #include <test_maps.h>
13
14 #include "map_percpu_stats.skel.h"
15
16 #define MAX_ENTRIES 16384
17 #define MAX_ENTRIES_HASH_OF_MAPS 64
18 #define N_THREADS 8
19 #define MAX_MAP_KEY_SIZE 4
20 #define PCPU_MIN_UNIT_SIZE 32768
21
map_info(int map_fd,struct bpf_map_info * info)22 static void map_info(int map_fd, struct bpf_map_info *info)
23 {
24 __u32 len = sizeof(*info);
25 int ret;
26
27 memset(info, 0, sizeof(*info));
28
29 ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
30 CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
31 }
32
map_type_to_s(__u32 type)33 static const char *map_type_to_s(__u32 type)
34 {
35 switch (type) {
36 case BPF_MAP_TYPE_HASH:
37 return "HASH";
38 case BPF_MAP_TYPE_PERCPU_HASH:
39 return "PERCPU_HASH";
40 case BPF_MAP_TYPE_LRU_HASH:
41 return "LRU_HASH";
42 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
43 return "LRU_PERCPU_HASH";
44 case BPF_MAP_TYPE_HASH_OF_MAPS:
45 return "BPF_MAP_TYPE_HASH_OF_MAPS";
46 default:
47 return "<define-me>";
48 }
49 }
50
map_count_elements(__u32 type,int map_fd)51 static __u32 map_count_elements(__u32 type, int map_fd)
52 {
53 __u32 key = -1;
54 int n = 0;
55
56 while (!bpf_map_get_next_key(map_fd, &key, &key))
57 n++;
58 return n;
59 }
60
61 #define BATCH true
62
delete_and_lookup_batch(int map_fd,void * keys,__u32 count)63 static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
64 {
65 static __u8 values[(8 << 10) * MAX_ENTRIES];
66 void *in_batch = NULL, *out_batch;
67 __u32 save_count = count;
68 int ret;
69
70 ret = bpf_map_lookup_and_delete_batch(map_fd,
71 &in_batch, &out_batch,
72 keys, values, &count,
73 NULL);
74
75 /*
76 * Despite what uapi header says, lookup_and_delete_batch will return
77 * -ENOENT in case we successfully have deleted all elements, so check
78 * this separately
79 */
80 CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
81 "error: %s\n", strerror(errno));
82
83 CHECK(count != save_count,
84 "bpf_map_lookup_and_delete_batch",
85 "deleted not all elements: removed=%u expected=%u\n",
86 count, save_count);
87 }
88
delete_all_elements(__u32 type,int map_fd,bool batch)89 static void delete_all_elements(__u32 type, int map_fd, bool batch)
90 {
91 static __u8 val[8 << 10]; /* enough for 1024 CPUs */
92 __u32 key = -1;
93 void *keys;
94 __u32 i, n;
95 int ret;
96
97 keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
98 CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
99
100 for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
101 memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);
102
103 if (batch) {
104 /* Can't mix delete_batch and delete_and_lookup_batch because
105 * they have different semantics in relation to the keys
106 * argument. However, delete_batch utilize map_delete_elem,
107 * so we actually test it in non-batch scenario */
108 delete_and_lookup_batch(map_fd, keys, n);
109 } else {
110 /* Intentionally mix delete and lookup_and_delete so we can test both */
111 for (i = 0; i < n; i++) {
112 void *keyp = keys + i*MAX_MAP_KEY_SIZE;
113
114 if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
115 ret = bpf_map_delete_elem(map_fd, keyp);
116 CHECK(ret < 0, "bpf_map_delete_elem",
117 "error: key %u: %s\n", i, strerror(errno));
118 } else {
119 ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
120 CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
121 "error: key %u: %s\n", i, strerror(errno));
122 }
123 }
124 }
125
126 free(keys);
127 }
128
is_lru(__u32 map_type)129 static bool is_lru(__u32 map_type)
130 {
131 return map_type == BPF_MAP_TYPE_LRU_HASH ||
132 map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
133 }
134
is_percpu(__u32 map_type)135 static bool is_percpu(__u32 map_type)
136 {
137 return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
138 map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
139 }
140
141 struct upsert_opts {
142 __u32 map_type;
143 int map_fd;
144 __u32 n;
145 bool retry_for_nomem;
146 };
147
create_small_hash(void)148 static int create_small_hash(void)
149 {
150 int map_fd;
151
152 map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
153 CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
154 strerror(errno), "small");
155
156 return map_fd;
157 }
158
retry_for_nomem_fn(int err)159 static bool retry_for_nomem_fn(int err)
160 {
161 return err == ENOMEM;
162 }
163
patch_map_thread(void * arg)164 static void *patch_map_thread(void *arg)
165 {
166 /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
167 static __u8 blob[8 << 10];
168 struct upsert_opts *opts = arg;
169 void *val_ptr;
170 int val;
171 int ret;
172 int i;
173
174 for (i = 0; i < opts->n; i++) {
175 if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
176 val = create_small_hash();
177 val_ptr = &val;
178 } else if (is_percpu(opts->map_type)) {
179 val_ptr = blob;
180 } else {
181 val = rand();
182 val_ptr = &val;
183 }
184
185 /* 2 seconds may be enough ? */
186 if (opts->retry_for_nomem)
187 ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
188 40, retry_for_nomem_fn);
189 else
190 ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
191 CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
192
193 if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
194 close(val);
195 }
196 return NULL;
197 }
198
upsert_elements(struct upsert_opts * opts)199 static void upsert_elements(struct upsert_opts *opts)
200 {
201 pthread_t threads[N_THREADS];
202 int ret;
203 int i;
204
205 for (i = 0; i < ARRAY_SIZE(threads); i++) {
206 ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
207 CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
208 }
209
210 for (i = 0; i < ARRAY_SIZE(threads); i++) {
211 ret = pthread_join(i[threads], NULL);
212 CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
213 }
214 }
215
read_cur_elements(int iter_fd)216 static __u32 read_cur_elements(int iter_fd)
217 {
218 char buf[64];
219 ssize_t n;
220 __u32 ret;
221
222 n = read(iter_fd, buf, sizeof(buf)-1);
223 CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
224 buf[n] = '\0';
225
226 errno = 0;
227 ret = (__u32)strtol(buf, NULL, 10);
228 CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
229
230 return ret;
231 }
232
get_cur_elements(int map_id)233 static __u32 get_cur_elements(int map_id)
234 {
235 struct map_percpu_stats *skel;
236 struct bpf_link *link;
237 __u32 n_elements;
238 int iter_fd;
239 int ret;
240
241 skel = map_percpu_stats__open();
242 CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
243
244 skel->bss->target_id = map_id;
245
246 ret = map_percpu_stats__load(skel);
247 CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
248
249 link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
250 CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
251
252 iter_fd = bpf_iter_create(bpf_link__fd(link));
253 CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
254
255 n_elements = read_cur_elements(iter_fd);
256
257 close(iter_fd);
258 bpf_link__destroy(link);
259 map_percpu_stats__destroy(skel);
260
261 return n_elements;
262 }
263
check_expected_number_elements(__u32 n_inserted,int map_fd,struct bpf_map_info * info)264 static void check_expected_number_elements(__u32 n_inserted, int map_fd,
265 struct bpf_map_info *info)
266 {
267 __u32 n_real;
268 __u32 n_iter;
269
270 /* Count the current number of elements in the map by iterating through
271 * all the map keys via bpf_get_next_key
272 */
273 n_real = map_count_elements(info->type, map_fd);
274
275 /* The "real" number of elements should be the same as the inserted
276 * number of elements in all cases except LRU maps, where some elements
277 * may have been evicted
278 */
279 if (n_inserted == 0 || !is_lru(info->type))
280 CHECK(n_inserted != n_real, "map_count_elements",
281 "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);
282
283 /* Count the current number of elements in the map using an iterator */
284 n_iter = get_cur_elements(info->id);
285
286 /* Both counts should be the same, as all updates are over */
287 CHECK(n_iter != n_real, "get_cur_elements",
288 "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
289 n_iter, n_real, map_type_to_s(info->type), info->map_flags);
290 }
291
__test(int map_fd)292 static void __test(int map_fd)
293 {
294 struct upsert_opts opts = {
295 .map_fd = map_fd,
296 };
297 struct bpf_map_info info;
298
299 map_info(map_fd, &info);
300 opts.map_type = info.type;
301 opts.n = info.max_entries;
302
303 /* Reduce the number of elements we are updating such that we don't
304 * bump into -E2BIG from non-preallocated hash maps, but still will
305 * have some evictions for LRU maps */
306 if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
307 opts.n -= 512;
308 else
309 opts.n /= 2;
310
311 /* per-cpu bpf memory allocator may not be able to allocate per-cpu
312 * pointer successfully and it can not refill free llist timely, and
313 * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
314 * the problem temporarily.
315 */
316 opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
317
318 /*
319 * Upsert keys [0, n) under some competition: with random values from
320 * N_THREADS threads. Check values, then delete all elements and check
321 * values again.
322 */
323 upsert_elements(&opts);
324 check_expected_number_elements(opts.n, map_fd, &info);
325 delete_all_elements(info.type, map_fd, !BATCH);
326 check_expected_number_elements(0, map_fd, &info);
327
328 /* Now do the same, but using batch delete operations */
329 upsert_elements(&opts);
330 check_expected_number_elements(opts.n, map_fd, &info);
331 delete_all_elements(info.type, map_fd, BATCH);
332 check_expected_number_elements(0, map_fd, &info);
333
334 close(map_fd);
335 }
336
map_create_opts(__u32 type,const char * name,struct bpf_map_create_opts * map_opts,__u32 key_size,__u32 val_size)337 static int map_create_opts(__u32 type, const char *name,
338 struct bpf_map_create_opts *map_opts,
339 __u32 key_size, __u32 val_size)
340 {
341 int max_entries;
342 int map_fd;
343
344 if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
345 max_entries = MAX_ENTRIES_HASH_OF_MAPS;
346 else
347 max_entries = MAX_ENTRIES;
348
349 map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
350 CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
351 strerror(errno), name);
352
353 return map_fd;
354 }
355
map_create(__u32 type,const char * name,struct bpf_map_create_opts * map_opts)356 static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
357 {
358 return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
359 }
360
create_hash(void)361 static int create_hash(void)
362 {
363 LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
364
365 return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
366 }
367
create_percpu_hash(void)368 static int create_percpu_hash(void)
369 {
370 LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
371
372 return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
373 }
374
create_hash_prealloc(void)375 static int create_hash_prealloc(void)
376 {
377 return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
378 }
379
create_percpu_hash_prealloc(void)380 static int create_percpu_hash_prealloc(void)
381 {
382 return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
383 }
384
create_lru_hash(__u32 type,__u32 map_flags)385 static int create_lru_hash(__u32 type, __u32 map_flags)
386 {
387 LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
388
389 return map_create(type, "lru_hash", &map_opts);
390 }
391
create_hash_of_maps(void)392 static int create_hash_of_maps(void)
393 {
394 LIBBPF_OPTS(bpf_map_create_opts, map_opts,
395 .map_flags = BPF_F_NO_PREALLOC,
396 .inner_map_fd = create_small_hash(),
397 );
398 int ret;
399
400 ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
401 &map_opts, sizeof(int), sizeof(int));
402 close(map_opts.inner_map_fd);
403 return ret;
404 }
405
map_percpu_stats_hash(void)406 static void map_percpu_stats_hash(void)
407 {
408 __test(create_hash());
409 printf("test_%s:PASS\n", __func__);
410 }
411
map_percpu_stats_percpu_hash(void)412 static void map_percpu_stats_percpu_hash(void)
413 {
414 __test(create_percpu_hash());
415 printf("test_%s:PASS\n", __func__);
416 }
417
map_percpu_stats_hash_prealloc(void)418 static void map_percpu_stats_hash_prealloc(void)
419 {
420 __test(create_hash_prealloc());
421 printf("test_%s:PASS\n", __func__);
422 }
423
map_percpu_stats_percpu_hash_prealloc(void)424 static void map_percpu_stats_percpu_hash_prealloc(void)
425 {
426 __test(create_percpu_hash_prealloc());
427 printf("test_%s:PASS\n", __func__);
428 }
429
map_percpu_stats_lru_hash(void)430 static void map_percpu_stats_lru_hash(void)
431 {
432 __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
433 printf("test_%s:PASS\n", __func__);
434 }
435
map_percpu_stats_lru_hash_no_common(void)436 static void map_percpu_stats_lru_hash_no_common(void)
437 {
438 __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
439 printf("test_%s:PASS\n", __func__);
440 }
441
map_percpu_stats_percpu_lru_hash(void)442 static void map_percpu_stats_percpu_lru_hash(void)
443 {
444 __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
445 printf("test_%s:PASS\n", __func__);
446 }
447
map_percpu_stats_percpu_lru_hash_no_common(void)448 static void map_percpu_stats_percpu_lru_hash_no_common(void)
449 {
450 __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
451 printf("test_%s:PASS\n", __func__);
452 }
453
map_percpu_stats_hash_of_maps(void)454 static void map_percpu_stats_hash_of_maps(void)
455 {
456 __test(create_hash_of_maps());
457 printf("test_%s:PASS\n", __func__);
458 }
459
map_percpu_stats_map_value_size(void)460 static void map_percpu_stats_map_value_size(void)
461 {
462 int fd;
463 int value_sz = PCPU_MIN_UNIT_SIZE + 1;
464 struct bpf_map_create_opts opts = { .sz = sizeof(opts) };
465 enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY,
466 BPF_MAP_TYPE_PERCPU_HASH,
467 BPF_MAP_TYPE_LRU_PERCPU_HASH };
468 for (int i = 0; i < ARRAY_SIZE(map_types); i++) {
469 fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts);
470 CHECK(fd < 0 && errno != E2BIG, "percpu map value size",
471 "error: %s\n", strerror(errno));
472 }
473 printf("test_%s:PASS\n", __func__);
474 }
475
test_map_percpu_stats(void)476 void test_map_percpu_stats(void)
477 {
478 map_percpu_stats_hash();
479 map_percpu_stats_percpu_hash();
480 map_percpu_stats_hash_prealloc();
481 map_percpu_stats_percpu_hash_prealloc();
482 map_percpu_stats_lru_hash();
483 map_percpu_stats_lru_hash_no_common();
484 map_percpu_stats_percpu_lru_hash();
485 map_percpu_stats_percpu_lru_hash_no_common();
486 map_percpu_stats_hash_of_maps();
487 map_percpu_stats_map_value_size();
488 }
489