// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */

#include <errno.h>
#include <unistd.h>
#include <pthread.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <bpf_util.h>
#include <test_maps.h>

#include "map_percpu_stats.skel.h"

#define MAX_ENTRIES 16384
#define MAX_ENTRIES_HASH_OF_MAPS 64
#define N_THREADS 8
#define MAX_MAP_KEY_SIZE 4

static void map_info(int map_fd, struct bpf_map_info *info)
{
	__u32 len = sizeof(*info);
	int ret;

	memset(info, 0, sizeof(*info));

	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
}

static const char *map_type_to_s(__u32 type)
{
	switch (type) {
	case BPF_MAP_TYPE_HASH:
		return "HASH";
	case BPF_MAP_TYPE_PERCPU_HASH:
		return "PERCPU_HASH";
	case BPF_MAP_TYPE_LRU_HASH:
		return "LRU_HASH";
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return "LRU_PERCPU_HASH";
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		return "BPF_MAP_TYPE_HASH_OF_MAPS";
	default:
		return "<define-me>";
	}
}

static __u32 map_count_elements(__u32 type, int map_fd)
{
	__u32 key = -1;
	int n = 0;

	while (!bpf_map_get_next_key(map_fd, &key, &key))
		n++;
	return n;
}

#define BATCH	true

static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
{
	static __u8 values[(8 << 10) * MAX_ENTRIES];
	void *in_batch = NULL, *out_batch;
	__u32 save_count = count;
	int ret;

	ret = bpf_map_lookup_and_delete_batch(map_fd,
					      &in_batch, &out_batch,
					      keys, values, &count,
					      NULL);

	/*
	 * Despite what the uapi header says, lookup_and_delete_batch will
	 * return -ENOENT when we have successfully deleted all elements,
	 * so check this separately
	 */
	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
	      "error: %s\n", strerror(errno));

	CHECK(count != save_count,
	      "bpf_map_lookup_and_delete_batch",
	      "deleted not all elements: removed=%u expected=%u\n",
	      count, save_count);
}

static void delete_all_elements(__u32 type, int map_fd, bool batch)
{
	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
	__u32 key = -1;
	void *keys;
	__u32 i, n;
	int ret;

	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));

	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
		memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);

	if (batch) {
		/* Can't mix delete_batch and delete_and_lookup_batch because
		 * they have different semantics in relation to the keys
		 * argument. However, delete_batch utilizes map_delete_elem,
		 * so we actually test it in the non-batch scenario */
		delete_and_lookup_batch(map_fd, keys, n);
	} else {
		/* Intentionally mix delete and lookup_and_delete so we can test both */
		for (i = 0; i < n; i++) {
			void *keyp = keys + i*MAX_MAP_KEY_SIZE;

			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				ret = bpf_map_delete_elem(map_fd, keyp);
				CHECK(ret < 0, "bpf_map_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			} else {
				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			}
		}
	}

	free(keys);
}

static bool is_lru(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_LRU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool is_percpu(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

struct upsert_opts {
	__u32 map_type;
	int map_fd;
	__u32 n;
	bool retry_for_nomem;
};

static int create_small_hash(void)
{
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), "small");

	return map_fd;
}

static bool retry_for_nomem_fn(int err)
{
	return err == ENOMEM;
}

static void *patch_map_thread(void *arg)
{
	/* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
	static __u8 blob[8 << 10];
	struct upsert_opts *opts = arg;
	void *val_ptr;
	int val;
	int ret;
	int i;

	for (i = 0; i < opts->n; i++) {
		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
			val = create_small_hash();
			val_ptr = &val;
		} else if (is_percpu(opts->map_type)) {
			val_ptr = blob;
		} else {
			val = rand();
			val_ptr = &val;
		}

		/* 2 seconds may be enough? */
		if (opts->retry_for_nomem)
			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
						   40, retry_for_nomem_fn);
		else
			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));

		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			close(val);
	}
	return NULL;
}

static void upsert_elements(struct upsert_opts *opts)
{
	pthread_t threads[N_THREADS];
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_create(&threads[i], NULL, patch_map_thread, opts);
		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
	}

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_join(threads[i], NULL);
		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
	}
}

static __u32 read_cur_elements(int iter_fd)
{
	char buf[64];
	ssize_t n;
	__u32 ret;

	n = read(iter_fd, buf, sizeof(buf)-1);
	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
	buf[n] = '\0';

	errno = 0;
	ret = (__u32)strtol(buf, NULL, 10);
	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));

	return ret;
}

static __u32 get_cur_elements(int map_id)
{
	struct map_percpu_stats *skel;
	struct bpf_link *link;
	__u32 n_elements;
	int iter_fd;
	int ret;

	skel = map_percpu_stats__open();
	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));

	skel->bss->target_id = map_id;

	ret = map_percpu_stats__load(skel);
	CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n_elements = read_cur_elements(iter_fd);

	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n_elements;
}

static void check_expected_number_elements(__u32 n_inserted, int map_fd,
					   struct bpf_map_info *info)
{
	__u32 n_real;
	__u32 n_iter;

	/* Count the current number of elements in the map by iterating through
	 * all the map keys via bpf_get_next_key
	 */
	n_real = map_count_elements(info->type, map_fd);

	/* The "real" number of elements should be the same as the inserted
	 * number of elements in all cases except LRU maps, where some elements
	 * may have been evicted
	 */
	if (n_inserted == 0 || !is_lru(info->type))
		CHECK(n_inserted != n_real, "map_count_elements",
		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);

	/* Count the current number of elements in the map using an iterator */
	n_iter = get_cur_elements(info->id);

	/* Both counts should be the same, as all updates are over */
	CHECK(n_iter != n_real, "get_cur_elements",
	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
}

static void __test(int map_fd)
{
	struct upsert_opts opts = {
		.map_fd = map_fd,
	};
	struct bpf_map_info info;

	map_info(map_fd, &info);
	opts.map_type = info.type;
	opts.n = info.max_entries;

	/* Reduce the number of elements we are updating such that we don't
	 * bump into -E2BIG from non-preallocated hash maps, but will still
	 * have some evictions for LRU maps */
	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
		opts.n -= 512;
	else
		opts.n /= 2;

	/* The per-cpu bpf memory allocator may fail to allocate a per-cpu
	 * pointer and may not refill its free llist in time, in which case
	 * bpf_map_update_elem() will return -ENOMEM, so just retry to
	 * mitigate the problem temporarily.
	 */
	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);

	/*
	 * Upsert keys [0, n) under some contention: with random values from
	 * N_THREADS threads. Check values, then delete all elements and check
	 * values again.
	 */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, !BATCH);
	check_expected_number_elements(0, map_fd, &info);

	/* Now do the same, but using batch delete operations */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, BATCH);
	check_expected_number_elements(0, map_fd, &info);

	close(map_fd);
}

static int map_create_opts(__u32 type, const char *name,
			   struct bpf_map_create_opts *map_opts,
			   __u32 key_size, __u32 val_size)
{
	int max_entries;
	int map_fd;

	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
	else
		max_entries = MAX_ENTRIES;

	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), name);

	return map_fd;
}

static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
{
	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
}

static int create_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}

static int create_percpu_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}

static int create_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
}

static int create_percpu_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
}

static int create_lru_hash(__u32 type, __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);

	return map_create(type, "lru_hash", &map_opts);
}

static int create_hash_of_maps(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		    .map_flags = BPF_F_NO_PREALLOC,
		    .inner_map_fd = create_small_hash(),
	);
	int ret;

	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
			      &map_opts, sizeof(int), sizeof(int));
	close(map_opts.inner_map_fd);
	return ret;
}

static void map_percpu_stats_hash(void)
{
	__test(create_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash(void)
{
	__test(create_percpu_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_prealloc(void)
{
	__test(create_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash_prealloc(void)
{
	__test(create_percpu_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_of_maps(void)
{
	__test(create_hash_of_maps());
	printf("test_%s:PASS\n", __func__);
}

void test_map_percpu_stats(void)
{
	map_percpu_stats_hash();
	map_percpu_stats_percpu_hash();
	map_percpu_stats_hash_prealloc();
	map_percpu_stats_percpu_hash_prealloc();
	map_percpu_stats_lru_hash();
	map_percpu_stats_lru_hash_no_common();
	map_percpu_stats_percpu_lru_hash();
	map_percpu_stats_percpu_lru_hash_no_common();
	map_percpu_stats_hash_of_maps();
}