// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */

#include <errno.h>
#include <unistd.h>
#include <pthread.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>

#include <bpf_util.h>
#include <test_maps.h>

#include "map_percpu_stats.skel.h"

#define MAX_ENTRIES 16384
#define MAX_ENTRIES_HASH_OF_MAPS 64
#define N_THREADS 8
#define MAX_MAP_KEY_SIZE 4
#define PCPU_MIN_UNIT_SIZE 32768

static void map_info(int map_fd, struct bpf_map_info *info)
{
	__u32 len = sizeof(*info);
	int ret;

	memset(info, 0, sizeof(*info));

	ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
	CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
}

static const char *map_type_to_s(__u32 type)
{
	switch (type) {
	case BPF_MAP_TYPE_HASH:
		return "HASH";
	case BPF_MAP_TYPE_PERCPU_HASH:
		return "PERCPU_HASH";
	case BPF_MAP_TYPE_LRU_HASH:
		return "LRU_HASH";
	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
		return "LRU_PERCPU_HASH";
	case BPF_MAP_TYPE_HASH_OF_MAPS:
		return "BPF_MAP_TYPE_HASH_OF_MAPS";
	default:
		return "<define-me>";
	}
}

/* Count map elements by walking all keys with bpf_map_get_next_key() */
static __u32 map_count_elements(__u32 type, int map_fd)
{
	__u32 key = -1;
	int n = 0;

	while (!bpf_map_get_next_key(map_fd, &key, &key))
		n++;
	return n;
}

#define BATCH true

static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
{
	static __u8 values[(8 << 10) * MAX_ENTRIES];
	void *in_batch = NULL, *out_batch;
	__u32 save_count = count;
	int ret;

	ret = bpf_map_lookup_and_delete_batch(map_fd,
					      &in_batch, &out_batch,
					      keys, values, &count,
					      NULL);

	/*
	 * Despite what the uapi header says, lookup_and_delete_batch returns
	 * -ENOENT when all elements have been successfully deleted, so check
	 * for this case separately.
	 */
	CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
	      "error: %s\n", strerror(errno));

	CHECK(count != save_count,
	      "bpf_map_lookup_and_delete_batch",
	      "did not delete all elements: removed=%u expected=%u\n",
	      count, save_count);
}

static void delete_all_elements(__u32 type, int map_fd, bool batch)
{
	static __u8 val[8 << 10]; /* enough for 1024 CPUs */
	__u32 key = -1;
	void *keys;
	__u32 i, n;
	int ret;

	keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
	CHECK(!keys, "calloc", "error: %s\n", strerror(errno));

	for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
		memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);

	if (batch) {
		/* Can't mix delete_batch and delete_and_lookup_batch because
		 * they have different semantics in relation to the keys
		 * argument.
		 * However, delete_batch uses map_delete_elem, so we
		 * effectively test it in the non-batch scenario.
		 */
		delete_and_lookup_batch(map_fd, keys, n);
	} else {
		/* Intentionally mix delete and lookup_and_delete so we can test both */
		for (i = 0; i < n; i++) {
			void *keyp = keys + i*MAX_MAP_KEY_SIZE;

			if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				ret = bpf_map_delete_elem(map_fd, keyp);
				CHECK(ret < 0, "bpf_map_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			} else {
				ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
				CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
				      "error: key %u: %s\n", i, strerror(errno));
			}
		}
	}

	free(keys);
}

static bool is_lru(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_LRU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool is_percpu(__u32 map_type)
{
	return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

struct upsert_opts {
	__u32 map_type;
	int map_fd;
	__u32 n;
	bool retry_for_nomem;
};

static int create_small_hash(void)
{
	int map_fd;

	map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), "small");

	return map_fd;
}

static bool retry_for_nomem_fn(int err)
{
	return err == ENOMEM;
}

/* Worker thread: upsert keys [0, opts->n) into the map */
static void *patch_map_thread(void *arg)
{
	/* 8KB is enough for 1024 CPUs, and it is shared between the N_THREADS threads */
	static __u8 blob[8 << 10];
	struct upsert_opts *opts = arg;
	void *val_ptr;
	int val;
	int ret;
	int i;

	for (i = 0; i < opts->n; i++) {
		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
			val = create_small_hash();
			val_ptr = &val;
		} else if (is_percpu(opts->map_type)) {
			val_ptr = blob;
		} else {
			val = rand();
			val_ptr = &val;
		}

		/* 2 seconds may be enough? */
		if (opts->retry_for_nomem)
			ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
						   40, retry_for_nomem_fn);
		else
			ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
		CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));

		if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
			close(val);
	}
	return NULL;
}

static void upsert_elements(struct upsert_opts *opts)
{
	pthread_t threads[N_THREADS];
	int ret;
	int i;

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_create(&threads[i], NULL, patch_map_thread, opts);
		CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
	}

	for (i = 0; i < ARRAY_SIZE(threads); i++) {
		ret = pthread_join(threads[i], NULL);
		CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
	}
}

static __u32 read_cur_elements(int iter_fd)
{
	char buf[64];
	ssize_t n;
	__u32 ret;

	n = read(iter_fd, buf, sizeof(buf)-1);
	CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
	buf[n] = '\0';

	errno = 0;
	ret = (__u32)strtol(buf, NULL, 10);
	CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));

	return ret;
}

/* Attach the map_percpu_stats iterator program and read the element count
 * it reports for the map with the given id
 */
static __u32 get_cur_elements(int map_id)
{
	struct map_percpu_stats *skel;
	struct bpf_link *link;
	__u32 n_elements;
	int iter_fd;
	int ret;

	skel = map_percpu_stats__open();
	CHECK(skel == NULL, "map_percpu_stats__open", "error: %s\n", strerror(errno));

	skel->bss->target_id = map_id;

	ret = map_percpu_stats__load(skel);
	CHECK(ret != 0, "map_percpu_stats__load", "error: %s\n", strerror(errno));

	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
	CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));

	n_elements = read_cur_elements(iter_fd);

	close(iter_fd);
	bpf_link__destroy(link);
	map_percpu_stats__destroy(skel);

	return n_elements;
}

static void check_expected_number_elements(__u32 n_inserted, int map_fd,
					   struct bpf_map_info *info)
{
	__u32 n_real;
	__u32 n_iter;

	/* Count the current number of elements in the map by iterating through
	 * all the map keys via bpf_map_get_next_key()
	 */
	n_real = map_count_elements(info->type, map_fd);

	/* The "real" number of elements should be the same as the inserted
	 * number of elements in all cases except LRU maps, where some elements
	 * may have been evicted
	 */
	if (n_inserted == 0 || !is_lru(info->type))
		CHECK(n_inserted != n_real, "map_count_elements",
		      "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);

	/* Count the current number of elements in the map using an iterator */
	n_iter = get_cur_elements(info->id);

	/* Both counts should be the same, as all updates are over */
	CHECK(n_iter != n_real, "get_cur_elements",
	      "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
	      n_iter, n_real, map_type_to_s(info->type), info->map_flags);
}

/* Common test body: fill the map from N_THREADS threads, verify the element
 * counts, delete everything, and verify again
 */
static void __test(int map_fd)
{
	struct upsert_opts opts = {
		.map_fd = map_fd,
	};
	struct bpf_map_info info;

	map_info(map_fd, &info);
	opts.map_type = info.type;
	opts.n = info.max_entries;

	/* Reduce the number of elements we are updating such that we don't
	 * bump into -E2BIG from non-preallocated hash maps, but will still
	 * have some evictions for LRU maps
	 */
	if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
		opts.n -= 512;
	else
		opts.n /= 2;

	/* The per-cpu BPF memory allocator may fail to allocate a per-cpu
	 * pointer and may not refill its free llist in time, in which case
	 * bpf_map_update_elem() returns -ENOMEM. Retry the update to
	 * mitigate the problem temporarily.
	 */
	opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);

	/*
	 * Upsert keys [0, n) under some contention: with random values from
	 * N_THREADS threads. Check values, then delete all elements and check
	 * values again.
	 */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, !BATCH);
	check_expected_number_elements(0, map_fd, &info);

	/* Now do the same, but using batch delete operations */
	upsert_elements(&opts);
	check_expected_number_elements(opts.n, map_fd, &info);
	delete_all_elements(info.type, map_fd, BATCH);
	check_expected_number_elements(0, map_fd, &info);

	close(map_fd);
}

static int map_create_opts(__u32 type, const char *name,
			   struct bpf_map_create_opts *map_opts,
			   __u32 key_size, __u32 val_size)
{
	int max_entries;
	int map_fd;

	if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
		max_entries = MAX_ENTRIES_HASH_OF_MAPS;
	else
		max_entries = MAX_ENTRIES;

	map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
	CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
	      strerror(errno), name);

	return map_fd;
}

static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
{
	return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
}

static int create_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}

static int create_percpu_hash(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);

	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}

static int create_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
}

static int create_percpu_hash_prealloc(void)
{
	return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
}

static int create_lru_hash(__u32 type, __u32 map_flags)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);

	return map_create(type, "lru_hash", &map_opts);
}

static int create_hash_of_maps(void)
{
	LIBBPF_OPTS(bpf_map_create_opts, map_opts,
		    .map_flags = BPF_F_NO_PREALLOC,
		    .inner_map_fd = create_small_hash(),
	);
	int ret;

	ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
			      &map_opts, sizeof(int), sizeof(int));
	close(map_opts.inner_map_fd);
	return ret;
}

static void map_percpu_stats_hash(void)
{
	__test(create_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash(void)
{
	__test(create_percpu_hash());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_prealloc(void)
{
	__test(create_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_hash_prealloc(void)
{
	__test(create_percpu_hash_prealloc());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_percpu_lru_hash_no_common(void)
{
	__test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_hash_of_maps(void)
{
	__test(create_hash_of_maps());
	printf("test_%s:PASS\n", __func__);
}

static void map_percpu_stats_map_value_size(void)
{
	int fd;
	int value_sz = PCPU_MIN_UNIT_SIZE + 1;
	struct bpf_map_create_opts opts = { .sz = sizeof(opts) };
	enum bpf_map_type map_types[] = { BPF_MAP_TYPE_PERCPU_ARRAY,
					  BPF_MAP_TYPE_PERCPU_HASH,
					  BPF_MAP_TYPE_LRU_PERCPU_HASH };

	for (int i = 0; i < ARRAY_SIZE(map_types); i++) {
		fd = bpf_map_create(map_types[i], NULL, sizeof(__u32), value_sz, 1, &opts);
		CHECK(fd < 0 && errno != E2BIG, "percpu map value size",
		      "error: %s\n", strerror(errno));
	}
	printf("test_%s:PASS\n", __func__);
}

void test_map_percpu_stats(void)
{
	map_percpu_stats_hash();
	map_percpu_stats_percpu_hash();
	map_percpu_stats_hash_prealloc();
	map_percpu_stats_percpu_hash_prealloc();
	map_percpu_stats_lru_hash();
	map_percpu_stats_lru_hash_no_common();
	map_percpu_stats_percpu_lru_hash();
	map_percpu_stats_percpu_lru_hash_no_common();
	map_percpu_stats_hash_of_maps();
	map_percpu_stats_map_value_size();
}