1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2016 Facebook 3 */ 4 #define _GNU_SOURCE 5 #include <sched.h> 6 #include <stdio.h> 7 #include <sys/types.h> 8 #include <asm/unistd.h> 9 #include <unistd.h> 10 #include <assert.h> 11 #include <sys/wait.h> 12 #include <stdlib.h> 13 #include <signal.h> 14 #include <string.h> 15 #include <time.h> 16 #include <arpa/inet.h> 17 #include <errno.h> 18 19 #include <bpf/bpf.h> 20 #include <bpf/libbpf.h> 21 22 #define TEST_BIT(t) (1U << (t)) 23 #define MAX_NR_CPUS 1024 24 25 static __u64 time_get_ns(void) 26 { 27 struct timespec ts; 28 29 clock_gettime(CLOCK_MONOTONIC, &ts); 30 return ts.tv_sec * 1000000000ull + ts.tv_nsec; 31 } 32 33 enum test_type { 34 HASH_PREALLOC, 35 PERCPU_HASH_PREALLOC, 36 HASH_KMALLOC, 37 PERCPU_HASH_KMALLOC, 38 LRU_HASH_PREALLOC, 39 NOCOMMON_LRU_HASH_PREALLOC, 40 LPM_KMALLOC, 41 HASH_LOOKUP, 42 ARRAY_LOOKUP, 43 INNER_LRU_HASH_PREALLOC, 44 LRU_HASH_LOOKUP, 45 NR_TESTS, 46 }; 47 48 const char *test_map_names[NR_TESTS] = { 49 [HASH_PREALLOC] = "hash_map", 50 [PERCPU_HASH_PREALLOC] = "percpu_hash_map", 51 [HASH_KMALLOC] = "hash_map_alloc", 52 [PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc", 53 [LRU_HASH_PREALLOC] = "lru_hash_map", 54 [NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map", 55 [LPM_KMALLOC] = "lpm_trie_map_alloc", 56 [HASH_LOOKUP] = "hash_map", 57 [ARRAY_LOOKUP] = "array_map", 58 [INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map", 59 [LRU_HASH_LOOKUP] = "lru_hash_lookup_map", 60 }; 61 62 enum map_idx { 63 array_of_lru_hashs_idx, 64 hash_map_alloc_idx, 65 lru_hash_lookup_idx, 66 NR_IDXES, 67 }; 68 69 static int map_fd[NR_IDXES]; 70 71 static int test_flags = ~0; 72 static uint32_t num_map_entries; 73 static uint32_t inner_lru_hash_size; 74 static int lru_hash_lookup_test_entries = 32; 75 static uint32_t max_cnt = 1000000; 76 77 static int check_test_flags(enum test_type t) 78 { 79 return test_flags & TEST_BIT(t); 80 } 81 82 static void test_hash_prealloc(int cpu) 83 { 84 __u64 start_time; 85 int i; 86 87 start_time = time_get_ns(); 88 for (i = 0; i < max_cnt; i++) 89 syscall(__NR_getuid); 90 printf("%d:hash_map_perf pre-alloc %lld events per sec\n", 91 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 92 } 93 94 static int pre_test_lru_hash_lookup(int tasks) 95 { 96 int fd = map_fd[lru_hash_lookup_idx]; 97 uint32_t key; 98 long val = 1; 99 int ret; 100 101 if (num_map_entries > lru_hash_lookup_test_entries) 102 lru_hash_lookup_test_entries = num_map_entries; 103 104 /* Populate the lru_hash_map for LRU_HASH_LOOKUP perf test. 105 * 106 * It is fine that the user requests for a map with 107 * num_map_entries < 32 and some of the later lru hash lookup 108 * may return not found. For LRU map, we are not interested 109 * in such small map performance. 110 */ 111 for (key = 0; key < lru_hash_lookup_test_entries; key++) { 112 ret = bpf_map_update_elem(fd, &key, &val, BPF_NOEXIST); 113 if (ret) 114 return ret; 115 } 116 117 return 0; 118 } 119 120 static void do_test_lru(enum test_type test, int cpu) 121 { 122 static int inner_lru_map_fds[MAX_NR_CPUS]; 123 124 struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 }; 125 const char *test_name; 126 __u64 start_time; 127 int i, ret; 128 129 if (test == INNER_LRU_HASH_PREALLOC && cpu) { 130 /* If CPU is not 0, create inner_lru hash map and insert the fd 131 * value into the array_of_lru_hash map. In case of CPU 0, 132 * 'inner_lru_hash_map' was statically inserted on the map init 133 */ 134 int outer_fd = map_fd[array_of_lru_hashs_idx]; 135 unsigned int mycpu, mynode; 136 LIBBPF_OPTS(bpf_map_create_opts, opts, 137 .map_flags = BPF_F_NUMA_NODE, 138 ); 139 140 assert(cpu < MAX_NR_CPUS); 141 142 ret = syscall(__NR_getcpu, &mycpu, &mynode, NULL); 143 assert(!ret); 144 145 opts.numa_node = mynode; 146 inner_lru_map_fds[cpu] = 147 bpf_map_create(BPF_MAP_TYPE_LRU_HASH, 148 test_map_names[INNER_LRU_HASH_PREALLOC], 149 sizeof(uint32_t), 150 sizeof(long), 151 inner_lru_hash_size, &opts); 152 if (inner_lru_map_fds[cpu] == -1) { 153 printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n", 154 strerror(errno), errno); 155 exit(1); 156 } 157 158 ret = bpf_map_update_elem(outer_fd, &cpu, 159 &inner_lru_map_fds[cpu], 160 BPF_ANY); 161 if (ret) { 162 printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n", 163 cpu, strerror(errno), errno); 164 exit(1); 165 } 166 } 167 168 in6.sin6_addr.s6_addr16[0] = 0xdead; 169 in6.sin6_addr.s6_addr16[1] = 0xbeef; 170 171 if (test == LRU_HASH_PREALLOC) { 172 test_name = "lru_hash_map_perf"; 173 in6.sin6_addr.s6_addr16[2] = 0; 174 } else if (test == NOCOMMON_LRU_HASH_PREALLOC) { 175 test_name = "nocommon_lru_hash_map_perf"; 176 in6.sin6_addr.s6_addr16[2] = 1; 177 } else if (test == INNER_LRU_HASH_PREALLOC) { 178 test_name = "inner_lru_hash_map_perf"; 179 in6.sin6_addr.s6_addr16[2] = 2; 180 } else if (test == LRU_HASH_LOOKUP) { 181 test_name = "lru_hash_lookup_perf"; 182 in6.sin6_addr.s6_addr16[2] = 3; 183 in6.sin6_addr.s6_addr32[3] = 0; 184 } else { 185 assert(0); 186 } 187 188 start_time = time_get_ns(); 189 for (i = 0; i < max_cnt; i++) { 190 ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6)); 191 assert(ret == -1 && errno == EBADF); 192 if (in6.sin6_addr.s6_addr32[3] < 193 lru_hash_lookup_test_entries - 32) 194 in6.sin6_addr.s6_addr32[3] += 32; 195 else 196 in6.sin6_addr.s6_addr32[3] = 0; 197 } 198 printf("%d:%s pre-alloc %lld events per sec\n", 199 cpu, test_name, 200 max_cnt * 1000000000ll / (time_get_ns() - start_time)); 201 } 202 203 static void test_lru_hash_prealloc(int cpu) 204 { 205 do_test_lru(LRU_HASH_PREALLOC, cpu); 206 } 207 208 static void test_nocommon_lru_hash_prealloc(int cpu) 209 { 210 do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu); 211 } 212 213 static void test_inner_lru_hash_prealloc(int cpu) 214 { 215 do_test_lru(INNER_LRU_HASH_PREALLOC, cpu); 216 } 217 218 static void test_lru_hash_lookup(int cpu) 219 { 220 do_test_lru(LRU_HASH_LOOKUP, cpu); 221 } 222 223 static void test_percpu_hash_prealloc(int cpu) 224 { 225 __u64 start_time; 226 int i; 227 228 start_time = time_get_ns(); 229 for (i = 0; i < max_cnt; i++) 230 syscall(__NR_geteuid); 231 printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n", 232 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 233 } 234 235 static void test_hash_kmalloc(int cpu) 236 { 237 __u64 start_time; 238 int i; 239 240 start_time = time_get_ns(); 241 for (i = 0; i < max_cnt; i++) 242 syscall(__NR_getgid); 243 printf("%d:hash_map_perf kmalloc %lld events per sec\n", 244 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 245 } 246 247 static void test_percpu_hash_kmalloc(int cpu) 248 { 249 __u64 start_time; 250 int i; 251 252 start_time = time_get_ns(); 253 for (i = 0; i < max_cnt; i++) 254 syscall(__NR_getegid); 255 printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n", 256 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 257 } 258 259 static void test_lpm_kmalloc(int cpu) 260 { 261 __u64 start_time; 262 int i; 263 264 start_time = time_get_ns(); 265 for (i = 0; i < max_cnt; i++) 266 syscall(__NR_gettid); 267 printf("%d:lpm_perf kmalloc %lld events per sec\n", 268 cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time)); 269 } 270 271 static void test_hash_lookup(int cpu) 272 { 273 __u64 start_time; 274 int i; 275 276 start_time = time_get_ns(); 277 for (i = 0; i < max_cnt; i++) 278 syscall(__NR_getpgid, 0); 279 printf("%d:hash_lookup %lld lookups per sec\n", 280 cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); 281 } 282 283 static void test_array_lookup(int cpu) 284 { 285 __u64 start_time; 286 int i; 287 288 start_time = time_get_ns(); 289 for (i = 0; i < max_cnt; i++) 290 syscall(__NR_getppid, 0); 291 printf("%d:array_lookup %lld lookups per sec\n", 292 cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time)); 293 } 294 295 typedef int (*pre_test_func)(int tasks); 296 const pre_test_func pre_test_funcs[] = { 297 [LRU_HASH_LOOKUP] = pre_test_lru_hash_lookup, 298 }; 299 300 typedef void (*test_func)(int cpu); 301 const test_func test_funcs[] = { 302 [HASH_PREALLOC] = test_hash_prealloc, 303 [PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc, 304 [HASH_KMALLOC] = test_hash_kmalloc, 305 [PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc, 306 [LRU_HASH_PREALLOC] = test_lru_hash_prealloc, 307 [NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc, 308 [LPM_KMALLOC] = test_lpm_kmalloc, 309 [HASH_LOOKUP] = test_hash_lookup, 310 [ARRAY_LOOKUP] = test_array_lookup, 311 [INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc, 312 [LRU_HASH_LOOKUP] = test_lru_hash_lookup, 313 }; 314 315 static int pre_test(int tasks) 316 { 317 int i; 318 319 for (i = 0; i < NR_TESTS; i++) { 320 if (pre_test_funcs[i] && check_test_flags(i)) { 321 int ret = pre_test_funcs[i](tasks); 322 323 if (ret) 324 return ret; 325 } 326 } 327 328 return 0; 329 } 330 331 static void loop(int cpu) 332 { 333 cpu_set_t cpuset; 334 int i; 335 336 CPU_ZERO(&cpuset); 337 CPU_SET(cpu, &cpuset); 338 sched_setaffinity(0, sizeof(cpuset), &cpuset); 339 340 for (i = 0; i < NR_TESTS; i++) { 341 if (check_test_flags(i)) 342 test_funcs[i](cpu); 343 } 344 } 345 346 static void run_perf_test(int tasks) 347 { 348 pid_t pid[tasks]; 349 int i; 350 351 assert(!pre_test(tasks)); 352 353 for (i = 0; i < tasks; i++) { 354 pid[i] = fork(); 355 if (pid[i] == 0) { 356 loop(i); 357 exit(0); 358 } else if (pid[i] == -1) { 359 printf("couldn't spawn #%d process\n", i); 360 exit(1); 361 } 362 } 363 for (i = 0; i < tasks; i++) { 364 int status; 365 366 assert(waitpid(pid[i], &status, 0) == pid[i]); 367 assert(status == 0); 368 } 369 } 370 371 static void fill_lpm_trie(void) 372 { 373 struct bpf_lpm_trie_key *key; 374 unsigned long value = 0; 375 unsigned int i; 376 int r; 377 378 key = alloca(sizeof(*key) + 4); 379 key->prefixlen = 32; 380 381 for (i = 0; i < 512; ++i) { 382 key->prefixlen = rand() % 33; 383 key->data[0] = rand() & 0xff; 384 key->data[1] = rand() & 0xff; 385 key->data[2] = rand() & 0xff; 386 key->data[3] = rand() & 0xff; 387 r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], 388 key, &value, 0); 389 assert(!r); 390 } 391 392 key->prefixlen = 32; 393 key->data[0] = 192; 394 key->data[1] = 168; 395 key->data[2] = 0; 396 key->data[3] = 1; 397 value = 128; 398 399 r = bpf_map_update_elem(map_fd[hash_map_alloc_idx], key, &value, 0); 400 assert(!r); 401 } 402 403 static void fixup_map(struct bpf_object *obj) 404 { 405 struct bpf_map *map; 406 int i; 407 408 bpf_object__for_each_map(map, obj) { 409 const char *name = bpf_map__name(map); 410 411 /* Only change the max_entries for the enabled test(s) */ 412 for (i = 0; i < NR_TESTS; i++) { 413 if (!strcmp(test_map_names[i], name) && 414 (check_test_flags(i))) { 415 bpf_map__set_max_entries(map, num_map_entries); 416 continue; 417 } 418 } 419 } 420 421 inner_lru_hash_size = num_map_entries; 422 } 423 424 int main(int argc, char **argv) 425 { 426 int nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); 427 struct bpf_link *links[8]; 428 struct bpf_program *prog; 429 struct bpf_object *obj; 430 struct bpf_map *map; 431 char filename[256]; 432 int i = 0; 433 434 if (argc > 1) 435 test_flags = atoi(argv[1]) ? : test_flags; 436 437 if (argc > 2) 438 nr_cpus = atoi(argv[2]) ? : nr_cpus; 439 440 if (argc > 3) 441 num_map_entries = atoi(argv[3]); 442 443 if (argc > 4) 444 max_cnt = atoi(argv[4]); 445 446 snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 447 obj = bpf_object__open_file(filename, NULL); 448 if (libbpf_get_error(obj)) { 449 fprintf(stderr, "ERROR: opening BPF object file failed\n"); 450 return 0; 451 } 452 453 map = bpf_object__find_map_by_name(obj, "inner_lru_hash_map"); 454 if (libbpf_get_error(map)) { 455 fprintf(stderr, "ERROR: finding a map in obj file failed\n"); 456 goto cleanup; 457 } 458 459 inner_lru_hash_size = bpf_map__max_entries(map); 460 if (!inner_lru_hash_size) { 461 fprintf(stderr, "ERROR: failed to get map attribute\n"); 462 goto cleanup; 463 } 464 465 /* resize BPF map prior to loading */ 466 if (num_map_entries > 0) 467 fixup_map(obj); 468 469 /* load BPF program */ 470 if (bpf_object__load(obj)) { 471 fprintf(stderr, "ERROR: loading BPF object file failed\n"); 472 goto cleanup; 473 } 474 475 map_fd[0] = bpf_object__find_map_fd_by_name(obj, "array_of_lru_hashs"); 476 map_fd[1] = bpf_object__find_map_fd_by_name(obj, "hash_map_alloc"); 477 map_fd[2] = bpf_object__find_map_fd_by_name(obj, "lru_hash_lookup_map"); 478 if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) { 479 fprintf(stderr, "ERROR: finding a map in obj file failed\n"); 480 goto cleanup; 481 } 482 483 bpf_object__for_each_program(prog, obj) { 484 links[i] = bpf_program__attach(prog); 485 if (libbpf_get_error(links[i])) { 486 fprintf(stderr, "ERROR: bpf_program__attach failed\n"); 487 links[i] = NULL; 488 goto cleanup; 489 } 490 i++; 491 } 492 493 fill_lpm_trie(); 494 495 run_perf_test(nr_cpus); 496 497 cleanup: 498 for (i--; i >= 0; i--) 499 bpf_link__destroy(links[i]); 500 501 bpf_object__close(obj); 502 return 0; 503 } 504