// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>
#include <asm/rqspinlock.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits where perf, kprobes and tracing
 * can be invoked from, the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing from being invoked before the prior invocation
 * from one of these contexts completed. sys_bpf() uses the same mechanism
 * by pinning the task to the current CPU and incrementing the recursion
 * protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure the deterministic behaviour these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it was only safe to use a raw spinlock for the preallocated hash map on
 * an RT kernel, because there was no memory allocation within the lock
 * held sections. However, after the hash map was fully converted to use
 * bpf_mem_alloc, memory allocations for the non-preallocated hash map are
 * non-synchronous, so it is safe to always use a raw spinlock for the
 * bucket lock.
 */
struct bucket {
	struct hlist_nulls_head head;
	rqspinlock_t raw_lock;
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bpf_mem_alloc ma;
	struct bpf_mem_alloc pcpu_ma;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	/* number of elements in non-preallocated hashtable are kept
	 * in either pcount or count
	 */
	struct percpu_counter pcount;
	atomic_t count;
	bool use_percpu_counter;
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		/* pointer to per-cpu pointer */
		void *ptr_to_pptr;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_res_spin_lock_init(&htab->buckets[i].raw_lock);
		cond_resched();
	}
}

static inline int htab_lock_bucket(struct bucket *b, unsigned long *pflags)
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(&b->raw_lock, flags);
	if (ret)
		return ret;
	*pflags = flags;
	return 0;
}

static inline void htab_unlock_bucket(struct bucket *b, unsigned long flags)
{
	raw_res_spin_unlock_irqrestore(&b->raw_lock, flags);
}

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	       htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline bool is_fd_htab(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS;
}

static inline void *htab_elem_value(struct htab_elem *l, u32 key_size)
{
	return l->key + round_up(key_size, 8);
}

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)htab_elem_value(l, key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)htab_elem_value(l, key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)htab_elem_value(l, map->key_size);
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}
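
/*
 * Illustrative sketch (hypothetical helper, not used by the map
 * implementation below): the pattern the update/delete paths follow when
 * touching a bucket, tying the locking comment at the top of this file to
 * the helpers defined above. The per-bucket rqspinlock acquisition can
 * fail (htab_lock_bucket() returns an error), so callers must check the
 * return value instead of assuming the lock was taken.
 */
static int __maybe_unused htab_bucket_walk_sketch(struct bucket *b)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;
	unsigned long flags;
	int cnt = 0, ret;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;	/* could not take the bucket lock, bail out */

	/* operate on the bucket's nulls list while holding the lock */
	hlist_nulls_for_each_entry_rcu(l, n, &b->head, hash_node)
		cnt++;

	htab_unlock_bucket(b, flags);
	return cnt;
}
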
/* Both percpu and fd htab support in-place update, so no need for
 * extra elem. LRU itself can remove the least used element, so
 * there is no need for an extra elem during map_update.
 */
static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}

static void htab_free_internal_structs(struct bpf_htab *htab, struct htab_elem *elem)
{
	if (btf_record_has_field(htab->map.record, BPF_TIMER))
		bpf_obj_free_timer(htab->map.record,
				   htab_elem_value(elem, htab->map.key_size));
	if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
		bpf_obj_free_workqueue(htab->map.record,
				       htab_elem_value(elem, htab->map.key_size));
	if (btf_record_has_field(htab->map.record, BPF_TASK_WORK))
		bpf_obj_free_task_work(htab->map.record,
				       htab_elem_value(elem, htab->map.key_size));
}

static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		htab_free_internal_structs(htab, elem);
		cond_resched();
	}
}

static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (IS_ERR_OR_NULL(htab->map.record))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();
	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (htab_is_percpu(htab)) {
			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
			int cpu;

			for_each_possible_cpu(cpu) {
				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
				cond_resched();
			}
		} else {
			bpf_obj_free_fields(htab->map.record,
					    htab_elem_value(elem, htab->map.key_size));
			cond_resched();
		}
		cond_resched();
	}
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code paths of
 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
 * will acquire lru_lock first, followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		bpf_map_inc_elem_count(&htab->map);
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}

static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
					    GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
		cond_resched();
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}

static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
				    GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}
/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu. percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
	    sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;
	/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
	if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
		return -E2BIG;

	return 0;
}
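
/*
 * Illustrative userspace sketch of the attributes validated above, using
 * libbpf's bpf_map_create() (assumed available; not part of the kernel
 * build, values are arbitrary). BPF_F_NO_PREALLOC selects the
 * bpf_mem_alloc backed mode, BPF_F_NO_COMMON_LRU gives each CPU its own
 * LRU list (LRU map types only), and BPF_F_ZERO_SEED needs CAP_SYS_ADMIN.
 * A rejected combination is reported as an error, e.g. EINVAL.
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts,
 *		    .map_flags = BPF_F_NO_PREALLOC);
 *	int fd = bpf_map_create(BPF_MAP_TYPE_HASH, "example_htab",
 *				sizeof(__u32), sizeof(__u64), 1024, &opts);
 *	if (fd < 0)
 *		return fd;
 */
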
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu. percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	int err;

	htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * while we are at it, make each lru list have the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	err = -E2BIG;
	if (htab->map.max_entries > 1UL << 31)
		goto free_htab;

	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* check for u32 overflow */
	if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	err = bpf_map_init_elem_count(&htab->map);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_elem_count;

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_u32();

	htab_init_buckets(htab);

	/* compute_batch_value() computes batch value as num_online_cpus() * 2
	 * and __percpu_counter_compare() needs
	 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus()
	 * for percpu_counter to be faster than atomic_t. In practice the average bpf
	 * hash map size is 10k, which means that a system with 64 cpus will fill
	 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore
	 * define our own batch count as 32 then 10k hash map can be filled up to 80%:
	 * 10k - 8k > 32 _batch_ * 64 _cpus_
	 * and __percpu_counter_compare() will still be fast. At that point hash map
	 * collisions will dominate its performance anyway. Assume that hash map filled
	 * to 50+% isn't going to be O(1) and use the following formula to choose
	 * between percpu_counter and atomic_t.
	 */
#define PERCPU_COUNTER_BATCH 32
	if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH)
		htab->use_percpu_counter = true;

	if (htab->use_percpu_counter) {
		err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL);
		if (err)
			goto free_map_locked;
	}

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_map_locked;

		if (htab_has_extra_elems(htab)) {
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	} else {
		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false);
		if (err)
			goto free_map_locked;
		if (percpu) {
			err = bpf_mem_alloc_init(&htab->pcpu_ma,
						 round_up(htab->map.value_size, 8), true);
			if (err)
				goto free_map_locked;
		}
	}

	return &htab->map;

free_prealloc:
	prealloc_destroy(htab);
free_map_locked:
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
free_elem_count:
	bpf_map_free_elem_count(&htab->map);
free_htab:
	bpf_map_area_free(htab);
	return ERR_PTR(err);
}
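
/*
 * Illustrative sketch (hypothetical helper, not used elsewhere): how the
 * sizes chosen by htab_map_alloc() above relate to the map attributes.
 * n_buckets is max_entries rounded up to a power of two, and each
 * preallocated element is the header plus the 8-byte aligned key plus
 * either a pointer to the per-cpu value area or the 8-byte aligned value.
 * As a worked example of the counter choice above: with
 * PERCPU_COUNTER_BATCH = 32, max_entries = 10000 on a 64-CPU machine uses
 * the percpu counter because 10000 / 2 > 64 * 32.
 */
static u32 __maybe_unused htab_elem_size_sketch(u32 key_size, u32 value_size,
						bool percpu)
{
	u32 size = sizeof(struct htab_elem) + round_up(key_size, 8);

	if (percpu)
		size += sizeof(void *);	/* value lives in a per-cpu area */
	else
		size += round_up(value_size, 8);
	return size;
}
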
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	if (likely(key_len % 4 == 0))
		return jhash2(key, key_len / 4, hashrnd);
	return jhash(key, key_len, hashrnd);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* can be called without the bucket lock. It will repeat the loop in
 * the unlikely event that elements moved from one bucket into another
 * while the linked list was being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}
/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return htab_elem_value(l, map->key_size);

	return NULL;
}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return htab_elem_value(l, map->key_size);
	}

	return NULL;
}

static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}

static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, false);
}

static int htab_lru_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int ref_reg = BPF_REG_1;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
			      offsetof(struct htab_elem, lru_node) +
			      offsetof(struct bpf_lru_node, ref));
	*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
	*insn++ = BPF_ST_MEM(BPF_B, ret,
			     offsetof(struct htab_elem, lru_node) +
			     offsetof(struct bpf_lru_node, ref),
			     1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}
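
/*
 * Illustrative sketch (hypothetical, not wired up anywhere): what the
 * instruction sequences emitted by the two gen_lookup callbacks above
 * compute, expressed in C. R0 holds the return value of
 * __htab_map_lookup_elem(); on a hit it is advanced past the element
 * header and the rounded-up key so that the program sees a pointer to
 * the value. The LRU variant additionally sets lru_node.ref on a hit,
 * but only when it is not already set, which avoids redundant stores.
 */
static void __maybe_unused *htab_lru_gen_lookup_sketch(struct bpf_map *map,
							void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (!l)
		return NULL;
	if (!READ_ONCE(l->lru_node.ref))
		WRITE_ONCE(l->lru_node.ref, 1);	/* mark as recently used */
	return l->key + round_up(map->key_size, 8);
}
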
static void check_and_free_fields(struct bpf_htab *htab,
				  struct htab_elem *elem)
{
	if (IS_ERR_OR_NULL(htab->map.record))
		return;

	if (htab_is_percpu(htab)) {
		void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
		int cpu;

		for_each_possible_cpu(cpu)
			bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
	} else {
		void *map_value = htab_elem_value(elem, htab->map.key_size);

		bpf_obj_free_fields(htab->map.record, map_value);
	}
}

/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
	struct bpf_htab *htab = arg;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct bucket *b;
	int ret;

	tgt_l = container_of(node, struct htab_elem, lru_node);
	b = __select_bucket(htab, tgt_l->hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return false;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
			hlist_nulls_del_rcu(&l->hash_node);
			bpf_map_dec_elem_count(&htab->map);
			break;
		}

	htab_unlock_bucket(b, flags);

	if (l == tgt_l)
		check_and_free_fields(htab, l);
	return l == tgt_l;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}
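
/*
 * Illustrative userspace sketch of how htab_map_get_next_key() is used to
 * walk all keys from the bpf(2) syscall side (libbpf wrappers assumed;
 * not part of the kernel build). Passing NULL as the current key starts
 * from the first bucket, and -ENOENT signals the end of the iteration.
 * Concurrent updates can cause keys to be skipped or seen twice.
 *
 *	__u32 key, next_key, value;
 *	int err;
 *
 *	err = bpf_map_get_next_key(map_fd, NULL, &key);
 *	while (!err) {
 *		bpf_map_lookup_elem(map_fd, &key, &value);
 *		err = bpf_map_get_next_key(map_fd, &key, &next_key);
 *		key = next_key;
 *	}
 */
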
static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	check_and_free_fields(htab, l);

	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
	bpf_mem_cache_free(&htab->ma, l);
}

static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
{
	struct bpf_map *map = &htab->map;
	void *ptr;

	if (map->ops->map_fd_put_ptr) {
		ptr = fd_htab_map_get_ptr(map, l);
		map->ops->map_fd_put_ptr(map, ptr, true);
	}
}

static bool is_map_full(struct bpf_htab *htab)
{
	if (htab->use_percpu_counter)
		return __percpu_counter_compare(&htab->pcount, htab->map.max_entries,
						PERCPU_COUNTER_BATCH) >= 0;
	return atomic_read(&htab->count) >= htab->map.max_entries;
}

static void inc_elem_count(struct bpf_htab *htab)
{
	bpf_map_inc_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH);
	else
		atomic_inc(&htab->count);
}

static void dec_elem_count(struct bpf_htab *htab)
{
	bpf_map_dec_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH);
	else
		atomic_dec(&htab->count);
}

static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	htab_put_fd_value(htab, l);

	if (htab_is_prealloc(htab)) {
		bpf_map_dec_elem_count(&htab->map);
		check_and_free_fields(htab, l);
		pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		dec_elem_count(htab);
		htab_elem_free(htab, l);
	}
}

static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	if (!onallcpus) {
		/* copy true value_size bytes */
		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
	} else {
		u32 size = round_up(htab->map.value_size, 8);
		int off = 0, cpu;

		for_each_possible_cpu(cpu) {
			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
			off += size;
		}
	}
}

static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	/* When not setting the initial value on all cpus, zero-fill element
	 * values for the other cpus. Otherwise the bpf program would have no
	 * way to ensure known initial values for cpus other than the current
	 * one (onallcpus is always false when coming from a bpf prog).
	 */
	if (!onallcpus) {
		int current_cpu = raw_smp_processor_id();
		int cpu;

		for_each_possible_cpu(cpu) {
			if (cpu == current_cpu)
				copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
			else /* Since elem is preallocated, we cannot touch special fields */
				zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
		}
	} else {
		pcpu_copy_value(htab, pptr, value, onallcpus);
	}
}

static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
	return is_fd_htab(htab) && BITS_PER_LONG == 64;
}

static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 struct htab_elem *old_elem)
{
	u32 size = htab->map.value_size;
	bool prealloc = htab_is_prealloc(htab);
	struct htab_elem *l_new, **pl_new;
	void __percpu *pptr;

	if (prealloc) {
		if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
			pl_new = this_cpu_ptr(htab->extra_elems);
			l_new = *pl_new;
			*pl_new = old_elem;
		} else {
			struct pcpu_freelist_node *l;

			l = __pcpu_freelist_pop(&htab->freelist);
			if (!l)
				return ERR_PTR(-E2BIG);
			l_new = container_of(l, struct htab_elem, fnode);
			bpf_map_inc_elem_count(&htab->map);
		}
	} else {
		if (is_map_full(htab))
			if (!old_elem)
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
				return ERR_PTR(-E2BIG);
		inc_elem_count(htab);
		l_new = bpf_mem_cache_alloc(&htab->ma);
		if (!l_new) {
			l_new = ERR_PTR(-ENOMEM);
			goto dec_count;
		}
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma);

			if (!ptr) {
				bpf_mem_cache_free(&htab->ma, l_new);
				l_new = ERR_PTR(-ENOMEM);
				goto dec_count;
			}
			l_new->ptr_to_pptr = ptr;
			pptr = *(void __percpu **)ptr;
		}

		pcpu_init_value(htab, pptr, value, onallcpus);

		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else if (fd_htab_map_needs_adjust(htab)) {
		size = round_up(size, 8);
		memcpy(htab_elem_value(l_new, key_size), value, size);
	} else {
		copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value);
	}

	l_new->hash = hash;
	return l_new;
dec_count:
	dec_elem_count(htab);
	return l_new;
}

static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}
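
/*
 * Illustrative userspace sketch of the update flags handled by
 * check_flags() above (libbpf wrappers assumed; not part of the kernel
 * build). BPF_NOEXIST only creates, BPF_EXIST only replaces and BPF_ANY
 * does either; BPF_F_LOCK may be or'ed in when the value contains a
 * struct bpf_spin_lock.
 *
 *	__u32 key = 1;
 *	__u64 val = 42;
 *	int err;
 *
 *	err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
 *	(fails with EEXIST if the key is already present)
 *
 *	err = bpf_map_update_elem(map_fd, &key, &val, BPF_EXIST);
 *	(fails with ENOENT if the key is not present)
 *
 *	err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
 *	(creates or replaces unconditionally)
 */
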
/* Called from syscall or from eBPF program */
static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	if (unlikely(map_flags & BPF_F_LOCK)) {
		if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
			return -EINVAL;
		/* find an element without taking the bucket lock */
		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
					      htab->n_buckets);
		ret = check_flags(htab, l_old, map_flags);
		if (ret)
			return ret;
		if (l_old) {
			/* grab the element lock and update value in place */
			copy_map_value_locked(map,
					      htab_elem_value(l_old, key_size),
					      value, false);
			return 0;
		}
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
	}

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
		copy_map_value_locked(map,
				      htab_elem_value(l_old, key_size),
				      value, false);
		ret = 0;
		goto err;
	}

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_nulls_del_rcu(&l_old->hash_node);

		/* l_old has already been stashed in htab->extra_elems, free
		 * its special fields before it is available for reuse.
		 */
		if (htab_is_prealloc(htab))
			check_and_free_fields(htab, l_old);
	}
	htab_unlock_bucket(b, flags);
	if (l_old && !htab_is_prealloc(htab))
		free_htab_elem(htab, l_old);
	return 0;
err:
	htab_unlock_bucket(b, flags);
	return ret;
}

static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
	check_and_free_fields(htab, elem);
	bpf_map_dec_elem_count(&htab->map);
	bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
				     u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
	l_new = prealloc_lru_pop(htab, key, hash);
	if (!l_new)
		return -ENOMEM;
	copy_map_value(&htab->map, htab_elem_value(l_new, map->key_size), value);

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
	}
	ret = 0;

err:
	htab_unlock_bucket(b, flags);

err_lock_bucket:
	if (ret)
		htab_lru_push_free(htab, l_new);
	else if (l_old)
		htab_lru_push_free(htab, l_old);

	return ret;
}

static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
					  void *value, u64 map_flags,
					  bool percpu, bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old;
	struct hlist_nulls_head *head;
	void *old_map_ptr = NULL;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		/* Update value in-place */
		if (percpu) {
			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
					value, onallcpus);
		} else {
			void **inner_map_pptr = htab_elem_value(l_old, key_size);

			old_map_ptr = *inner_map_pptr;
			WRITE_ONCE(*inner_map_pptr, *(void **)value);
		}
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, percpu, onallcpus, NULL);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	}
err:
	htab_unlock_bucket(b, flags);
	if (old_map_ptr)
		map->ops->map_fd_put_ptr(map, old_map_ptr, true);
	return ret;
}
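
/*
 * Illustrative userspace sketch of the per-cpu value layout that
 * pcpu_copy_value()/pcpu_init_value() operate on (libbpf wrappers
 * assumed; not part of the kernel build). From the syscall side the
 * value buffer of a BPF_MAP_TYPE_PERCPU_HASH covers every possible CPU,
 * each slot padded to 8 bytes; updates from a BPF program instead write
 * only the current CPU's slot and zero-fill the others.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *values = calloc(ncpus, sizeof(__u64));
 *	__u32 key = 1;
 *	int err;
 *
 *	err = bpf_map_update_elem(map_fd, &key, values, BPF_ANY);
 *	err = bpf_map_lookup_elem(map_fd, &key, values);
 *	(values[cpu] now holds the per-CPU value for each possible cpu)
 */
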
static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					      void *value, u64 map_flags,
					      bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because LRU's elem alloc may need
	 * to remove older elem from htab and this removal
	 * operation will need a bucket lock.
	 */
	if (map_flags != BPF_EXIST) {
		l_new = prealloc_lru_pop(htab, key, hash);
		if (!l_new)
			return -ENOMEM;
	}

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		bpf_lru_node_set_ref(&l_old->lru_node);

		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
				value, onallcpus);
	} else {
		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
				value, onallcpus);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		l_new = NULL;
	}
	ret = 0;
err:
	htab_unlock_bucket(b, flags);
err_lock_bucket:
	if (l_new) {
		bpf_map_dec_elem_count(&htab->map);
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
	}
	return ret;
}

static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 map_flags)
{
	return htab_map_update_elem_in_place(map, key, value, map_flags, true, false);
}

static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					    void *value, u64 map_flags)
{
	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
						 false);
}

/* Called from syscall or from eBPF program */
static long htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (l)
		hlist_nulls_del_rcu(&l->hash_node);
	else
		ret = -ENOENT;

	htab_unlock_bucket(b, flags);

	if (l)
		free_htab_elem(htab, l);
	return ret;
}

static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l)
		hlist_nulls_del_rcu(&l->hash_node);
	else
		ret = -ENOENT;

	htab_unlock_bucket(b, flags);
	if (l)
		htab_lru_push_free(htab, l);
	return ret;
}

static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	/* It's called from a worker thread and migration has been disabled,
	 * therefore, it is OK to invoke bpf_mem_cache_free() directly.
	 */
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_nulls_del_rcu(&l->hash_node);
			htab_elem_free(htab, l);
		}
		cond_resched();
	}
}

static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
{
	int i;

	rcu_read_lock();
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry(l, n, head, hash_node) {
			/* We only free timer on uref dropping to zero */
			htab_free_internal_structs(htab, l);
		}
		cond_resched_rcu();
	}
	rcu_read_unlock();
}

static void htab_map_free_internal_structs(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* We only free timer and workqueue on uref dropping to zero */
	if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK)) {
		if (!htab_is_prealloc(htab))
			htab_free_malloced_internal_structs(htab);
		else
			htab_free_prealloced_internal_structs(htab);
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
	 * There is no need to synchronize_rcu() here to protect map elements.
	 */

	/* htab no longer uses call_rcu() directly. bpf_mem_alloc does it
	 * underneath and is responsible for waiting for callbacks to finish
	 * during bpf_mem_alloc_destroy().
	 */
	if (!htab_is_prealloc(htab)) {
		delete_all_elements(htab);
	} else {
		htab_free_prealloced_fields(htab);
		prealloc_destroy(htab);
	}

	bpf_map_free_elem_count(map);
	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	bpf_map_area_free(htab);
}

static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
				   struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = htab_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": ");
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_putc(m, '\n');

	rcu_read_unlock();
}

static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	unsigned long bflags;
	struct htab_elem *l;
	u32 hash, key_size;
	struct bucket *b;
	int ret;

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &bflags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (!l) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (is_percpu) {
		u32 roundup_value_size = round_up(map->value_size, 8);
		void __percpu *pptr;
		int off = 0, cpu;

		pptr = htab_elem_get_ptr(l, key_size);
		for_each_possible_cpu(cpu) {
			copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
			check_and_init_map_value(&htab->map, value + off);
			off += roundup_value_size;
		}
	} else {
		void *src = htab_elem_value(l, map->key_size);

		if (flags & BPF_F_LOCK)
			copy_map_value_locked(map, value, src, true);
		else
			copy_map_value(map, value, src);
		/* Zeroing special fields in the temp buffer */
		check_and_init_map_value(map, value);
	}
	hlist_nulls_del_rcu(&l->hash_node);

out_unlock:
	htab_unlock_bucket(b, bflags);

	if (l) {
		if (is_lru_map)
			htab_lru_push_free(htab, l);
		else
			free_htab_elem(htab, l);
	}

	return ret;
}

static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
						 flags);
}

static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						  void *key, void *value,
						  u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
						 flags);
}

static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
						 flags);
}

static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						      void *key, void *value,
						      u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
						 flags);
}
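
/*
 * Illustrative userspace sketch of the batch interface implemented below
 * (libbpf wrappers assumed; not part of the kernel build). The out_batch
 * cursor returned by one call is fed back as in_batch for the next call
 * (NULL starts from the first bucket), *count is updated to the number of
 * entries actually copied, and -ENOENT on the last call signals that all
 * buckets have been visited (entries copied on that final call are still
 * valid).
 *
 *	__u32 in_batch, out_batch, count = 64;
 *	__u32 keys[64];
 *	__u64 vals[64];
 *	LIBBPF_OPTS(bpf_map_batch_opts, opts);
 *	int err;
 *
 *	err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &out_batch,
 *					      keys, vals, &count, &opts);
 *	while (!err) {
 *		(consume the first 'count' keys/vals here)
 *		in_batch = out_batch;
 *		count = 64;
 *		err = bpf_map_lookup_and_delete_batch(map_fd, &in_batch,
 *						      &out_batch, keys, vals,
 *						      &count, &opts);
 *	}
 */
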
static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr,
				   bool do_delete, bool is_lru_map,
				   bool is_percpu)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	u32 batch, max_count, size, bucket_size, map_id;
	u32 bucket_cnt, total, key_size, value_size;
	struct htab_elem *node_to_free = NULL;
	u64 elem_map_flags, map_flags;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags = 0;
	bool locked = false;
	struct htab_elem *l;
	struct bucket *b;
	int ret = 0;

	elem_map_flags = attr->batch.elem_flags;
	if ((elem_map_flags & ~BPF_F_LOCK) ||
	    ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	map_flags = attr->batch.flags;
	if (map_flags)
		return -EINVAL;

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	batch = 0;
	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
		return -EFAULT;

	if (batch >= htab->n_buckets)
		return -ENOENT;

	key_size = htab->map.key_size;
	value_size = htab->map.value_size;
	size = round_up(value_size, 8);
	if (is_percpu)
		value_size = size * num_possible_cpus();
	total = 0;
	/* while experimenting with hash tables with sizes ranging from 10 to
	 * 1000, it was observed that a bucket can have up to 5 entries.
	 */
	bucket_size = 5;

alloc:
	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
	keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
	values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
	if (!keys || !values) {
		ret = -ENOMEM;
		goto after_loop;
	}

again:
	bpf_disable_instrumentation();
	rcu_read_lock();
again_nocopy:
	dst_key = keys;
	dst_val = values;
	b = &htab->buckets[batch];
	head = &b->head;
	/* do not grab the lock unless we need it (bucket_cnt > 0). */
	if (locked) {
		ret = htab_lock_bucket(b, &flags);
		if (ret) {
			rcu_read_unlock();
			bpf_enable_instrumentation();
			goto after_loop;
		}
	}

	bucket_cnt = 0;
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		bucket_cnt++;

	if (bucket_cnt && !locked) {
		locked = true;
		goto again_nocopy;
	}

	if (bucket_cnt > (max_count - total)) {
		if (total == 0)
			ret = -ENOSPC;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
		htab_unlock_bucket(b, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		goto after_loop;
	}

	if (bucket_cnt > bucket_size) {
		bucket_size = bucket_cnt;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
		htab_unlock_bucket(b, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		kvfree(keys);
		kvfree(values);
		goto alloc;
	}

	/* Next block is only safe to run if you have grabbed the lock */
	if (!locked)
		goto next_batch;

	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
		memcpy(dst_key, l->key, key_size);

		if (is_percpu) {
			int off = 0, cpu;
			void __percpu *pptr;

			pptr = htab_elem_get_ptr(l, map->key_size);
			for_each_possible_cpu(cpu) {
				copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(&htab->map, dst_val + off);
				off += size;
			}
		} else {
			value = htab_elem_value(l, key_size);
			if (is_fd_htab(htab)) {
				struct bpf_map **inner_map = value;

				/* Actual value is the id of the inner map */
				map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
				value = &map_id;
			}

			if (elem_map_flags & BPF_F_LOCK)
				copy_map_value_locked(map, dst_val, value,
						      true);
			else
				copy_map_value(map, dst_val, value);
			/* Zeroing special fields in the temp buffer */
			check_and_init_map_value(map, dst_val);
		}
		if (do_delete) {
			hlist_nulls_del_rcu(&l->hash_node);

			/* bpf_lru_push_free() will acquire lru_lock, which
			 * may cause deadlock. See comments in function
			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
			 * after releasing the bucket lock.
			 *
			 * For htab of maps, htab_put_fd_value() in
			 * free_htab_elem() may acquire a spinlock with bucket
			 * lock being held and it violates the lock rule, so
			 * invoke free_htab_elem() after unlock as well.
			 */
			l->batch_flink = node_to_free;
			node_to_free = l;
		}
		dst_key += key_size;
		dst_val += value_size;
	}

	htab_unlock_bucket(b, flags);
	locked = false;

	while (node_to_free) {
		l = node_to_free;
		node_to_free = node_to_free->batch_flink;
		if (is_lru_map)
			htab_lru_push_free(htab, l);
		else
			free_htab_elem(htab, l);
	}

next_batch:
	/* If we are not copying data, we can go to the next bucket and avoid
	 * releasing the RCU read lock.
	 */
	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
		batch++;
		goto again_nocopy;
	}

	rcu_read_unlock();
	bpf_enable_instrumentation();
	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
					key_size * bucket_cnt) ||
			   copy_to_user(uvalues + total * value_size, values,
					value_size * bucket_cnt))) {
		ret = -EFAULT;
		goto after_loop;
	}

	total += bucket_cnt;
	batch++;
	if (batch >= htab->n_buckets) {
		ret = -ENOENT;
		goto after_loop;
	}
	goto again;

after_loop:
	if (ret == -EFAULT)
		goto out;

	/* copy # of entries and next batch */
	ubatch = u64_to_user_ptr(attr->batch.out_batch);
	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
	    put_user(total, &uattr->batch.count))
		ret = -EFAULT;

out:
	kvfree(keys);
	kvfree(values);
	return ret;
}

static int
htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, true);
}

static int
htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					const union bpf_attr *attr,
					union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, true);
}

static int
htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
		      union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, false);
}

static int
htab_map_lookup_and_delete_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, false);
}

static int
htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, true);
}

static int
htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					    const union bpf_attr *attr,
					    union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, true);
}

static int
htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, false);
}

static int
htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
				     const union bpf_attr *attr,
				     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, false);
}

struct bpf_iter_seq_hash_map_info {
	struct bpf_map *map;
	struct bpf_htab *htab;
	void *percpu_value_buf; // non-zero means percpu hash
	u32 bucket_id;
	u32 skip_elems;
};

static struct htab_elem *
bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
			   struct htab_elem *prev_elem)
{
	const struct bpf_htab *htab = info->htab;
	u32 skip_elems = info->skip_elems;
	u32 bucket_id = info->bucket_id;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	struct htab_elem *elem;
	struct bucket *b;
	u32 i, count;
1999 2000 if (bucket_id >= htab->n_buckets) 2001 return NULL; 2002 2003 /* try to find next elem in the same bucket */ 2004 if (prev_elem) { 2005 /* no update/deletion on this bucket, prev_elem should be still valid 2006 * and we won't skip elements. 2007 */ 2008 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); 2009 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); 2010 if (elem) 2011 return elem; 2012 2013 /* not found, unlock and go to the next bucket */ 2014 b = &htab->buckets[bucket_id++]; 2015 rcu_read_unlock(); 2016 skip_elems = 0; 2017 } 2018 2019 for (i = bucket_id; i < htab->n_buckets; i++) { 2020 b = &htab->buckets[i]; 2021 rcu_read_lock(); 2022 2023 count = 0; 2024 head = &b->head; 2025 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2026 if (count >= skip_elems) { 2027 info->bucket_id = i; 2028 info->skip_elems = count; 2029 return elem; 2030 } 2031 count++; 2032 } 2033 2034 rcu_read_unlock(); 2035 skip_elems = 0; 2036 } 2037 2038 info->bucket_id = i; 2039 info->skip_elems = 0; 2040 return NULL; 2041 } 2042 2043 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) 2044 { 2045 struct bpf_iter_seq_hash_map_info *info = seq->private; 2046 struct htab_elem *elem; 2047 2048 elem = bpf_hash_map_seq_find_next(info, NULL); 2049 if (!elem) 2050 return NULL; 2051 2052 if (*pos == 0) 2053 ++*pos; 2054 return elem; 2055 } 2056 2057 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2058 { 2059 struct bpf_iter_seq_hash_map_info *info = seq->private; 2060 2061 ++*pos; 2062 ++info->skip_elems; 2063 return bpf_hash_map_seq_find_next(info, v); 2064 } 2065 2066 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) 2067 { 2068 struct bpf_iter_seq_hash_map_info *info = seq->private; 2069 struct bpf_iter__bpf_map_elem ctx = {}; 2070 struct bpf_map *map = info->map; 2071 struct bpf_iter_meta meta; 2072 int ret = 0, off = 0, cpu; 2073 u32 roundup_value_size; 2074 struct bpf_prog *prog; 2075 void __percpu *pptr; 2076 2077 meta.seq = seq; 2078 prog = bpf_iter_get_info(&meta, elem == NULL); 2079 if (prog) { 2080 ctx.meta = &meta; 2081 ctx.map = info->map; 2082 if (elem) { 2083 ctx.key = elem->key; 2084 if (!info->percpu_value_buf) { 2085 ctx.value = htab_elem_value(elem, map->key_size); 2086 } else { 2087 roundup_value_size = round_up(map->value_size, 8); 2088 pptr = htab_elem_get_ptr(elem, map->key_size); 2089 for_each_possible_cpu(cpu) { 2090 copy_map_value_long(map, info->percpu_value_buf + off, 2091 per_cpu_ptr(pptr, cpu)); 2092 check_and_init_map_value(map, info->percpu_value_buf + off); 2093 off += roundup_value_size; 2094 } 2095 ctx.value = info->percpu_value_buf; 2096 } 2097 } 2098 ret = bpf_iter_run_prog(prog, &ctx); 2099 } 2100 2101 return ret; 2102 } 2103 2104 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) 2105 { 2106 return __bpf_hash_map_seq_show(seq, v); 2107 } 2108 2109 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) 2110 { 2111 if (!v) 2112 (void)__bpf_hash_map_seq_show(seq, NULL); 2113 else 2114 rcu_read_unlock(); 2115 } 2116 2117 static int bpf_iter_init_hash_map(void *priv_data, 2118 struct bpf_iter_aux_info *aux) 2119 { 2120 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2121 struct bpf_map *map = aux->map; 2122 void *value_buf; 2123 u32 buf_size; 2124 2125 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2126 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2127 buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 
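		/*
		 * One slot per possible CPU, each rounded up to 8 bytes,
		 * matching the per-CPU layout that copy_map_value_long()
		 * produces in __bpf_hash_map_seq_show().
		 */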
2128 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 2129 if (!value_buf) 2130 return -ENOMEM; 2131 2132 seq_info->percpu_value_buf = value_buf; 2133 } 2134 2135 bpf_map_inc_with_uref(map); 2136 seq_info->map = map; 2137 seq_info->htab = container_of(map, struct bpf_htab, map); 2138 return 0; 2139 } 2140 2141 static void bpf_iter_fini_hash_map(void *priv_data) 2142 { 2143 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2144 2145 bpf_map_put_with_uref(seq_info->map); 2146 kfree(seq_info->percpu_value_buf); 2147 } 2148 2149 static const struct seq_operations bpf_hash_map_seq_ops = { 2150 .start = bpf_hash_map_seq_start, 2151 .next = bpf_hash_map_seq_next, 2152 .stop = bpf_hash_map_seq_stop, 2153 .show = bpf_hash_map_seq_show, 2154 }; 2155 2156 static const struct bpf_iter_seq_info iter_seq_info = { 2157 .seq_ops = &bpf_hash_map_seq_ops, 2158 .init_seq_private = bpf_iter_init_hash_map, 2159 .fini_seq_private = bpf_iter_fini_hash_map, 2160 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), 2161 }; 2162 2163 static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, 2164 void *callback_ctx, u64 flags) 2165 { 2166 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2167 struct hlist_nulls_head *head; 2168 struct hlist_nulls_node *n; 2169 struct htab_elem *elem; 2170 int i, num_elems = 0; 2171 void __percpu *pptr; 2172 struct bucket *b; 2173 void *key, *val; 2174 bool is_percpu; 2175 u64 ret = 0; 2176 2177 cant_migrate(); 2178 2179 if (flags != 0) 2180 return -EINVAL; 2181 2182 is_percpu = htab_is_percpu(htab); 2183 2184 /* migration has been disabled, so percpu value prepared here will be 2185 * the same as the one seen by the bpf program with 2186 * bpf_map_lookup_elem(). 2187 */ 2188 for (i = 0; i < htab->n_buckets; i++) { 2189 b = &htab->buckets[i]; 2190 rcu_read_lock(); 2191 head = &b->head; 2192 hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { 2193 key = elem->key; 2194 if (is_percpu) { 2195 /* current cpu value for percpu map */ 2196 pptr = htab_elem_get_ptr(elem, map->key_size); 2197 val = this_cpu_ptr(pptr); 2198 } else { 2199 val = htab_elem_value(elem, map->key_size); 2200 } 2201 num_elems++; 2202 ret = callback_fn((u64)(long)map, (u64)(long)key, 2203 (u64)(long)val, (u64)(long)callback_ctx, 0); 2204 /* return value: 0 - continue, 1 - stop and return */ 2205 if (ret) { 2206 rcu_read_unlock(); 2207 goto out; 2208 } 2209 } 2210 rcu_read_unlock(); 2211 } 2212 out: 2213 return num_elems; 2214 } 2215 2216 static u64 htab_map_mem_usage(const struct bpf_map *map) 2217 { 2218 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2219 u32 value_size = round_up(htab->map.value_size, 8); 2220 bool prealloc = htab_is_prealloc(htab); 2221 bool percpu = htab_is_percpu(htab); 2222 bool lru = htab_is_lru(htab); 2223 u64 num_entries; 2224 u64 usage = sizeof(struct bpf_htab); 2225 2226 usage += sizeof(struct bucket) * htab->n_buckets; 2227 usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT; 2228 if (prealloc) { 2229 num_entries = map->max_entries; 2230 if (htab_has_extra_elems(htab)) 2231 num_entries += num_possible_cpus(); 2232 2233 usage += htab->elem_size * num_entries; 2234 2235 if (percpu) 2236 usage += value_size * num_possible_cpus() * num_entries; 2237 else if (!lru) 2238 usage += sizeof(struct htab_elem *) * num_possible_cpus(); 2239 } else { 2240 #define LLIST_NODE_SZ sizeof(struct llist_node) 2241 2242 num_entries = htab->use_percpu_counter ? 
2243 percpu_counter_sum(&htab->pcount) : 2244 atomic_read(&htab->count); 2245 usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; 2246 if (percpu) { 2247 usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; 2248 usage += value_size * num_possible_cpus() * num_entries; 2249 } 2250 } 2251 return usage; 2252 } 2253 2254 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2255 const struct bpf_map_ops htab_map_ops = { 2256 .map_meta_equal = bpf_map_meta_equal, 2257 .map_alloc_check = htab_map_alloc_check, 2258 .map_alloc = htab_map_alloc, 2259 .map_free = htab_map_free, 2260 .map_get_next_key = htab_map_get_next_key, 2261 .map_release_uref = htab_map_free_internal_structs, 2262 .map_lookup_elem = htab_map_lookup_elem, 2263 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, 2264 .map_update_elem = htab_map_update_elem, 2265 .map_delete_elem = htab_map_delete_elem, 2266 .map_gen_lookup = htab_map_gen_lookup, 2267 .map_seq_show_elem = htab_map_seq_show_elem, 2268 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2269 .map_for_each_callback = bpf_for_each_hash_elem, 2270 .map_mem_usage = htab_map_mem_usage, 2271 BATCH_OPS(htab), 2272 .map_btf_id = &htab_map_btf_ids[0], 2273 .iter_seq_info = &iter_seq_info, 2274 }; 2275 2276 const struct bpf_map_ops htab_lru_map_ops = { 2277 .map_meta_equal = bpf_map_meta_equal, 2278 .map_alloc_check = htab_map_alloc_check, 2279 .map_alloc = htab_map_alloc, 2280 .map_free = htab_map_free, 2281 .map_get_next_key = htab_map_get_next_key, 2282 .map_release_uref = htab_map_free_internal_structs, 2283 .map_lookup_elem = htab_lru_map_lookup_elem, 2284 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, 2285 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, 2286 .map_update_elem = htab_lru_map_update_elem, 2287 .map_delete_elem = htab_lru_map_delete_elem, 2288 .map_gen_lookup = htab_lru_map_gen_lookup, 2289 .map_seq_show_elem = htab_map_seq_show_elem, 2290 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2291 .map_for_each_callback = bpf_for_each_hash_elem, 2292 .map_mem_usage = htab_map_mem_usage, 2293 BATCH_OPS(htab_lru), 2294 .map_btf_id = &htab_map_btf_ids[0], 2295 .iter_seq_info = &iter_seq_info, 2296 }; 2297 2298 /* Called from eBPF program */ 2299 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2300 { 2301 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2302 2303 if (l) 2304 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2305 else 2306 return NULL; 2307 } 2308 2309 /* inline bpf_map_lookup_elem() call for per-CPU hashmap */ 2310 static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 2311 { 2312 struct bpf_insn *insn = insn_buf; 2313 2314 if (!bpf_jit_supports_percpu_insn()) 2315 return -EOPNOTSUPP; 2316 2317 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2318 (void *(*)(struct bpf_map *map, void *key))NULL)); 2319 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2320 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2321 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2322 offsetof(struct htab_elem, key) + roundup(map->key_size, 8)); 2323 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2324 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2325 2326 return insn - insn_buf; 2327 } 2328 2329 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2330 { 2331 struct htab_elem *l; 2332 2333 if (cpu >= nr_cpu_ids) 2334 return NULL; 2335 2336 l = __htab_map_lookup_elem(map, 
key); 2337 if (l) 2338 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2339 else 2340 return NULL; 2341 } 2342 2343 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2344 { 2345 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2346 2347 if (l) { 2348 bpf_lru_node_set_ref(&l->lru_node); 2349 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2350 } 2351 2352 return NULL; 2353 } 2354 2355 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2356 { 2357 struct htab_elem *l; 2358 2359 if (cpu >= nr_cpu_ids) 2360 return NULL; 2361 2362 l = __htab_map_lookup_elem(map, key); 2363 if (l) { 2364 bpf_lru_node_set_ref(&l->lru_node); 2365 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2366 } 2367 2368 return NULL; 2369 } 2370 2371 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) 2372 { 2373 struct htab_elem *l; 2374 void __percpu *pptr; 2375 int ret = -ENOENT; 2376 int cpu, off = 0; 2377 u32 size; 2378 2379 /* per_cpu areas are zero-filled and bpf programs can only 2380 * access 'value_size' of them, so copying rounded areas 2381 * will not leak any kernel data 2382 */ 2383 size = round_up(map->value_size, 8); 2384 rcu_read_lock(); 2385 l = __htab_map_lookup_elem(map, key); 2386 if (!l) 2387 goto out; 2388 /* We do not mark LRU map element here in order to not mess up 2389 * eviction heuristics when user space does a map walk. 2390 */ 2391 pptr = htab_elem_get_ptr(l, map->key_size); 2392 for_each_possible_cpu(cpu) { 2393 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2394 check_and_init_map_value(map, value + off); 2395 off += size; 2396 } 2397 ret = 0; 2398 out: 2399 rcu_read_unlock(); 2400 return ret; 2401 } 2402 2403 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2404 u64 map_flags) 2405 { 2406 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2407 int ret; 2408 2409 rcu_read_lock(); 2410 if (htab_is_lru(htab)) 2411 ret = __htab_lru_percpu_map_update_elem(map, key, value, 2412 map_flags, true); 2413 else 2414 ret = htab_map_update_elem_in_place(map, key, value, map_flags, 2415 true, true); 2416 rcu_read_unlock(); 2417 2418 return ret; 2419 } 2420 2421 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, 2422 struct seq_file *m) 2423 { 2424 struct htab_elem *l; 2425 void __percpu *pptr; 2426 int cpu; 2427 2428 rcu_read_lock(); 2429 2430 l = __htab_map_lookup_elem(map, key); 2431 if (!l) { 2432 rcu_read_unlock(); 2433 return; 2434 } 2435 2436 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 2437 seq_puts(m, ": {\n"); 2438 pptr = htab_elem_get_ptr(l, map->key_size); 2439 for_each_possible_cpu(cpu) { 2440 seq_printf(m, "\tcpu%d: ", cpu); 2441 btf_type_seq_show(map->btf, map->btf_value_type_id, 2442 per_cpu_ptr(pptr, cpu), m); 2443 seq_putc(m, '\n'); 2444 } 2445 seq_puts(m, "}\n"); 2446 2447 rcu_read_unlock(); 2448 } 2449 2450 const struct bpf_map_ops htab_percpu_map_ops = { 2451 .map_meta_equal = bpf_map_meta_equal, 2452 .map_alloc_check = htab_map_alloc_check, 2453 .map_alloc = htab_map_alloc, 2454 .map_free = htab_map_free, 2455 .map_get_next_key = htab_map_get_next_key, 2456 .map_lookup_elem = htab_percpu_map_lookup_elem, 2457 .map_gen_lookup = htab_percpu_map_gen_lookup, 2458 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, 2459 .map_update_elem = htab_percpu_map_update_elem, 2460 .map_delete_elem = htab_map_delete_elem, 2461 .map_lookup_percpu_elem = 
htab_percpu_map_lookup_percpu_elem, 2462 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2463 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2464 .map_for_each_callback = bpf_for_each_hash_elem, 2465 .map_mem_usage = htab_map_mem_usage, 2466 BATCH_OPS(htab_percpu), 2467 .map_btf_id = &htab_map_btf_ids[0], 2468 .iter_seq_info = &iter_seq_info, 2469 }; 2470 2471 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2472 .map_meta_equal = bpf_map_meta_equal, 2473 .map_alloc_check = htab_map_alloc_check, 2474 .map_alloc = htab_map_alloc, 2475 .map_free = htab_map_free, 2476 .map_get_next_key = htab_map_get_next_key, 2477 .map_lookup_elem = htab_lru_percpu_map_lookup_elem, 2478 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, 2479 .map_update_elem = htab_lru_percpu_map_update_elem, 2480 .map_delete_elem = htab_lru_map_delete_elem, 2481 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, 2482 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2483 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2484 .map_for_each_callback = bpf_for_each_hash_elem, 2485 .map_mem_usage = htab_map_mem_usage, 2486 BATCH_OPS(htab_lru_percpu), 2487 .map_btf_id = &htab_map_btf_ids[0], 2488 .iter_seq_info = &iter_seq_info, 2489 }; 2490 2491 static int fd_htab_map_alloc_check(union bpf_attr *attr) 2492 { 2493 if (attr->value_size != sizeof(u32)) 2494 return -EINVAL; 2495 return htab_map_alloc_check(attr); 2496 } 2497 2498 static void fd_htab_map_free(struct bpf_map *map) 2499 { 2500 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2501 struct hlist_nulls_node *n; 2502 struct hlist_nulls_head *head; 2503 struct htab_elem *l; 2504 int i; 2505 2506 for (i = 0; i < htab->n_buckets; i++) { 2507 head = select_bucket(htab, i); 2508 2509 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2510 void *ptr = fd_htab_map_get_ptr(map, l); 2511 2512 map->ops->map_fd_put_ptr(map, ptr, false); 2513 } 2514 } 2515 2516 htab_map_free(map); 2517 } 2518 2519 /* only called from syscall */ 2520 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 2521 { 2522 void **ptr; 2523 int ret = 0; 2524 2525 if (!map->ops->map_fd_sys_lookup_elem) 2526 return -ENOTSUPP; 2527 2528 rcu_read_lock(); 2529 ptr = htab_map_lookup_elem(map, key); 2530 if (ptr) 2531 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); 2532 else 2533 ret = -ENOENT; 2534 rcu_read_unlock(); 2535 2536 return ret; 2537 } 2538 2539 /* Only called from syscall */ 2540 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 2541 void *key, void *value, u64 map_flags) 2542 { 2543 void *ptr; 2544 int ret; 2545 2546 ptr = map->ops->map_fd_get_ptr(map, map_file, *(int *)value); 2547 if (IS_ERR(ptr)) 2548 return PTR_ERR(ptr); 2549 2550 /* The htab bucket lock is always held during update operations in fd 2551 * htab map, and the following rcu_read_lock() is only used to avoid 2552 * the WARN_ON_ONCE in htab_map_update_elem_in_place(). 
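	 * Should the update fail, the reference taken by map_fd_get_ptr()
	 * above is dropped again via map_fd_put_ptr() below.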
2553 */ 2554 rcu_read_lock(); 2555 ret = htab_map_update_elem_in_place(map, key, &ptr, map_flags, false, false); 2556 rcu_read_unlock(); 2557 if (ret) 2558 map->ops->map_fd_put_ptr(map, ptr, false); 2559 2560 return ret; 2561 } 2562 2563 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) 2564 { 2565 struct bpf_map *map, *inner_map_meta; 2566 2567 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 2568 if (IS_ERR(inner_map_meta)) 2569 return inner_map_meta; 2570 2571 map = htab_map_alloc(attr); 2572 if (IS_ERR(map)) { 2573 bpf_map_meta_free(inner_map_meta); 2574 return map; 2575 } 2576 2577 map->inner_map_meta = inner_map_meta; 2578 2579 return map; 2580 } 2581 2582 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) 2583 { 2584 struct bpf_map **inner_map = htab_map_lookup_elem(map, key); 2585 2586 if (!inner_map) 2587 return NULL; 2588 2589 return READ_ONCE(*inner_map); 2590 } 2591 2592 static int htab_of_map_gen_lookup(struct bpf_map *map, 2593 struct bpf_insn *insn_buf) 2594 { 2595 struct bpf_insn *insn = insn_buf; 2596 const int ret = BPF_REG_0; 2597 2598 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2599 (void *(*)(struct bpf_map *map, void *key))NULL)); 2600 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2601 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); 2602 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 2603 offsetof(struct htab_elem, key) + 2604 round_up(map->key_size, 8)); 2605 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 2606 2607 return insn - insn_buf; 2608 } 2609 2610 static void htab_of_map_free(struct bpf_map *map) 2611 { 2612 bpf_map_meta_free(map->inner_map_meta); 2613 fd_htab_map_free(map); 2614 } 2615 2616 const struct bpf_map_ops htab_of_maps_map_ops = { 2617 .map_alloc_check = fd_htab_map_alloc_check, 2618 .map_alloc = htab_of_map_alloc, 2619 .map_free = htab_of_map_free, 2620 .map_get_next_key = htab_map_get_next_key, 2621 .map_lookup_elem = htab_of_map_lookup_elem, 2622 .map_delete_elem = htab_map_delete_elem, 2623 .map_fd_get_ptr = bpf_map_fd_get_ptr, 2624 .map_fd_put_ptr = bpf_map_fd_put_ptr, 2625 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2626 .map_gen_lookup = htab_of_map_gen_lookup, 2627 .map_check_btf = map_check_no_btf, 2628 .map_mem_usage = htab_map_mem_usage, 2629 BATCH_OPS(htab), 2630 .map_btf_id = &htab_map_btf_ids[0], 2631 }; 2632