1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 */ 5 #include <linux/bpf.h> 6 #include <linux/btf.h> 7 #include <linux/jhash.h> 8 #include <linux/filter.h> 9 #include <linux/rculist_nulls.h> 10 #include <linux/rcupdate_wait.h> 11 #include <linux/random.h> 12 #include <uapi/linux/btf.h> 13 #include <linux/rcupdate_trace.h> 14 #include <linux/btf_ids.h> 15 #include "percpu_freelist.h" 16 #include "bpf_lru_list.h" 17 #include "map_in_map.h" 18 #include <linux/bpf_mem_alloc.h> 19 #include <asm/rqspinlock.h> 20 21 #define HTAB_CREATE_FLAG_MASK \ 22 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ 23 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) 24 25 #define BATCH_OPS(_name) \ 26 .map_lookup_batch = \ 27 _name##_map_lookup_batch, \ 28 .map_lookup_and_delete_batch = \ 29 _name##_map_lookup_and_delete_batch, \ 30 .map_update_batch = \ 31 generic_map_update_batch, \ 32 .map_delete_batch = \ 33 generic_map_delete_batch 34 35 /* 36 * The bucket lock has two protection scopes: 37 * 38 * 1) Serializing concurrent operations from BPF programs on different 39 * CPUs 40 * 41 * 2) Serializing concurrent operations from BPF programs and sys_bpf() 42 * 43 * BPF programs can execute in any context including perf, kprobes and 44 * tracing. As there are almost no limits where perf, kprobes and tracing 45 * can be invoked from the lock operations need to be protected against 46 * deadlocks. Deadlocks can be caused by recursion and by an invocation in 47 * the lock held section when functions which acquire this lock are invoked 48 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU 49 * variable bpf_prog_active, which prevents BPF programs attached to perf 50 * events, kprobes and tracing to be invoked before the prior invocation 51 * from one of these contexts completed. sys_bpf() uses the same mechanism 52 * by pinning the task to the current CPU and incrementing the recursion 53 * protection across the map operation. 54 * 55 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain 56 * operations like memory allocations (even with GFP_ATOMIC) from atomic 57 * contexts. This is required because even with GFP_ATOMIC the memory 58 * allocator calls into code paths which acquire locks with long held lock 59 * sections. To ensure the deterministic behaviour these locks are regular 60 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only 61 * true atomic contexts on an RT kernel are the low level hardware 62 * handling, scheduling, low level interrupt handling, NMIs etc. None of 63 * these contexts should ever do memory allocations. 64 * 65 * As regular device interrupt handlers and soft interrupts are forced into 66 * thread context, the existing code which does 67 * spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*(); 68 * just works. 69 * 70 * In theory the BPF locks could be converted to regular spinlocks as well, 71 * but the bucket locks and percpu_freelist locks can be taken from 72 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be 73 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc, 74 * it is only safe to use raw spinlock for preallocated hash map on a RT kernel, 75 * because there is no memory allocation within the lock held sections. However 76 * after hash map was fully converted to use bpf_mem_alloc, there will be 77 * non-synchronous memory allocation for non-preallocated hash map, so it is 78 * safe to always use raw spinlock for bucket lock. 79 */ 80 struct bucket { 81 struct hlist_nulls_head head; 82 rqspinlock_t raw_lock; 83 }; 84 85 struct bpf_htab { 86 struct bpf_map map; 87 struct bpf_mem_alloc ma; 88 struct bpf_mem_alloc pcpu_ma; 89 struct bucket *buckets; 90 void *elems; 91 union { 92 struct pcpu_freelist freelist; 93 struct bpf_lru lru; 94 }; 95 struct htab_elem *__percpu *extra_elems; 96 /* number of elements in non-preallocated hashtable are kept 97 * in either pcount or count 98 */ 99 struct percpu_counter pcount; 100 atomic_t count; 101 bool use_percpu_counter; 102 u32 n_buckets; /* number of hash buckets */ 103 u32 elem_size; /* size of each element in bytes */ 104 u32 hashrnd; 105 }; 106 107 /* each htab element is struct htab_elem + key + value */ 108 struct htab_elem { 109 union { 110 struct hlist_nulls_node hash_node; 111 struct { 112 void *padding; 113 union { 114 struct pcpu_freelist_node fnode; 115 struct htab_elem *batch_flink; 116 }; 117 }; 118 }; 119 union { 120 /* pointer to per-cpu pointer */ 121 void *ptr_to_pptr; 122 struct bpf_lru_node lru_node; 123 }; 124 u32 hash; 125 char key[] __aligned(8); 126 }; 127 128 static inline bool htab_is_prealloc(const struct bpf_htab *htab) 129 { 130 return !(htab->map.map_flags & BPF_F_NO_PREALLOC); 131 } 132 133 static void htab_init_buckets(struct bpf_htab *htab) 134 { 135 unsigned int i; 136 137 for (i = 0; i < htab->n_buckets; i++) { 138 INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); 139 raw_res_spin_lock_init(&htab->buckets[i].raw_lock); 140 cond_resched(); 141 } 142 } 143 144 static inline int htab_lock_bucket(struct bucket *b, unsigned long *pflags) 145 { 146 unsigned long flags; 147 int ret; 148 149 ret = raw_res_spin_lock_irqsave(&b->raw_lock, flags); 150 if (ret) 151 return ret; 152 *pflags = flags; 153 return 0; 154 } 155 156 static inline void htab_unlock_bucket(struct bucket *b, unsigned long flags) 157 { 158 raw_res_spin_unlock_irqrestore(&b->raw_lock, flags); 159 } 160 161 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); 162 163 static bool htab_is_lru(const struct bpf_htab *htab) 164 { 165 return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || 166 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 167 } 168 169 static bool htab_is_percpu(const struct bpf_htab *htab) 170 { 171 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || 172 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 173 } 174 175 static inline bool is_fd_htab(const struct bpf_htab *htab) 176 { 177 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS; 178 } 179 180 static inline void *htab_elem_value(struct htab_elem *l, u32 key_size) 181 { 182 return l->key + round_up(key_size, 8); 183 } 184 185 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, 186 void __percpu *pptr) 187 { 188 *(void __percpu **)htab_elem_value(l, key_size) = pptr; 189 } 190 191 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) 192 { 193 return *(void __percpu **)htab_elem_value(l, key_size); 194 } 195 196 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) 197 { 198 return *(void **)htab_elem_value(l, map->key_size); 199 } 200 201 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) 202 { 203 return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); 204 } 205 206 /* Both percpu and fd htab support in-place update, so no need for 207 * extra elem. LRU itself can remove the least used element, so 208 * there is no need for an extra elem during map_update. 209 */ 210 static bool htab_has_extra_elems(struct bpf_htab *htab) 211 { 212 return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab); 213 } 214 215 static void htab_free_prealloced_internal_structs(struct bpf_htab *htab) 216 { 217 u32 num_entries = htab->map.max_entries; 218 int i; 219 220 if (htab_has_extra_elems(htab)) 221 num_entries += num_possible_cpus(); 222 223 for (i = 0; i < num_entries; i++) { 224 struct htab_elem *elem; 225 226 elem = get_htab_elem(htab, i); 227 bpf_map_free_internal_structs(&htab->map, 228 htab_elem_value(elem, htab->map.key_size)); 229 cond_resched(); 230 } 231 } 232 233 static void htab_free_prealloced_fields(struct bpf_htab *htab) 234 { 235 u32 num_entries = htab->map.max_entries; 236 int i; 237 238 if (IS_ERR_OR_NULL(htab->map.record)) 239 return; 240 if (htab_has_extra_elems(htab)) 241 num_entries += num_possible_cpus(); 242 for (i = 0; i < num_entries; i++) { 243 struct htab_elem *elem; 244 245 elem = get_htab_elem(htab, i); 246 if (htab_is_percpu(htab)) { 247 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 248 int cpu; 249 250 for_each_possible_cpu(cpu) { 251 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 252 cond_resched(); 253 } 254 } else { 255 bpf_obj_free_fields(htab->map.record, 256 htab_elem_value(elem, htab->map.key_size)); 257 cond_resched(); 258 } 259 cond_resched(); 260 } 261 } 262 263 static void htab_free_elems(struct bpf_htab *htab) 264 { 265 int i; 266 267 if (!htab_is_percpu(htab)) 268 goto free_elems; 269 270 for (i = 0; i < htab->map.max_entries; i++) { 271 void __percpu *pptr; 272 273 pptr = htab_elem_get_ptr(get_htab_elem(htab, i), 274 htab->map.key_size); 275 free_percpu(pptr); 276 cond_resched(); 277 } 278 free_elems: 279 bpf_map_area_free(htab->elems); 280 } 281 282 /* The LRU list has a lock (lru_lock). Each htab bucket has a lock 283 * (bucket_lock). If both locks need to be acquired together, the lock 284 * order is always lru_lock -> bucket_lock and this only happens in 285 * bpf_lru_list.c logic. For example, certain code path of 286 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(), 287 * will acquire lru_lock first followed by acquiring bucket_lock. 288 * 289 * In hashtab.c, to avoid deadlock, lock acquisition of 290 * bucket_lock followed by lru_lock is not allowed. In such cases, 291 * bucket_lock needs to be released first before acquiring lru_lock. 292 */ 293 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, 294 u32 hash) 295 { 296 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); 297 struct htab_elem *l; 298 299 if (node) { 300 bpf_map_inc_elem_count(&htab->map); 301 l = container_of(node, struct htab_elem, lru_node); 302 memcpy(l->key, key, htab->map.key_size); 303 return l; 304 } 305 306 return NULL; 307 } 308 309 static int prealloc_init(struct bpf_htab *htab) 310 { 311 u32 num_entries = htab->map.max_entries; 312 int err = -ENOMEM, i; 313 314 if (htab_has_extra_elems(htab)) 315 num_entries += num_possible_cpus(); 316 317 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, 318 htab->map.numa_node); 319 if (!htab->elems) 320 return -ENOMEM; 321 322 if (!htab_is_percpu(htab)) 323 goto skip_percpu_elems; 324 325 for (i = 0; i < num_entries; i++) { 326 u32 size = round_up(htab->map.value_size, 8); 327 void __percpu *pptr; 328 329 pptr = bpf_map_alloc_percpu(&htab->map, size, 8, 330 GFP_USER | __GFP_NOWARN); 331 if (!pptr) 332 goto free_elems; 333 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 334 pptr); 335 cond_resched(); 336 } 337 338 skip_percpu_elems: 339 if (htab_is_lru(htab)) 340 err = bpf_lru_init(&htab->lru, 341 htab->map.map_flags & BPF_F_NO_COMMON_LRU, 342 offsetof(struct htab_elem, hash) - 343 offsetof(struct htab_elem, lru_node), 344 htab_lru_map_delete_node, 345 htab); 346 else 347 err = pcpu_freelist_init(&htab->freelist); 348 349 if (err) 350 goto free_elems; 351 352 if (htab_is_lru(htab)) 353 bpf_lru_populate(&htab->lru, htab->elems, 354 offsetof(struct htab_elem, lru_node), 355 htab->elem_size, num_entries); 356 else 357 pcpu_freelist_populate(&htab->freelist, 358 htab->elems + offsetof(struct htab_elem, fnode), 359 htab->elem_size, num_entries); 360 361 return 0; 362 363 free_elems: 364 htab_free_elems(htab); 365 return err; 366 } 367 368 static void prealloc_destroy(struct bpf_htab *htab) 369 { 370 htab_free_elems(htab); 371 372 if (htab_is_lru(htab)) 373 bpf_lru_destroy(&htab->lru); 374 else 375 pcpu_freelist_destroy(&htab->freelist); 376 } 377 378 static int alloc_extra_elems(struct bpf_htab *htab) 379 { 380 struct htab_elem *__percpu *pptr, *l_new; 381 struct pcpu_freelist_node *l; 382 int cpu; 383 384 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8, 385 GFP_USER | __GFP_NOWARN); 386 if (!pptr) 387 return -ENOMEM; 388 389 for_each_possible_cpu(cpu) { 390 l = pcpu_freelist_pop(&htab->freelist); 391 /* pop will succeed, since prealloc_init() 392 * preallocated extra num_possible_cpus elements 393 */ 394 l_new = container_of(l, struct htab_elem, fnode); 395 *per_cpu_ptr(pptr, cpu) = l_new; 396 } 397 htab->extra_elems = pptr; 398 return 0; 399 } 400 401 /* Called from syscall */ 402 static int htab_map_alloc_check(union bpf_attr *attr) 403 { 404 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 405 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 406 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 407 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 408 /* percpu_lru means each cpu has its own LRU list. 409 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 410 * the map's value itself is percpu. percpu_lru has 411 * nothing to do with the map's value. 412 */ 413 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 414 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 415 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); 416 int numa_node = bpf_map_attr_numa_node(attr); 417 418 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != 419 offsetof(struct htab_elem, hash_node.pprev)); 420 421 if (zero_seed && !capable(CAP_SYS_ADMIN)) 422 /* Guard against local DoS, and discourage production use. */ 423 return -EPERM; 424 425 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || 426 !bpf_map_flags_access_ok(attr->map_flags)) 427 return -EINVAL; 428 429 if (!lru && percpu_lru) 430 return -EINVAL; 431 432 if (lru && !prealloc) 433 return -ENOTSUPP; 434 435 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) 436 return -EINVAL; 437 438 /* check sanity of attributes. 439 * value_size == 0 may be allowed in the future to use map as a set 440 */ 441 if (attr->max_entries == 0 || attr->key_size == 0 || 442 attr->value_size == 0) 443 return -EINVAL; 444 445 if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - 446 sizeof(struct htab_elem)) 447 /* if key_size + value_size is bigger, the user space won't be 448 * able to access the elements via bpf syscall. This check 449 * also makes sure that the elem_size doesn't overflow and it's 450 * kmalloc-able later in htab_map_update_elem() 451 */ 452 return -E2BIG; 453 /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ 454 if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) 455 return -E2BIG; 456 457 return 0; 458 } 459 460 static struct bpf_map *htab_map_alloc(union bpf_attr *attr) 461 { 462 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 463 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 464 /* percpu_lru means each cpu has its own LRU list. 465 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 466 * the map's value itself is percpu. percpu_lru has 467 * nothing to do with the map's value. 468 */ 469 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 470 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 471 struct bpf_htab *htab; 472 int err; 473 474 htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); 475 if (!htab) 476 return ERR_PTR(-ENOMEM); 477 478 bpf_map_init_from_attr(&htab->map, attr); 479 480 if (percpu_lru) { 481 /* ensure each CPU's lru list has >=1 elements. 482 * since we are at it, make each lru list has the same 483 * number of elements. 484 */ 485 htab->map.max_entries = roundup(attr->max_entries, 486 num_possible_cpus()); 487 if (htab->map.max_entries < attr->max_entries) 488 htab->map.max_entries = rounddown(attr->max_entries, 489 num_possible_cpus()); 490 } 491 492 /* hash table size must be power of 2; roundup_pow_of_two() can overflow 493 * into UB on 32-bit arches, so check that first 494 */ 495 err = -E2BIG; 496 if (htab->map.max_entries > 1UL << 31) 497 goto free_htab; 498 499 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); 500 501 htab->elem_size = sizeof(struct htab_elem) + 502 round_up(htab->map.key_size, 8); 503 if (percpu) 504 htab->elem_size += sizeof(void *); 505 else 506 htab->elem_size += round_up(htab->map.value_size, 8); 507 508 /* check for u32 overflow */ 509 if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) 510 goto free_htab; 511 512 err = bpf_map_init_elem_count(&htab->map); 513 if (err) 514 goto free_htab; 515 516 err = -ENOMEM; 517 htab->buckets = bpf_map_area_alloc(htab->n_buckets * 518 sizeof(struct bucket), 519 htab->map.numa_node); 520 if (!htab->buckets) 521 goto free_elem_count; 522 523 if (htab->map.map_flags & BPF_F_ZERO_SEED) 524 htab->hashrnd = 0; 525 else 526 htab->hashrnd = get_random_u32(); 527 528 htab_init_buckets(htab); 529 530 /* compute_batch_value() computes batch value as num_online_cpus() * 2 531 * and __percpu_counter_compare() needs 532 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus() 533 * for percpu_counter to be faster than atomic_t. In practice the average bpf 534 * hash map size is 10k, which means that a system with 64 cpus will fill 535 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore 536 * define our own batch count as 32 then 10k hash map can be filled up to 80%: 537 * 10k - 8k > 32 _batch_ * 64 _cpus_ 538 * and __percpu_counter_compare() will still be fast. At that point hash map 539 * collisions will dominate its performance anyway. Assume that hash map filled 540 * to 50+% isn't going to be O(1) and use the following formula to choose 541 * between percpu_counter and atomic_t. 542 */ 543 #define PERCPU_COUNTER_BATCH 32 544 if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH) 545 htab->use_percpu_counter = true; 546 547 if (htab->use_percpu_counter) { 548 err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL); 549 if (err) 550 goto free_map_locked; 551 } 552 553 if (prealloc) { 554 err = prealloc_init(htab); 555 if (err) 556 goto free_map_locked; 557 558 if (htab_has_extra_elems(htab)) { 559 err = alloc_extra_elems(htab); 560 if (err) 561 goto free_prealloc; 562 } 563 } else { 564 err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false); 565 if (err) 566 goto free_map_locked; 567 if (percpu) { 568 err = bpf_mem_alloc_init(&htab->pcpu_ma, 569 round_up(htab->map.value_size, 8), true); 570 if (err) 571 goto free_map_locked; 572 } 573 } 574 575 return &htab->map; 576 577 free_prealloc: 578 prealloc_destroy(htab); 579 free_map_locked: 580 if (htab->use_percpu_counter) 581 percpu_counter_destroy(&htab->pcount); 582 bpf_map_area_free(htab->buckets); 583 bpf_mem_alloc_destroy(&htab->pcpu_ma); 584 bpf_mem_alloc_destroy(&htab->ma); 585 free_elem_count: 586 bpf_map_free_elem_count(&htab->map); 587 free_htab: 588 bpf_map_area_free(htab); 589 return ERR_PTR(err); 590 } 591 592 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) 593 { 594 if (likely(key_len % 4 == 0)) 595 return jhash2(key, key_len / 4, hashrnd); 596 return jhash(key, key_len, hashrnd); 597 } 598 599 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 600 { 601 return &htab->buckets[hash & (htab->n_buckets - 1)]; 602 } 603 604 static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) 605 { 606 return &__select_bucket(htab, hash)->head; 607 } 608 609 /* this lookup function can only be called with bucket lock taken */ 610 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, 611 void *key, u32 key_size) 612 { 613 struct hlist_nulls_node *n; 614 struct htab_elem *l; 615 616 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 617 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 618 return l; 619 620 return NULL; 621 } 622 623 /* can be called without bucket lock. it will repeat the loop in 624 * the unlikely event when elements moved from one bucket into another 625 * while link list is being walked 626 */ 627 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, 628 u32 hash, void *key, 629 u32 key_size, u32 n_buckets) 630 { 631 struct hlist_nulls_node *n; 632 struct htab_elem *l; 633 634 again: 635 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 636 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 637 return l; 638 639 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) 640 goto again; 641 642 return NULL; 643 } 644 645 /* Called from syscall or from eBPF program directly, so 646 * arguments have to match bpf_map_lookup_elem() exactly. 647 * The return value is adjusted by BPF instructions 648 * in htab_map_gen_lookup(). 649 */ 650 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) 651 { 652 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 653 struct hlist_nulls_head *head; 654 struct htab_elem *l; 655 u32 hash, key_size; 656 657 WARN_ON_ONCE(!bpf_rcu_lock_held()); 658 659 key_size = map->key_size; 660 661 hash = htab_map_hash(key, key_size, htab->hashrnd); 662 663 head = select_bucket(htab, hash); 664 665 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 666 667 return l; 668 } 669 670 static void *htab_map_lookup_elem(struct bpf_map *map, void *key) 671 { 672 struct htab_elem *l = __htab_map_lookup_elem(map, key); 673 674 if (l) 675 return htab_elem_value(l, map->key_size); 676 677 return NULL; 678 } 679 680 /* inline bpf_map_lookup_elem() call. 681 * Instead of: 682 * bpf_prog 683 * bpf_map_lookup_elem 684 * map->ops->map_lookup_elem 685 * htab_map_lookup_elem 686 * __htab_map_lookup_elem 687 * do: 688 * bpf_prog 689 * __htab_map_lookup_elem 690 */ 691 static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 692 { 693 struct bpf_insn *insn = insn_buf; 694 const int ret = BPF_REG_0; 695 696 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 697 (void *(*)(struct bpf_map *map, void *key))NULL)); 698 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 699 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 700 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 701 offsetof(struct htab_elem, key) + 702 round_up(map->key_size, 8)); 703 return insn - insn_buf; 704 } 705 706 static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, 707 void *key, const bool mark) 708 { 709 struct htab_elem *l = __htab_map_lookup_elem(map, key); 710 711 if (l) { 712 if (mark) 713 bpf_lru_node_set_ref(&l->lru_node); 714 return htab_elem_value(l, map->key_size); 715 } 716 717 return NULL; 718 } 719 720 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) 721 { 722 return __htab_lru_map_lookup_elem(map, key, true); 723 } 724 725 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) 726 { 727 return __htab_lru_map_lookup_elem(map, key, false); 728 } 729 730 static int htab_lru_map_gen_lookup(struct bpf_map *map, 731 struct bpf_insn *insn_buf) 732 { 733 struct bpf_insn *insn = insn_buf; 734 const int ret = BPF_REG_0; 735 const int ref_reg = BPF_REG_1; 736 737 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 738 (void *(*)(struct bpf_map *map, void *key))NULL)); 739 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 740 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); 741 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, 742 offsetof(struct htab_elem, lru_node) + 743 offsetof(struct bpf_lru_node, ref)); 744 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); 745 *insn++ = BPF_ST_MEM(BPF_B, ret, 746 offsetof(struct htab_elem, lru_node) + 747 offsetof(struct bpf_lru_node, ref), 748 1); 749 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 750 offsetof(struct htab_elem, key) + 751 round_up(map->key_size, 8)); 752 return insn - insn_buf; 753 } 754 755 static void check_and_free_fields(struct bpf_htab *htab, 756 struct htab_elem *elem) 757 { 758 if (IS_ERR_OR_NULL(htab->map.record)) 759 return; 760 761 if (htab_is_percpu(htab)) { 762 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 763 int cpu; 764 765 for_each_possible_cpu(cpu) 766 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 767 } else { 768 void *map_value = htab_elem_value(elem, htab->map.key_size); 769 770 bpf_obj_free_fields(htab->map.record, map_value); 771 } 772 } 773 774 /* It is called from the bpf_lru_list when the LRU needs to delete 775 * older elements from the htab. 776 */ 777 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 778 { 779 struct bpf_htab *htab = arg; 780 struct htab_elem *l = NULL, *tgt_l; 781 struct hlist_nulls_head *head; 782 struct hlist_nulls_node *n; 783 unsigned long flags; 784 struct bucket *b; 785 int ret; 786 787 tgt_l = container_of(node, struct htab_elem, lru_node); 788 b = __select_bucket(htab, tgt_l->hash); 789 head = &b->head; 790 791 ret = htab_lock_bucket(b, &flags); 792 if (ret) 793 return false; 794 795 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 796 if (l == tgt_l) { 797 hlist_nulls_del_rcu(&l->hash_node); 798 bpf_map_dec_elem_count(&htab->map); 799 break; 800 } 801 802 htab_unlock_bucket(b, flags); 803 804 if (l == tgt_l) 805 check_and_free_fields(htab, l); 806 return l == tgt_l; 807 } 808 809 /* Called from syscall */ 810 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 811 { 812 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 813 struct hlist_nulls_head *head; 814 struct htab_elem *l, *next_l; 815 u32 hash, key_size; 816 int i = 0; 817 818 WARN_ON_ONCE(!rcu_read_lock_held()); 819 820 key_size = map->key_size; 821 822 if (!key) 823 goto find_first_elem; 824 825 hash = htab_map_hash(key, key_size, htab->hashrnd); 826 827 head = select_bucket(htab, hash); 828 829 /* lookup the key */ 830 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 831 832 if (!l) 833 goto find_first_elem; 834 835 /* key was found, get next key in the same bucket */ 836 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 837 struct htab_elem, hash_node); 838 839 if (next_l) { 840 /* if next elem in this hash list is non-zero, just return it */ 841 memcpy(next_key, next_l->key, key_size); 842 return 0; 843 } 844 845 /* no more elements in this hash list, go to the next bucket */ 846 i = hash & (htab->n_buckets - 1); 847 i++; 848 849 find_first_elem: 850 /* iterate over buckets */ 851 for (; i < htab->n_buckets; i++) { 852 head = select_bucket(htab, i); 853 854 /* pick first element in the bucket */ 855 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 856 struct htab_elem, hash_node); 857 if (next_l) { 858 /* if it's not empty, just return it */ 859 memcpy(next_key, next_l->key, key_size); 860 return 0; 861 } 862 } 863 864 /* iterated over all buckets and all elements */ 865 return -ENOENT; 866 } 867 868 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 869 { 870 check_and_free_fields(htab, l); 871 872 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 873 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 874 bpf_mem_cache_free(&htab->ma, l); 875 } 876 877 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) 878 { 879 struct bpf_map *map = &htab->map; 880 void *ptr; 881 882 if (map->ops->map_fd_put_ptr) { 883 ptr = fd_htab_map_get_ptr(map, l); 884 map->ops->map_fd_put_ptr(map, ptr, true); 885 } 886 } 887 888 static bool is_map_full(struct bpf_htab *htab) 889 { 890 if (htab->use_percpu_counter) 891 return __percpu_counter_compare(&htab->pcount, htab->map.max_entries, 892 PERCPU_COUNTER_BATCH) >= 0; 893 return atomic_read(&htab->count) >= htab->map.max_entries; 894 } 895 896 static void inc_elem_count(struct bpf_htab *htab) 897 { 898 bpf_map_inc_elem_count(&htab->map); 899 900 if (htab->use_percpu_counter) 901 percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH); 902 else 903 atomic_inc(&htab->count); 904 } 905 906 static void dec_elem_count(struct bpf_htab *htab) 907 { 908 bpf_map_dec_elem_count(&htab->map); 909 910 if (htab->use_percpu_counter) 911 percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH); 912 else 913 atomic_dec(&htab->count); 914 } 915 916 917 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 918 { 919 htab_put_fd_value(htab, l); 920 921 if (htab_is_prealloc(htab)) { 922 bpf_map_dec_elem_count(&htab->map); 923 check_and_free_fields(htab, l); 924 pcpu_freelist_push(&htab->freelist, &l->fnode); 925 } else { 926 dec_elem_count(htab); 927 htab_elem_free(htab, l); 928 } 929 } 930 931 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 932 void *value, bool onallcpus, u64 map_flags) 933 { 934 void *ptr; 935 936 if (!onallcpus) { 937 /* copy true value_size bytes */ 938 ptr = this_cpu_ptr(pptr); 939 copy_map_value(&htab->map, ptr, value); 940 bpf_obj_free_fields(htab->map.record, ptr); 941 } else { 942 u32 size = round_up(htab->map.value_size, 8); 943 void *val; 944 int cpu; 945 946 if (map_flags & BPF_F_CPU) { 947 cpu = map_flags >> 32; 948 ptr = per_cpu_ptr(pptr, cpu); 949 copy_map_value(&htab->map, ptr, value); 950 bpf_obj_free_fields(htab->map.record, ptr); 951 return; 952 } 953 954 for_each_possible_cpu(cpu) { 955 ptr = per_cpu_ptr(pptr, cpu); 956 val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; 957 copy_map_value(&htab->map, ptr, val); 958 bpf_obj_free_fields(htab->map.record, ptr); 959 } 960 } 961 } 962 963 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 964 void *value, bool onallcpus, u64 map_flags) 965 { 966 /* When not setting the initial value on all cpus, zero-fill element 967 * values for other cpus. Otherwise, bpf program has no way to ensure 968 * known initial values for cpus other than current one 969 * (onallcpus=false always when coming from bpf prog). 970 */ 971 if (!onallcpus) { 972 int current_cpu = raw_smp_processor_id(); 973 int cpu; 974 975 for_each_possible_cpu(cpu) { 976 if (cpu == current_cpu) 977 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 978 else /* Since elem is preallocated, we cannot touch special fields */ 979 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 980 } 981 } else { 982 pcpu_copy_value(htab, pptr, value, onallcpus, map_flags); 983 } 984 } 985 986 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 987 { 988 return is_fd_htab(htab) && BITS_PER_LONG == 64; 989 } 990 991 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 992 void *value, u32 key_size, u32 hash, 993 bool percpu, bool onallcpus, 994 struct htab_elem *old_elem, u64 map_flags) 995 { 996 u32 size = htab->map.value_size; 997 bool prealloc = htab_is_prealloc(htab); 998 struct htab_elem *l_new, **pl_new; 999 void __percpu *pptr; 1000 1001 if (prealloc) { 1002 if (old_elem) { 1003 /* if we're updating the existing element, 1004 * use per-cpu extra elems to avoid freelist_pop/push 1005 */ 1006 pl_new = this_cpu_ptr(htab->extra_elems); 1007 l_new = *pl_new; 1008 *pl_new = old_elem; 1009 } else { 1010 struct pcpu_freelist_node *l; 1011 1012 l = __pcpu_freelist_pop(&htab->freelist); 1013 if (!l) 1014 return ERR_PTR(-E2BIG); 1015 l_new = container_of(l, struct htab_elem, fnode); 1016 bpf_map_inc_elem_count(&htab->map); 1017 } 1018 } else { 1019 if (is_map_full(htab)) 1020 if (!old_elem) 1021 /* when map is full and update() is replacing 1022 * old element, it's ok to allocate, since 1023 * old element will be freed immediately. 1024 * Otherwise return an error 1025 */ 1026 return ERR_PTR(-E2BIG); 1027 inc_elem_count(htab); 1028 l_new = bpf_mem_cache_alloc(&htab->ma); 1029 if (!l_new) { 1030 l_new = ERR_PTR(-ENOMEM); 1031 goto dec_count; 1032 } 1033 } 1034 1035 memcpy(l_new->key, key, key_size); 1036 if (percpu) { 1037 if (prealloc) { 1038 pptr = htab_elem_get_ptr(l_new, key_size); 1039 } else { 1040 /* alloc_percpu zero-fills */ 1041 void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma); 1042 1043 if (!ptr) { 1044 bpf_mem_cache_free(&htab->ma, l_new); 1045 l_new = ERR_PTR(-ENOMEM); 1046 goto dec_count; 1047 } 1048 l_new->ptr_to_pptr = ptr; 1049 pptr = *(void __percpu **)ptr; 1050 } 1051 1052 pcpu_init_value(htab, pptr, value, onallcpus, map_flags); 1053 1054 if (!prealloc) 1055 htab_elem_set_ptr(l_new, key_size, pptr); 1056 } else if (fd_htab_map_needs_adjust(htab)) { 1057 size = round_up(size, 8); 1058 memcpy(htab_elem_value(l_new, key_size), value, size); 1059 } else { 1060 copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value); 1061 } 1062 1063 l_new->hash = hash; 1064 return l_new; 1065 dec_count: 1066 dec_elem_count(htab); 1067 return l_new; 1068 } 1069 1070 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, 1071 u64 map_flags) 1072 { 1073 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 1074 /* elem already exists */ 1075 return -EEXIST; 1076 1077 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 1078 /* elem doesn't exist, cannot update it */ 1079 return -ENOENT; 1080 1081 return 0; 1082 } 1083 1084 /* Called from syscall or from eBPF program */ 1085 static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, 1086 u64 map_flags) 1087 { 1088 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1089 struct htab_elem *l_new, *l_old; 1090 struct hlist_nulls_head *head; 1091 unsigned long flags; 1092 struct bucket *b; 1093 u32 key_size, hash; 1094 int ret; 1095 1096 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 1097 /* unknown flags */ 1098 return -EINVAL; 1099 1100 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1101 1102 key_size = map->key_size; 1103 1104 hash = htab_map_hash(key, key_size, htab->hashrnd); 1105 1106 b = __select_bucket(htab, hash); 1107 head = &b->head; 1108 1109 if (unlikely(map_flags & BPF_F_LOCK)) { 1110 if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1111 return -EINVAL; 1112 /* find an element without taking the bucket lock */ 1113 l_old = lookup_nulls_elem_raw(head, hash, key, key_size, 1114 htab->n_buckets); 1115 ret = check_flags(htab, l_old, map_flags); 1116 if (ret) 1117 return ret; 1118 if (l_old) { 1119 /* grab the element lock and update value in place */ 1120 copy_map_value_locked(map, 1121 htab_elem_value(l_old, key_size), 1122 value, false); 1123 return 0; 1124 } 1125 /* fall through, grab the bucket lock and lookup again. 1126 * 99.9% chance that the element won't be found, 1127 * but second lookup under lock has to be done. 1128 */ 1129 } 1130 1131 ret = htab_lock_bucket(b, &flags); 1132 if (ret) 1133 return ret; 1134 1135 l_old = lookup_elem_raw(head, hash, key, key_size); 1136 1137 ret = check_flags(htab, l_old, map_flags); 1138 if (ret) 1139 goto err; 1140 1141 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { 1142 /* first lookup without the bucket lock didn't find the element, 1143 * but second lookup with the bucket lock found it. 1144 * This case is highly unlikely, but has to be dealt with: 1145 * grab the element lock in addition to the bucket lock 1146 * and update element in place 1147 */ 1148 copy_map_value_locked(map, 1149 htab_elem_value(l_old, key_size), 1150 value, false); 1151 ret = 0; 1152 goto err; 1153 } 1154 1155 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, 1156 l_old, map_flags); 1157 if (IS_ERR(l_new)) { 1158 /* all pre-allocated elements are in use or memory exhausted */ 1159 ret = PTR_ERR(l_new); 1160 goto err; 1161 } 1162 1163 /* add new element to the head of the list, so that 1164 * concurrent search will find it before old elem 1165 */ 1166 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1167 if (l_old) { 1168 hlist_nulls_del_rcu(&l_old->hash_node); 1169 1170 /* l_old has already been stashed in htab->extra_elems, free 1171 * its special fields before it is available for reuse. 1172 */ 1173 if (htab_is_prealloc(htab)) 1174 check_and_free_fields(htab, l_old); 1175 } 1176 htab_unlock_bucket(b, flags); 1177 if (l_old && !htab_is_prealloc(htab)) 1178 free_htab_elem(htab, l_old); 1179 return 0; 1180 err: 1181 htab_unlock_bucket(b, flags); 1182 return ret; 1183 } 1184 1185 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) 1186 { 1187 check_and_free_fields(htab, elem); 1188 bpf_map_dec_elem_count(&htab->map); 1189 bpf_lru_push_free(&htab->lru, &elem->lru_node); 1190 } 1191 1192 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, 1193 u64 map_flags) 1194 { 1195 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1196 struct htab_elem *l_new, *l_old = NULL; 1197 struct hlist_nulls_head *head; 1198 unsigned long flags; 1199 struct bucket *b; 1200 u32 key_size, hash; 1201 int ret; 1202 1203 if (unlikely(map_flags > BPF_EXIST)) 1204 /* unknown flags */ 1205 return -EINVAL; 1206 1207 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1208 1209 key_size = map->key_size; 1210 1211 hash = htab_map_hash(key, key_size, htab->hashrnd); 1212 1213 b = __select_bucket(htab, hash); 1214 head = &b->head; 1215 1216 /* For LRU, we need to alloc before taking bucket's 1217 * spinlock because getting free nodes from LRU may need 1218 * to remove older elements from htab and this removal 1219 * operation will need a bucket lock. 1220 */ 1221 l_new = prealloc_lru_pop(htab, key, hash); 1222 if (!l_new) 1223 return -ENOMEM; 1224 copy_map_value(&htab->map, htab_elem_value(l_new, map->key_size), value); 1225 1226 ret = htab_lock_bucket(b, &flags); 1227 if (ret) 1228 goto err_lock_bucket; 1229 1230 l_old = lookup_elem_raw(head, hash, key, key_size); 1231 1232 ret = check_flags(htab, l_old, map_flags); 1233 if (ret) 1234 goto err; 1235 1236 /* add new element to the head of the list, so that 1237 * concurrent search will find it before old elem 1238 */ 1239 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1240 if (l_old) { 1241 bpf_lru_node_set_ref(&l_new->lru_node); 1242 hlist_nulls_del_rcu(&l_old->hash_node); 1243 } 1244 ret = 0; 1245 1246 err: 1247 htab_unlock_bucket(b, flags); 1248 1249 err_lock_bucket: 1250 if (ret) 1251 htab_lru_push_free(htab, l_new); 1252 else if (l_old) 1253 htab_lru_push_free(htab, l_old); 1254 1255 return ret; 1256 } 1257 1258 static int htab_map_check_update_flags(bool onallcpus, u64 map_flags) 1259 { 1260 if (unlikely(!onallcpus && map_flags > BPF_EXIST)) 1261 return -EINVAL; 1262 if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS))) 1263 return -EINVAL; 1264 return 0; 1265 } 1266 1267 static long htab_map_update_elem_in_place(struct bpf_map *map, void *key, 1268 void *value, u64 map_flags, 1269 bool percpu, bool onallcpus) 1270 { 1271 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1272 struct htab_elem *l_new, *l_old; 1273 struct hlist_nulls_head *head; 1274 void *old_map_ptr = NULL; 1275 unsigned long flags; 1276 struct bucket *b; 1277 u32 key_size, hash; 1278 int ret; 1279 1280 ret = htab_map_check_update_flags(onallcpus, map_flags); 1281 if (unlikely(ret)) 1282 return ret; 1283 1284 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1285 1286 key_size = map->key_size; 1287 1288 hash = htab_map_hash(key, key_size, htab->hashrnd); 1289 1290 b = __select_bucket(htab, hash); 1291 head = &b->head; 1292 1293 ret = htab_lock_bucket(b, &flags); 1294 if (ret) 1295 return ret; 1296 1297 l_old = lookup_elem_raw(head, hash, key, key_size); 1298 1299 ret = check_flags(htab, l_old, map_flags); 1300 if (ret) 1301 goto err; 1302 1303 if (l_old) { 1304 /* Update value in-place */ 1305 if (percpu) { 1306 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1307 value, onallcpus, map_flags); 1308 } else { 1309 void **inner_map_pptr = htab_elem_value(l_old, key_size); 1310 1311 old_map_ptr = *inner_map_pptr; 1312 WRITE_ONCE(*inner_map_pptr, *(void **)value); 1313 } 1314 } else { 1315 l_new = alloc_htab_elem(htab, key, value, key_size, 1316 hash, percpu, onallcpus, NULL, map_flags); 1317 if (IS_ERR(l_new)) { 1318 ret = PTR_ERR(l_new); 1319 goto err; 1320 } 1321 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1322 } 1323 err: 1324 htab_unlock_bucket(b, flags); 1325 if (old_map_ptr) 1326 map->ops->map_fd_put_ptr(map, old_map_ptr, true); 1327 return ret; 1328 } 1329 1330 static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1331 void *value, u64 map_flags, 1332 bool onallcpus) 1333 { 1334 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1335 struct htab_elem *l_new = NULL, *l_old; 1336 struct hlist_nulls_head *head; 1337 unsigned long flags; 1338 struct bucket *b; 1339 u32 key_size, hash; 1340 int ret; 1341 1342 ret = htab_map_check_update_flags(onallcpus, map_flags); 1343 if (unlikely(ret)) 1344 return ret; 1345 1346 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1347 1348 key_size = map->key_size; 1349 1350 hash = htab_map_hash(key, key_size, htab->hashrnd); 1351 1352 b = __select_bucket(htab, hash); 1353 head = &b->head; 1354 1355 /* For LRU, we need to alloc before taking bucket's 1356 * spinlock because LRU's elem alloc may need 1357 * to remove older elem from htab and this removal 1358 * operation will need a bucket lock. 1359 */ 1360 if (map_flags != BPF_EXIST) { 1361 l_new = prealloc_lru_pop(htab, key, hash); 1362 if (!l_new) 1363 return -ENOMEM; 1364 } 1365 1366 ret = htab_lock_bucket(b, &flags); 1367 if (ret) 1368 goto err_lock_bucket; 1369 1370 l_old = lookup_elem_raw(head, hash, key, key_size); 1371 1372 ret = check_flags(htab, l_old, map_flags); 1373 if (ret) 1374 goto err; 1375 1376 if (l_old) { 1377 bpf_lru_node_set_ref(&l_old->lru_node); 1378 1379 /* per-cpu hash map can update value in-place */ 1380 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1381 value, onallcpus, map_flags); 1382 } else { 1383 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1384 value, onallcpus, map_flags); 1385 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1386 l_new = NULL; 1387 } 1388 ret = 0; 1389 err: 1390 htab_unlock_bucket(b, flags); 1391 err_lock_bucket: 1392 if (l_new) { 1393 bpf_map_dec_elem_count(&htab->map); 1394 bpf_lru_push_free(&htab->lru, &l_new->lru_node); 1395 } 1396 return ret; 1397 } 1398 1399 static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1400 void *value, u64 map_flags) 1401 { 1402 return htab_map_update_elem_in_place(map, key, value, map_flags, true, false); 1403 } 1404 1405 static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1406 void *value, u64 map_flags) 1407 { 1408 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, 1409 false); 1410 } 1411 1412 /* Called from syscall or from eBPF program */ 1413 static long htab_map_delete_elem(struct bpf_map *map, void *key) 1414 { 1415 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1416 struct hlist_nulls_head *head; 1417 struct bucket *b; 1418 struct htab_elem *l; 1419 unsigned long flags; 1420 u32 hash, key_size; 1421 int ret; 1422 1423 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1424 1425 key_size = map->key_size; 1426 1427 hash = htab_map_hash(key, key_size, htab->hashrnd); 1428 b = __select_bucket(htab, hash); 1429 head = &b->head; 1430 1431 ret = htab_lock_bucket(b, &flags); 1432 if (ret) 1433 return ret; 1434 1435 l = lookup_elem_raw(head, hash, key, key_size); 1436 if (l) 1437 hlist_nulls_del_rcu(&l->hash_node); 1438 else 1439 ret = -ENOENT; 1440 1441 htab_unlock_bucket(b, flags); 1442 1443 if (l) 1444 free_htab_elem(htab, l); 1445 return ret; 1446 } 1447 1448 static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) 1449 { 1450 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1451 struct hlist_nulls_head *head; 1452 struct bucket *b; 1453 struct htab_elem *l; 1454 unsigned long flags; 1455 u32 hash, key_size; 1456 int ret; 1457 1458 WARN_ON_ONCE(!bpf_rcu_lock_held()); 1459 1460 key_size = map->key_size; 1461 1462 hash = htab_map_hash(key, key_size, htab->hashrnd); 1463 b = __select_bucket(htab, hash); 1464 head = &b->head; 1465 1466 ret = htab_lock_bucket(b, &flags); 1467 if (ret) 1468 return ret; 1469 1470 l = lookup_elem_raw(head, hash, key, key_size); 1471 1472 if (l) 1473 hlist_nulls_del_rcu(&l->hash_node); 1474 else 1475 ret = -ENOENT; 1476 1477 htab_unlock_bucket(b, flags); 1478 if (l) 1479 htab_lru_push_free(htab, l); 1480 return ret; 1481 } 1482 1483 static void delete_all_elements(struct bpf_htab *htab) 1484 { 1485 int i; 1486 1487 /* It's called from a worker thread and migration has been disabled, 1488 * therefore, it is OK to invoke bpf_mem_cache_free() directly. 1489 */ 1490 for (i = 0; i < htab->n_buckets; i++) { 1491 struct hlist_nulls_head *head = select_bucket(htab, i); 1492 struct hlist_nulls_node *n; 1493 struct htab_elem *l; 1494 1495 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1496 hlist_nulls_del_rcu(&l->hash_node); 1497 htab_elem_free(htab, l); 1498 } 1499 cond_resched(); 1500 } 1501 } 1502 1503 static void htab_free_malloced_internal_structs(struct bpf_htab *htab) 1504 { 1505 int i; 1506 1507 rcu_read_lock(); 1508 for (i = 0; i < htab->n_buckets; i++) { 1509 struct hlist_nulls_head *head = select_bucket(htab, i); 1510 struct hlist_nulls_node *n; 1511 struct htab_elem *l; 1512 1513 hlist_nulls_for_each_entry(l, n, head, hash_node) { 1514 /* We only free internal structs on uref dropping to zero */ 1515 bpf_map_free_internal_structs(&htab->map, 1516 htab_elem_value(l, htab->map.key_size)); 1517 } 1518 cond_resched_rcu(); 1519 } 1520 rcu_read_unlock(); 1521 } 1522 1523 static void htab_map_free_internal_structs(struct bpf_map *map) 1524 { 1525 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1526 1527 /* We only free internal structs on uref dropping to zero */ 1528 if (!bpf_map_has_internal_structs(map)) 1529 return; 1530 1531 if (htab_is_prealloc(htab)) 1532 htab_free_prealloced_internal_structs(htab); 1533 else 1534 htab_free_malloced_internal_structs(htab); 1535 } 1536 1537 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 1538 static void htab_map_free(struct bpf_map *map) 1539 { 1540 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1541 1542 /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. 1543 * bpf_free_used_maps() is called after bpf prog is no longer executing. 1544 * There is no need to synchronize_rcu() here to protect map elements. 1545 */ 1546 1547 /* htab no longer uses call_rcu() directly. bpf_mem_alloc does it 1548 * underneath and is responsible for waiting for callbacks to finish 1549 * during bpf_mem_alloc_destroy(). 1550 */ 1551 if (!htab_is_prealloc(htab)) { 1552 delete_all_elements(htab); 1553 } else { 1554 htab_free_prealloced_fields(htab); 1555 prealloc_destroy(htab); 1556 } 1557 1558 bpf_map_free_elem_count(map); 1559 free_percpu(htab->extra_elems); 1560 bpf_map_area_free(htab->buckets); 1561 bpf_mem_alloc_destroy(&htab->pcpu_ma); 1562 bpf_mem_alloc_destroy(&htab->ma); 1563 if (htab->use_percpu_counter) 1564 percpu_counter_destroy(&htab->pcount); 1565 bpf_map_area_free(htab); 1566 } 1567 1568 static void htab_map_seq_show_elem(struct bpf_map *map, void *key, 1569 struct seq_file *m) 1570 { 1571 void *value; 1572 1573 rcu_read_lock(); 1574 1575 value = htab_map_lookup_elem(map, key); 1576 if (!value) { 1577 rcu_read_unlock(); 1578 return; 1579 } 1580 1581 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 1582 seq_puts(m, ": "); 1583 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 1584 seq_putc(m, '\n'); 1585 1586 rcu_read_unlock(); 1587 } 1588 1589 static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1590 void *value, bool is_lru_map, 1591 bool is_percpu, u64 flags) 1592 { 1593 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1594 struct hlist_nulls_head *head; 1595 unsigned long bflags; 1596 struct htab_elem *l; 1597 u32 hash, key_size; 1598 struct bucket *b; 1599 int ret; 1600 1601 key_size = map->key_size; 1602 1603 hash = htab_map_hash(key, key_size, htab->hashrnd); 1604 b = __select_bucket(htab, hash); 1605 head = &b->head; 1606 1607 ret = htab_lock_bucket(b, &bflags); 1608 if (ret) 1609 return ret; 1610 1611 l = lookup_elem_raw(head, hash, key, key_size); 1612 if (!l) { 1613 ret = -ENOENT; 1614 goto out_unlock; 1615 } 1616 1617 if (is_percpu) { 1618 u32 roundup_value_size = round_up(map->value_size, 8); 1619 void __percpu *pptr; 1620 int off = 0, cpu; 1621 1622 pptr = htab_elem_get_ptr(l, key_size); 1623 for_each_possible_cpu(cpu) { 1624 copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); 1625 check_and_init_map_value(&htab->map, value + off); 1626 off += roundup_value_size; 1627 } 1628 } else { 1629 void *src = htab_elem_value(l, map->key_size); 1630 1631 if (flags & BPF_F_LOCK) 1632 copy_map_value_locked(map, value, src, true); 1633 else 1634 copy_map_value(map, value, src); 1635 /* Zeroing special fields in the temp buffer */ 1636 check_and_init_map_value(map, value); 1637 } 1638 hlist_nulls_del_rcu(&l->hash_node); 1639 1640 out_unlock: 1641 htab_unlock_bucket(b, bflags); 1642 1643 if (l) { 1644 if (is_lru_map) 1645 htab_lru_push_free(htab, l); 1646 else 1647 free_htab_elem(htab, l); 1648 } 1649 1650 return ret; 1651 } 1652 1653 static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1654 void *value, u64 flags) 1655 { 1656 return __htab_map_lookup_and_delete_elem(map, key, value, false, false, 1657 flags); 1658 } 1659 1660 static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1661 void *key, void *value, 1662 u64 flags) 1663 { 1664 return __htab_map_lookup_and_delete_elem(map, key, value, false, true, 1665 flags); 1666 } 1667 1668 static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1669 void *value, u64 flags) 1670 { 1671 return __htab_map_lookup_and_delete_elem(map, key, value, true, false, 1672 flags); 1673 } 1674 1675 static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1676 void *key, void *value, 1677 u64 flags) 1678 { 1679 return __htab_map_lookup_and_delete_elem(map, key, value, true, true, 1680 flags); 1681 } 1682 1683 static int 1684 __htab_map_lookup_and_delete_batch(struct bpf_map *map, 1685 const union bpf_attr *attr, 1686 union bpf_attr __user *uattr, 1687 bool do_delete, bool is_lru_map, 1688 bool is_percpu) 1689 { 1690 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1691 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; 1692 void __user *uvalues = u64_to_user_ptr(attr->batch.values); 1693 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1694 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1695 u32 batch, max_count, size, bucket_size, map_id; 1696 u64 elem_map_flags, map_flags, allowed_flags; 1697 u32 bucket_cnt, total, key_size, value_size; 1698 struct htab_elem *node_to_free = NULL; 1699 struct hlist_nulls_head *head; 1700 struct hlist_nulls_node *n; 1701 unsigned long flags = 0; 1702 bool locked = false; 1703 struct htab_elem *l; 1704 struct bucket *b; 1705 int ret = 0; 1706 1707 elem_map_flags = attr->batch.elem_flags; 1708 allowed_flags = BPF_F_LOCK; 1709 if (!do_delete && is_percpu) 1710 allowed_flags |= BPF_F_CPU; 1711 ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags); 1712 if (ret) 1713 return ret; 1714 1715 map_flags = attr->batch.flags; 1716 if (map_flags) 1717 return -EINVAL; 1718 1719 max_count = attr->batch.count; 1720 if (!max_count) 1721 return 0; 1722 1723 if (put_user(0, &uattr->batch.count)) 1724 return -EFAULT; 1725 1726 batch = 0; 1727 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch))) 1728 return -EFAULT; 1729 1730 if (batch >= htab->n_buckets) 1731 return -ENOENT; 1732 1733 key_size = htab->map.key_size; 1734 value_size = htab->map.value_size; 1735 size = round_up(value_size, 8); 1736 if (is_percpu && !(elem_map_flags & BPF_F_CPU)) 1737 value_size = size * num_possible_cpus(); 1738 total = 0; 1739 /* while experimenting with hash tables with sizes ranging from 10 to 1740 * 1000, it was observed that a bucket can have up to 5 entries. 1741 */ 1742 bucket_size = 5; 1743 1744 alloc: 1745 /* We cannot do copy_from_user or copy_to_user inside 1746 * the rcu_read_lock. Allocate enough space here. 1747 */ 1748 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); 1749 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); 1750 if (!keys || !values) { 1751 ret = -ENOMEM; 1752 goto after_loop; 1753 } 1754 1755 again: 1756 bpf_disable_instrumentation(); 1757 rcu_read_lock(); 1758 again_nocopy: 1759 dst_key = keys; 1760 dst_val = values; 1761 b = &htab->buckets[batch]; 1762 head = &b->head; 1763 /* do not grab the lock unless need it (bucket_cnt > 0). */ 1764 if (locked) { 1765 ret = htab_lock_bucket(b, &flags); 1766 if (ret) { 1767 rcu_read_unlock(); 1768 bpf_enable_instrumentation(); 1769 goto after_loop; 1770 } 1771 } 1772 1773 bucket_cnt = 0; 1774 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1775 bucket_cnt++; 1776 1777 if (bucket_cnt && !locked) { 1778 locked = true; 1779 goto again_nocopy; 1780 } 1781 1782 if (bucket_cnt > (max_count - total)) { 1783 if (total == 0) 1784 ret = -ENOSPC; 1785 /* Note that since bucket_cnt > 0 here, it is implicit 1786 * that the locked was grabbed, so release it. 1787 */ 1788 htab_unlock_bucket(b, flags); 1789 rcu_read_unlock(); 1790 bpf_enable_instrumentation(); 1791 goto after_loop; 1792 } 1793 1794 if (bucket_cnt > bucket_size) { 1795 bucket_size = bucket_cnt; 1796 /* Note that since bucket_cnt > 0 here, it is implicit 1797 * that the locked was grabbed, so release it. 1798 */ 1799 htab_unlock_bucket(b, flags); 1800 rcu_read_unlock(); 1801 bpf_enable_instrumentation(); 1802 kvfree(keys); 1803 kvfree(values); 1804 goto alloc; 1805 } 1806 1807 /* Next block is only safe to run if you have grabbed the lock */ 1808 if (!locked) 1809 goto next_batch; 1810 1811 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1812 memcpy(dst_key, l->key, key_size); 1813 1814 if (is_percpu) { 1815 int off = 0, cpu; 1816 void __percpu *pptr; 1817 1818 pptr = htab_elem_get_ptr(l, map->key_size); 1819 if (elem_map_flags & BPF_F_CPU) { 1820 cpu = elem_map_flags >> 32; 1821 copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu)); 1822 check_and_init_map_value(&htab->map, dst_val); 1823 } else { 1824 for_each_possible_cpu(cpu) { 1825 copy_map_value_long(&htab->map, dst_val + off, 1826 per_cpu_ptr(pptr, cpu)); 1827 check_and_init_map_value(&htab->map, dst_val + off); 1828 off += size; 1829 } 1830 } 1831 } else { 1832 value = htab_elem_value(l, key_size); 1833 if (is_fd_htab(htab)) { 1834 struct bpf_map **inner_map = value; 1835 1836 /* Actual value is the id of the inner map */ 1837 map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); 1838 value = &map_id; 1839 } 1840 1841 if (elem_map_flags & BPF_F_LOCK) 1842 copy_map_value_locked(map, dst_val, value, 1843 true); 1844 else 1845 copy_map_value(map, dst_val, value); 1846 /* Zeroing special fields in the temp buffer */ 1847 check_and_init_map_value(map, dst_val); 1848 } 1849 if (do_delete) { 1850 hlist_nulls_del_rcu(&l->hash_node); 1851 1852 /* bpf_lru_push_free() will acquire lru_lock, which 1853 * may cause deadlock. See comments in function 1854 * prealloc_lru_pop(). Let us do bpf_lru_push_free() 1855 * after releasing the bucket lock. 1856 * 1857 * For htab of maps, htab_put_fd_value() in 1858 * free_htab_elem() may acquire a spinlock with bucket 1859 * lock being held and it violates the lock rule, so 1860 * invoke free_htab_elem() after unlock as well. 1861 */ 1862 l->batch_flink = node_to_free; 1863 node_to_free = l; 1864 } 1865 dst_key += key_size; 1866 dst_val += value_size; 1867 } 1868 1869 htab_unlock_bucket(b, flags); 1870 locked = false; 1871 1872 while (node_to_free) { 1873 l = node_to_free; 1874 node_to_free = node_to_free->batch_flink; 1875 if (is_lru_map) 1876 htab_lru_push_free(htab, l); 1877 else 1878 free_htab_elem(htab, l); 1879 } 1880 1881 next_batch: 1882 /* If we are not copying data, we can go to next bucket and avoid 1883 * unlocking the rcu. 1884 */ 1885 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) { 1886 batch++; 1887 goto again_nocopy; 1888 } 1889 1890 rcu_read_unlock(); 1891 bpf_enable_instrumentation(); 1892 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys, 1893 key_size * bucket_cnt) || 1894 copy_to_user(uvalues + total * value_size, values, 1895 value_size * bucket_cnt))) { 1896 ret = -EFAULT; 1897 goto after_loop; 1898 } 1899 1900 total += bucket_cnt; 1901 batch++; 1902 if (batch >= htab->n_buckets) { 1903 ret = -ENOENT; 1904 goto after_loop; 1905 } 1906 goto again; 1907 1908 after_loop: 1909 if (ret == -EFAULT) 1910 goto out; 1911 1912 /* copy # of entries and next batch */ 1913 ubatch = u64_to_user_ptr(attr->batch.out_batch); 1914 if (copy_to_user(ubatch, &batch, sizeof(batch)) || 1915 put_user(total, &uattr->batch.count)) 1916 ret = -EFAULT; 1917 1918 out: 1919 kvfree(keys); 1920 kvfree(values); 1921 return ret; 1922 } 1923 1924 static int 1925 htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1926 union bpf_attr __user *uattr) 1927 { 1928 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1929 false, true); 1930 } 1931 1932 static int 1933 htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1934 const union bpf_attr *attr, 1935 union bpf_attr __user *uattr) 1936 { 1937 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1938 false, true); 1939 } 1940 1941 static int 1942 htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1943 union bpf_attr __user *uattr) 1944 { 1945 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1946 false, false); 1947 } 1948 1949 static int 1950 htab_map_lookup_and_delete_batch(struct bpf_map *map, 1951 const union bpf_attr *attr, 1952 union bpf_attr __user *uattr) 1953 { 1954 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1955 false, false); 1956 } 1957 1958 static int 1959 htab_lru_percpu_map_lookup_batch(struct bpf_map *map, 1960 const union bpf_attr *attr, 1961 union bpf_attr __user *uattr) 1962 { 1963 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1964 true, true); 1965 } 1966 1967 static int 1968 htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1969 const union bpf_attr *attr, 1970 union bpf_attr __user *uattr) 1971 { 1972 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1973 true, true); 1974 } 1975 1976 static int 1977 htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1978 union bpf_attr __user *uattr) 1979 { 1980 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1981 true, false); 1982 } 1983 1984 static int 1985 htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, 1986 const union bpf_attr *attr, 1987 union bpf_attr __user *uattr) 1988 { 1989 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1990 true, false); 1991 } 1992 1993 struct bpf_iter_seq_hash_map_info { 1994 struct bpf_map *map; 1995 struct bpf_htab *htab; 1996 void *percpu_value_buf; // non-zero means percpu hash 1997 u32 bucket_id; 1998 u32 skip_elems; 1999 }; 2000 2001 static struct htab_elem * 2002 bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, 2003 struct htab_elem *prev_elem) 2004 { 2005 const struct bpf_htab *htab = info->htab; 2006 u32 skip_elems = info->skip_elems; 2007 u32 bucket_id = info->bucket_id; 2008 struct hlist_nulls_head *head; 2009 struct hlist_nulls_node *n; 2010 struct htab_elem *elem; 2011 struct bucket *b; 2012 u32 i, count; 2013 2014 if (bucket_id >= htab->n_buckets) 2015 return NULL; 2016 2017 /* try to find next elem in the same bucket */ 2018 if (prev_elem) { 2019 /* no update/deletion on this bucket, prev_elem should be still valid 2020 * and we won't skip elements. 2021 */ 2022 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); 2023 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); 2024 if (elem) 2025 return elem; 2026 2027 /* not found, unlock and go to the next bucket */ 2028 b = &htab->buckets[bucket_id++]; 2029 rcu_read_unlock(); 2030 skip_elems = 0; 2031 } 2032 2033 for (i = bucket_id; i < htab->n_buckets; i++) { 2034 b = &htab->buckets[i]; 2035 rcu_read_lock(); 2036 2037 count = 0; 2038 head = &b->head; 2039 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2040 if (count >= skip_elems) { 2041 info->bucket_id = i; 2042 info->skip_elems = count; 2043 return elem; 2044 } 2045 count++; 2046 } 2047 2048 rcu_read_unlock(); 2049 skip_elems = 0; 2050 } 2051 2052 info->bucket_id = i; 2053 info->skip_elems = 0; 2054 return NULL; 2055 } 2056 2057 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) 2058 { 2059 struct bpf_iter_seq_hash_map_info *info = seq->private; 2060 struct htab_elem *elem; 2061 2062 elem = bpf_hash_map_seq_find_next(info, NULL); 2063 if (!elem) 2064 return NULL; 2065 2066 if (*pos == 0) 2067 ++*pos; 2068 return elem; 2069 } 2070 2071 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2072 { 2073 struct bpf_iter_seq_hash_map_info *info = seq->private; 2074 2075 ++*pos; 2076 ++info->skip_elems; 2077 return bpf_hash_map_seq_find_next(info, v); 2078 } 2079 2080 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) 2081 { 2082 struct bpf_iter_seq_hash_map_info *info = seq->private; 2083 struct bpf_iter__bpf_map_elem ctx = {}; 2084 struct bpf_map *map = info->map; 2085 struct bpf_iter_meta meta; 2086 int ret = 0, off = 0, cpu; 2087 u32 roundup_value_size; 2088 struct bpf_prog *prog; 2089 void __percpu *pptr; 2090 2091 meta.seq = seq; 2092 prog = bpf_iter_get_info(&meta, elem == NULL); 2093 if (prog) { 2094 ctx.meta = &meta; 2095 ctx.map = info->map; 2096 if (elem) { 2097 ctx.key = elem->key; 2098 if (!info->percpu_value_buf) { 2099 ctx.value = htab_elem_value(elem, map->key_size); 2100 } else { 2101 roundup_value_size = round_up(map->value_size, 8); 2102 pptr = htab_elem_get_ptr(elem, map->key_size); 2103 for_each_possible_cpu(cpu) { 2104 copy_map_value_long(map, info->percpu_value_buf + off, 2105 per_cpu_ptr(pptr, cpu)); 2106 check_and_init_map_value(map, info->percpu_value_buf + off); 2107 off += roundup_value_size; 2108 } 2109 ctx.value = info->percpu_value_buf; 2110 } 2111 } 2112 ret = bpf_iter_run_prog(prog, &ctx); 2113 } 2114 2115 return ret; 2116 } 2117 2118 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) 2119 { 2120 return __bpf_hash_map_seq_show(seq, v); 2121 } 2122 2123 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) 2124 { 2125 if (!v) 2126 (void)__bpf_hash_map_seq_show(seq, NULL); 2127 else 2128 rcu_read_unlock(); 2129 } 2130 2131 static int bpf_iter_init_hash_map(void *priv_data, 2132 struct bpf_iter_aux_info *aux) 2133 { 2134 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2135 struct bpf_map *map = aux->map; 2136 void *value_buf; 2137 u32 buf_size; 2138 2139 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2140 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2141 buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 2142 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 2143 if (!value_buf) 2144 return -ENOMEM; 2145 2146 seq_info->percpu_value_buf = value_buf; 2147 } 2148 2149 bpf_map_inc_with_uref(map); 2150 seq_info->map = map; 2151 seq_info->htab = container_of(map, struct bpf_htab, map); 2152 return 0; 2153 } 2154 2155 static void bpf_iter_fini_hash_map(void *priv_data) 2156 { 2157 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2158 2159 bpf_map_put_with_uref(seq_info->map); 2160 kfree(seq_info->percpu_value_buf); 2161 } 2162 2163 static const struct seq_operations bpf_hash_map_seq_ops = { 2164 .start = bpf_hash_map_seq_start, 2165 .next = bpf_hash_map_seq_next, 2166 .stop = bpf_hash_map_seq_stop, 2167 .show = bpf_hash_map_seq_show, 2168 }; 2169 2170 static const struct bpf_iter_seq_info iter_seq_info = { 2171 .seq_ops = &bpf_hash_map_seq_ops, 2172 .init_seq_private = bpf_iter_init_hash_map, 2173 .fini_seq_private = bpf_iter_fini_hash_map, 2174 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), 2175 }; 2176 2177 static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, 2178 void *callback_ctx, u64 flags) 2179 { 2180 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2181 struct hlist_nulls_head *head; 2182 struct hlist_nulls_node *n; 2183 struct htab_elem *elem; 2184 int i, num_elems = 0; 2185 void __percpu *pptr; 2186 struct bucket *b; 2187 void *key, *val; 2188 bool is_percpu; 2189 u64 ret = 0; 2190 2191 cant_migrate(); 2192 2193 if (flags != 0) 2194 return -EINVAL; 2195 2196 is_percpu = htab_is_percpu(htab); 2197 2198 /* migration has been disabled, so percpu value prepared here will be 2199 * the same as the one seen by the bpf program with 2200 * bpf_map_lookup_elem(). 2201 */ 2202 for (i = 0; i < htab->n_buckets; i++) { 2203 b = &htab->buckets[i]; 2204 rcu_read_lock(); 2205 head = &b->head; 2206 hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) { 2207 key = elem->key; 2208 if (is_percpu) { 2209 /* current cpu value for percpu map */ 2210 pptr = htab_elem_get_ptr(elem, map->key_size); 2211 val = this_cpu_ptr(pptr); 2212 } else { 2213 val = htab_elem_value(elem, map->key_size); 2214 } 2215 num_elems++; 2216 ret = callback_fn((u64)(long)map, (u64)(long)key, 2217 (u64)(long)val, (u64)(long)callback_ctx, 0); 2218 /* return value: 0 - continue, 1 - stop and return */ 2219 if (ret) { 2220 rcu_read_unlock(); 2221 goto out; 2222 } 2223 } 2224 rcu_read_unlock(); 2225 } 2226 out: 2227 return num_elems; 2228 } 2229 2230 static u64 htab_map_mem_usage(const struct bpf_map *map) 2231 { 2232 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2233 u32 value_size = round_up(htab->map.value_size, 8); 2234 bool prealloc = htab_is_prealloc(htab); 2235 bool percpu = htab_is_percpu(htab); 2236 bool lru = htab_is_lru(htab); 2237 u64 num_entries, usage; 2238 2239 usage = sizeof(struct bpf_htab) + 2240 sizeof(struct bucket) * htab->n_buckets; 2241 2242 if (prealloc) { 2243 num_entries = map->max_entries; 2244 if (htab_has_extra_elems(htab)) 2245 num_entries += num_possible_cpus(); 2246 2247 usage += htab->elem_size * num_entries; 2248 2249 if (percpu) 2250 usage += value_size * num_possible_cpus() * num_entries; 2251 else if (!lru) 2252 usage += sizeof(struct htab_elem *) * num_possible_cpus(); 2253 } else { 2254 #define LLIST_NODE_SZ sizeof(struct llist_node) 2255 2256 num_entries = htab->use_percpu_counter ? 2257 percpu_counter_sum(&htab->pcount) : 2258 atomic_read(&htab->count); 2259 usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; 2260 if (percpu) { 2261 usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; 2262 usage += value_size * num_possible_cpus() * num_entries; 2263 } 2264 } 2265 return usage; 2266 } 2267 2268 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2269 const struct bpf_map_ops htab_map_ops = { 2270 .map_meta_equal = bpf_map_meta_equal, 2271 .map_alloc_check = htab_map_alloc_check, 2272 .map_alloc = htab_map_alloc, 2273 .map_free = htab_map_free, 2274 .map_get_next_key = htab_map_get_next_key, 2275 .map_release_uref = htab_map_free_internal_structs, 2276 .map_lookup_elem = htab_map_lookup_elem, 2277 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, 2278 .map_update_elem = htab_map_update_elem, 2279 .map_delete_elem = htab_map_delete_elem, 2280 .map_gen_lookup = htab_map_gen_lookup, 2281 .map_seq_show_elem = htab_map_seq_show_elem, 2282 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2283 .map_for_each_callback = bpf_for_each_hash_elem, 2284 .map_mem_usage = htab_map_mem_usage, 2285 BATCH_OPS(htab), 2286 .map_btf_id = &htab_map_btf_ids[0], 2287 .iter_seq_info = &iter_seq_info, 2288 }; 2289 2290 const struct bpf_map_ops htab_lru_map_ops = { 2291 .map_meta_equal = bpf_map_meta_equal, 2292 .map_alloc_check = htab_map_alloc_check, 2293 .map_alloc = htab_map_alloc, 2294 .map_free = htab_map_free, 2295 .map_get_next_key = htab_map_get_next_key, 2296 .map_release_uref = htab_map_free_internal_structs, 2297 .map_lookup_elem = htab_lru_map_lookup_elem, 2298 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, 2299 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, 2300 .map_update_elem = htab_lru_map_update_elem, 2301 .map_delete_elem = htab_lru_map_delete_elem, 2302 .map_gen_lookup = htab_lru_map_gen_lookup, 2303 .map_seq_show_elem = htab_map_seq_show_elem, 2304 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2305 .map_for_each_callback = bpf_for_each_hash_elem, 2306 .map_mem_usage = htab_map_mem_usage, 2307 BATCH_OPS(htab_lru), 2308 .map_btf_id = &htab_map_btf_ids[0], 2309 .iter_seq_info = &iter_seq_info, 2310 }; 2311 2312 /* Called from eBPF program */ 2313 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2314 { 2315 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2316 2317 if (l) 2318 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2319 else 2320 return NULL; 2321 } 2322 2323 /* inline bpf_map_lookup_elem() call for per-CPU hashmap */ 2324 static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 2325 { 2326 struct bpf_insn *insn = insn_buf; 2327 2328 if (!bpf_jit_supports_percpu_insn()) 2329 return -EOPNOTSUPP; 2330 2331 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2332 (void *(*)(struct bpf_map *map, void *key))NULL)); 2333 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2334 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2335 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2336 offsetof(struct htab_elem, key) + roundup(map->key_size, 8)); 2337 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2338 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2339 2340 return insn - insn_buf; 2341 } 2342 2343 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2344 { 2345 struct htab_elem *l; 2346 2347 if (cpu >= nr_cpu_ids) 2348 return NULL; 2349 2350 l = __htab_map_lookup_elem(map, key); 2351 if (l) 2352 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2353 else 2354 return NULL; 2355 } 2356 2357 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2358 { 2359 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2360 2361 if (l) { 2362 bpf_lru_node_set_ref(&l->lru_node); 2363 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2364 } 2365 2366 return NULL; 2367 } 2368 2369 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2370 { 2371 struct htab_elem *l; 2372 2373 if (cpu >= nr_cpu_ids) 2374 return NULL; 2375 2376 l = __htab_map_lookup_elem(map, key); 2377 if (l) { 2378 bpf_lru_node_set_ref(&l->lru_node); 2379 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2380 } 2381 2382 return NULL; 2383 } 2384 2385 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags) 2386 { 2387 struct htab_elem *l; 2388 void __percpu *pptr; 2389 int ret = -ENOENT; 2390 int cpu, off = 0; 2391 u32 size; 2392 2393 /* per_cpu areas are zero-filled and bpf programs can only 2394 * access 'value_size' of them, so copying rounded areas 2395 * will not leak any kernel data 2396 */ 2397 size = round_up(map->value_size, 8); 2398 rcu_read_lock(); 2399 l = __htab_map_lookup_elem(map, key); 2400 if (!l) 2401 goto out; 2402 ret = 0; 2403 /* We do not mark LRU map element here in order to not mess up 2404 * eviction heuristics when user space does a map walk. 2405 */ 2406 pptr = htab_elem_get_ptr(l, map->key_size); 2407 if (map_flags & BPF_F_CPU) { 2408 cpu = map_flags >> 32; 2409 copy_map_value(map, value, per_cpu_ptr(pptr, cpu)); 2410 check_and_init_map_value(map, value); 2411 goto out; 2412 } 2413 for_each_possible_cpu(cpu) { 2414 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2415 check_and_init_map_value(map, value + off); 2416 off += size; 2417 } 2418 out: 2419 rcu_read_unlock(); 2420 return ret; 2421 } 2422 2423 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2424 u64 map_flags) 2425 { 2426 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2427 int ret; 2428 2429 rcu_read_lock(); 2430 if (htab_is_lru(htab)) 2431 ret = __htab_lru_percpu_map_update_elem(map, key, value, 2432 map_flags, true); 2433 else 2434 ret = htab_map_update_elem_in_place(map, key, value, map_flags, 2435 true, true); 2436 rcu_read_unlock(); 2437 2438 return ret; 2439 } 2440 2441 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, 2442 struct seq_file *m) 2443 { 2444 struct htab_elem *l; 2445 void __percpu *pptr; 2446 int cpu; 2447 2448 rcu_read_lock(); 2449 2450 l = __htab_map_lookup_elem(map, key); 2451 if (!l) { 2452 rcu_read_unlock(); 2453 return; 2454 } 2455 2456 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 2457 seq_puts(m, ": {\n"); 2458 pptr = htab_elem_get_ptr(l, map->key_size); 2459 for_each_possible_cpu(cpu) { 2460 seq_printf(m, "\tcpu%d: ", cpu); 2461 btf_type_seq_show(map->btf, map->btf_value_type_id, 2462 per_cpu_ptr(pptr, cpu), m); 2463 seq_putc(m, '\n'); 2464 } 2465 seq_puts(m, "}\n"); 2466 2467 rcu_read_unlock(); 2468 } 2469 2470 const struct bpf_map_ops htab_percpu_map_ops = { 2471 .map_meta_equal = bpf_map_meta_equal, 2472 .map_alloc_check = htab_map_alloc_check, 2473 .map_alloc = htab_map_alloc, 2474 .map_free = htab_map_free, 2475 .map_get_next_key = htab_map_get_next_key, 2476 .map_lookup_elem = htab_percpu_map_lookup_elem, 2477 .map_gen_lookup = htab_percpu_map_gen_lookup, 2478 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, 2479 .map_update_elem = htab_percpu_map_update_elem, 2480 .map_delete_elem = htab_map_delete_elem, 2481 .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, 2482 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2483 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2484 .map_for_each_callback = bpf_for_each_hash_elem, 2485 .map_mem_usage = htab_map_mem_usage, 2486 BATCH_OPS(htab_percpu), 2487 .map_btf_id = &htab_map_btf_ids[0], 2488 .iter_seq_info = &iter_seq_info, 2489 }; 2490 2491 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2492 .map_meta_equal = bpf_map_meta_equal, 2493 .map_alloc_check = htab_map_alloc_check, 2494 .map_alloc = htab_map_alloc, 2495 .map_free = htab_map_free, 2496 .map_get_next_key = htab_map_get_next_key, 2497 .map_lookup_elem = htab_lru_percpu_map_lookup_elem, 2498 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, 2499 .map_update_elem = htab_lru_percpu_map_update_elem, 2500 .map_delete_elem = htab_lru_map_delete_elem, 2501 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, 2502 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2503 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2504 .map_for_each_callback = bpf_for_each_hash_elem, 2505 .map_mem_usage = htab_map_mem_usage, 2506 BATCH_OPS(htab_lru_percpu), 2507 .map_btf_id = &htab_map_btf_ids[0], 2508 .iter_seq_info = &iter_seq_info, 2509 }; 2510 2511 static int fd_htab_map_alloc_check(union bpf_attr *attr) 2512 { 2513 if (attr->value_size != sizeof(u32)) 2514 return -EINVAL; 2515 return htab_map_alloc_check(attr); 2516 } 2517 2518 static void fd_htab_map_free(struct bpf_map *map) 2519 { 2520 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2521 struct hlist_nulls_node *n; 2522 struct hlist_nulls_head *head; 2523 struct htab_elem *l; 2524 int i; 2525 2526 for (i = 0; i < htab->n_buckets; i++) { 2527 head = select_bucket(htab, i); 2528 2529 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2530 void *ptr = fd_htab_map_get_ptr(map, l); 2531 2532 map->ops->map_fd_put_ptr(map, ptr, false); 2533 } 2534 } 2535 2536 htab_map_free(map); 2537 } 2538 2539 /* only called from syscall */ 2540 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 2541 { 2542 void **ptr; 2543 int ret = 0; 2544 2545 if (!map->ops->map_fd_sys_lookup_elem) 2546 return -ENOTSUPP; 2547 2548 rcu_read_lock(); 2549 ptr = htab_map_lookup_elem(map, key); 2550 if (ptr) 2551 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); 2552 else 2553 ret = -ENOENT; 2554 rcu_read_unlock(); 2555 2556 return ret; 2557 } 2558 2559 /* Only called from syscall */ 2560 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 2561 void *key, void *value, u64 map_flags) 2562 { 2563 void *ptr; 2564 int ret; 2565 2566 ptr = map->ops->map_fd_get_ptr(map, map_file, *(int *)value); 2567 if (IS_ERR(ptr)) 2568 return PTR_ERR(ptr); 2569 2570 /* The htab bucket lock is always held during update operations in fd 2571 * htab map, and the following rcu_read_lock() is only used to avoid 2572 * the WARN_ON_ONCE in htab_map_update_elem_in_place(). 2573 */ 2574 rcu_read_lock(); 2575 ret = htab_map_update_elem_in_place(map, key, &ptr, map_flags, false, false); 2576 rcu_read_unlock(); 2577 if (ret) 2578 map->ops->map_fd_put_ptr(map, ptr, false); 2579 2580 return ret; 2581 } 2582 2583 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) 2584 { 2585 struct bpf_map *map, *inner_map_meta; 2586 2587 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 2588 if (IS_ERR(inner_map_meta)) 2589 return inner_map_meta; 2590 2591 map = htab_map_alloc(attr); 2592 if (IS_ERR(map)) { 2593 bpf_map_meta_free(inner_map_meta); 2594 return map; 2595 } 2596 2597 map->inner_map_meta = inner_map_meta; 2598 2599 return map; 2600 } 2601 2602 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) 2603 { 2604 struct bpf_map **inner_map = htab_map_lookup_elem(map, key); 2605 2606 if (!inner_map) 2607 return NULL; 2608 2609 return READ_ONCE(*inner_map); 2610 } 2611 2612 static int htab_of_map_gen_lookup(struct bpf_map *map, 2613 struct bpf_insn *insn_buf) 2614 { 2615 struct bpf_insn *insn = insn_buf; 2616 const int ret = BPF_REG_0; 2617 2618 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2619 (void *(*)(struct bpf_map *map, void *key))NULL)); 2620 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2621 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); 2622 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 2623 offsetof(struct htab_elem, key) + 2624 round_up(map->key_size, 8)); 2625 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 2626 2627 return insn - insn_buf; 2628 } 2629 2630 static void htab_of_map_free(struct bpf_map *map) 2631 { 2632 bpf_map_meta_free(map->inner_map_meta); 2633 fd_htab_map_free(map); 2634 } 2635 2636 const struct bpf_map_ops htab_of_maps_map_ops = { 2637 .map_alloc_check = fd_htab_map_alloc_check, 2638 .map_alloc = htab_of_map_alloc, 2639 .map_free = htab_of_map_free, 2640 .map_get_next_key = htab_map_get_next_key, 2641 .map_lookup_elem = htab_of_map_lookup_elem, 2642 .map_delete_elem = htab_map_delete_elem, 2643 .map_fd_get_ptr = bpf_map_fd_get_ptr, 2644 .map_fd_put_ptr = bpf_map_fd_put_ptr, 2645 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2646 .map_gen_lookup = htab_of_map_gen_lookup, 2647 .map_check_btf = map_check_no_btf, 2648 .map_mem_usage = htab_map_mem_usage, 2649 BATCH_OPS(htab), 2650 .map_btf_id = &htab_map_btf_ids[0], 2651 }; 2652