// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>
#include <asm/rqspinlock.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits on where perf, kprobes and tracing
 * can be invoked from, the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing from being invoked before the prior invocation
 * from one of these contexts has completed. sys_bpf() uses the same mechanism
 * by pinning the task to the current CPU and incrementing the recursion
 * protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure deterministic behaviour, these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * truly atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it was only safe to use a raw spinlock for a preallocated hash map on an
 * RT kernel, because there is no memory allocation within the lock held
 * sections.
However 76 * after hash map was fully converted to use bpf_mem_alloc, there will be 77 * non-synchronous memory allocation for non-preallocated hash map, so it is 78 * safe to always use raw spinlock for bucket lock. 79 */ 80 struct bucket { 81 struct hlist_nulls_head head; 82 rqspinlock_t raw_lock; 83 }; 84 85 #define HASHTAB_MAP_LOCK_COUNT 8 86 #define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1) 87 88 struct bpf_htab { 89 struct bpf_map map; 90 struct bpf_mem_alloc ma; 91 struct bpf_mem_alloc pcpu_ma; 92 struct bucket *buckets; 93 void *elems; 94 union { 95 struct pcpu_freelist freelist; 96 struct bpf_lru lru; 97 }; 98 struct htab_elem *__percpu *extra_elems; 99 /* number of elements in non-preallocated hashtable are kept 100 * in either pcount or count 101 */ 102 struct percpu_counter pcount; 103 atomic_t count; 104 bool use_percpu_counter; 105 u32 n_buckets; /* number of hash buckets */ 106 u32 elem_size; /* size of each element in bytes */ 107 u32 hashrnd; 108 }; 109 110 /* each htab element is struct htab_elem + key + value */ 111 struct htab_elem { 112 union { 113 struct hlist_nulls_node hash_node; 114 struct { 115 void *padding; 116 union { 117 struct pcpu_freelist_node fnode; 118 struct htab_elem *batch_flink; 119 }; 120 }; 121 }; 122 union { 123 /* pointer to per-cpu pointer */ 124 void *ptr_to_pptr; 125 struct bpf_lru_node lru_node; 126 }; 127 u32 hash; 128 char key[] __aligned(8); 129 }; 130 131 static inline bool htab_is_prealloc(const struct bpf_htab *htab) 132 { 133 return !(htab->map.map_flags & BPF_F_NO_PREALLOC); 134 } 135 136 static void htab_init_buckets(struct bpf_htab *htab) 137 { 138 unsigned int i; 139 140 for (i = 0; i < htab->n_buckets; i++) { 141 INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); 142 raw_res_spin_lock_init(&htab->buckets[i].raw_lock); 143 cond_resched(); 144 } 145 } 146 147 static inline int htab_lock_bucket(struct bucket *b, unsigned long *pflags) 148 { 149 unsigned long flags; 150 int ret; 151 152 ret = raw_res_spin_lock_irqsave(&b->raw_lock, flags); 153 if (ret) 154 return ret; 155 *pflags = flags; 156 return 0; 157 } 158 159 static inline void htab_unlock_bucket(struct bucket *b, unsigned long flags) 160 { 161 raw_res_spin_unlock_irqrestore(&b->raw_lock, flags); 162 } 163 164 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); 165 166 static bool htab_is_lru(const struct bpf_htab *htab) 167 { 168 return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || 169 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 170 } 171 172 static bool htab_is_percpu(const struct bpf_htab *htab) 173 { 174 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || 175 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 176 } 177 178 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, 179 void __percpu *pptr) 180 { 181 *(void __percpu **)(l->key + roundup(key_size, 8)) = pptr; 182 } 183 184 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) 185 { 186 return *(void __percpu **)(l->key + roundup(key_size, 8)); 187 } 188 189 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) 190 { 191 return *(void **)(l->key + roundup(map->key_size, 8)); 192 } 193 194 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) 195 { 196 return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); 197 } 198 199 static bool htab_has_extra_elems(struct bpf_htab *htab) 200 { 201 return !htab_is_percpu(htab) && !htab_is_lru(htab); 202 } 203 204 static void 
htab_free_prealloced_timers_and_wq(struct bpf_htab *htab) 205 { 206 u32 num_entries = htab->map.max_entries; 207 int i; 208 209 if (htab_has_extra_elems(htab)) 210 num_entries += num_possible_cpus(); 211 212 for (i = 0; i < num_entries; i++) { 213 struct htab_elem *elem; 214 215 elem = get_htab_elem(htab, i); 216 if (btf_record_has_field(htab->map.record, BPF_TIMER)) 217 bpf_obj_free_timer(htab->map.record, 218 elem->key + round_up(htab->map.key_size, 8)); 219 if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) 220 bpf_obj_free_workqueue(htab->map.record, 221 elem->key + round_up(htab->map.key_size, 8)); 222 cond_resched(); 223 } 224 } 225 226 static void htab_free_prealloced_fields(struct bpf_htab *htab) 227 { 228 u32 num_entries = htab->map.max_entries; 229 int i; 230 231 if (IS_ERR_OR_NULL(htab->map.record)) 232 return; 233 if (htab_has_extra_elems(htab)) 234 num_entries += num_possible_cpus(); 235 for (i = 0; i < num_entries; i++) { 236 struct htab_elem *elem; 237 238 elem = get_htab_elem(htab, i); 239 if (htab_is_percpu(htab)) { 240 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 241 int cpu; 242 243 for_each_possible_cpu(cpu) { 244 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 245 cond_resched(); 246 } 247 } else { 248 bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); 249 cond_resched(); 250 } 251 cond_resched(); 252 } 253 } 254 255 static void htab_free_elems(struct bpf_htab *htab) 256 { 257 int i; 258 259 if (!htab_is_percpu(htab)) 260 goto free_elems; 261 262 for (i = 0; i < htab->map.max_entries; i++) { 263 void __percpu *pptr; 264 265 pptr = htab_elem_get_ptr(get_htab_elem(htab, i), 266 htab->map.key_size); 267 free_percpu(pptr); 268 cond_resched(); 269 } 270 free_elems: 271 bpf_map_area_free(htab->elems); 272 } 273 274 /* The LRU list has a lock (lru_lock). Each htab bucket has a lock 275 * (bucket_lock). If both locks need to be acquired together, the lock 276 * order is always lru_lock -> bucket_lock and this only happens in 277 * bpf_lru_list.c logic. For example, certain code path of 278 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(), 279 * will acquire lru_lock first followed by acquiring bucket_lock. 280 * 281 * In hashtab.c, to avoid deadlock, lock acquisition of 282 * bucket_lock followed by lru_lock is not allowed. In such cases, 283 * bucket_lock needs to be released first before acquiring lru_lock. 
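 *
 * A short sketch of the two nestings (illustrative, simplified call
 * chains rather than literal code):
 *
 *   bpf_lru_pop_free()                    // bpf_lru_list.c side
 *     lock(lru_lock)
 *       htab_lru_map_delete_node()
 *         htab_lock_bucket()              // lru_lock -> bucket_lock: allowed
 *
 *   htab_lru_map_delete_elem()            // hashtab.c side
 *     htab_lock_bucket()
 *     ...
 *     htab_unlock_bucket()                // drop the bucket lock first
 *     htab_lru_push_free()                // may take lru_lock afterwards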
284 */ 285 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, 286 u32 hash) 287 { 288 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); 289 struct htab_elem *l; 290 291 if (node) { 292 bpf_map_inc_elem_count(&htab->map); 293 l = container_of(node, struct htab_elem, lru_node); 294 memcpy(l->key, key, htab->map.key_size); 295 return l; 296 } 297 298 return NULL; 299 } 300 301 static int prealloc_init(struct bpf_htab *htab) 302 { 303 u32 num_entries = htab->map.max_entries; 304 int err = -ENOMEM, i; 305 306 if (htab_has_extra_elems(htab)) 307 num_entries += num_possible_cpus(); 308 309 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, 310 htab->map.numa_node); 311 if (!htab->elems) 312 return -ENOMEM; 313 314 if (!htab_is_percpu(htab)) 315 goto skip_percpu_elems; 316 317 for (i = 0; i < num_entries; i++) { 318 u32 size = round_up(htab->map.value_size, 8); 319 void __percpu *pptr; 320 321 pptr = bpf_map_alloc_percpu(&htab->map, size, 8, 322 GFP_USER | __GFP_NOWARN); 323 if (!pptr) 324 goto free_elems; 325 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 326 pptr); 327 cond_resched(); 328 } 329 330 skip_percpu_elems: 331 if (htab_is_lru(htab)) 332 err = bpf_lru_init(&htab->lru, 333 htab->map.map_flags & BPF_F_NO_COMMON_LRU, 334 offsetof(struct htab_elem, hash) - 335 offsetof(struct htab_elem, lru_node), 336 htab_lru_map_delete_node, 337 htab); 338 else 339 err = pcpu_freelist_init(&htab->freelist); 340 341 if (err) 342 goto free_elems; 343 344 if (htab_is_lru(htab)) 345 bpf_lru_populate(&htab->lru, htab->elems, 346 offsetof(struct htab_elem, lru_node), 347 htab->elem_size, num_entries); 348 else 349 pcpu_freelist_populate(&htab->freelist, 350 htab->elems + offsetof(struct htab_elem, fnode), 351 htab->elem_size, num_entries); 352 353 return 0; 354 355 free_elems: 356 htab_free_elems(htab); 357 return err; 358 } 359 360 static void prealloc_destroy(struct bpf_htab *htab) 361 { 362 htab_free_elems(htab); 363 364 if (htab_is_lru(htab)) 365 bpf_lru_destroy(&htab->lru); 366 else 367 pcpu_freelist_destroy(&htab->freelist); 368 } 369 370 static int alloc_extra_elems(struct bpf_htab *htab) 371 { 372 struct htab_elem *__percpu *pptr, *l_new; 373 struct pcpu_freelist_node *l; 374 int cpu; 375 376 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8, 377 GFP_USER | __GFP_NOWARN); 378 if (!pptr) 379 return -ENOMEM; 380 381 for_each_possible_cpu(cpu) { 382 l = pcpu_freelist_pop(&htab->freelist); 383 /* pop will succeed, since prealloc_init() 384 * preallocated extra num_possible_cpus elements 385 */ 386 l_new = container_of(l, struct htab_elem, fnode); 387 *per_cpu_ptr(pptr, cpu) = l_new; 388 } 389 htab->extra_elems = pptr; 390 return 0; 391 } 392 393 /* Called from syscall */ 394 static int htab_map_alloc_check(union bpf_attr *attr) 395 { 396 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 397 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 398 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 399 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 400 /* percpu_lru means each cpu has its own LRU list. 401 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 402 * the map's value itself is percpu. percpu_lru has 403 * nothing to do with the map's value. 
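 *
 * Illustrative only: from user space this mode is requested at map
 * creation time, e.g. with libbpf (map name and sizes below are made up
 * for the example):
 *
 *     LIBBPF_OPTS(bpf_map_create_opts, opts,
 *                 .map_flags = BPF_F_NO_COMMON_LRU);
 *     bpf_map_create(BPF_MAP_TYPE_LRU_HASH, "example_lru", 4, 8, 4096, &opts);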
404 */ 405 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 406 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 407 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); 408 int numa_node = bpf_map_attr_numa_node(attr); 409 410 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != 411 offsetof(struct htab_elem, hash_node.pprev)); 412 413 if (zero_seed && !capable(CAP_SYS_ADMIN)) 414 /* Guard against local DoS, and discourage production use. */ 415 return -EPERM; 416 417 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || 418 !bpf_map_flags_access_ok(attr->map_flags)) 419 return -EINVAL; 420 421 if (!lru && percpu_lru) 422 return -EINVAL; 423 424 if (lru && !prealloc) 425 return -ENOTSUPP; 426 427 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) 428 return -EINVAL; 429 430 /* check sanity of attributes. 431 * value_size == 0 may be allowed in the future to use map as a set 432 */ 433 if (attr->max_entries == 0 || attr->key_size == 0 || 434 attr->value_size == 0) 435 return -EINVAL; 436 437 if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - 438 sizeof(struct htab_elem)) 439 /* if key_size + value_size is bigger, the user space won't be 440 * able to access the elements via bpf syscall. This check 441 * also makes sure that the elem_size doesn't overflow and it's 442 * kmalloc-able later in htab_map_update_elem() 443 */ 444 return -E2BIG; 445 /* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */ 446 if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE) 447 return -E2BIG; 448 449 return 0; 450 } 451 452 static struct bpf_map *htab_map_alloc(union bpf_attr *attr) 453 { 454 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 455 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 456 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 457 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 458 /* percpu_lru means each cpu has its own LRU list. 459 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 460 * the map's value itself is percpu. percpu_lru has 461 * nothing to do with the map's value. 462 */ 463 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 464 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 465 struct bpf_htab *htab; 466 int err; 467 468 htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); 469 if (!htab) 470 return ERR_PTR(-ENOMEM); 471 472 bpf_map_init_from_attr(&htab->map, attr); 473 474 if (percpu_lru) { 475 /* ensure each CPU's lru list has >=1 elements. 476 * since we are at it, make each lru list has the same 477 * number of elements. 
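 *
 * Worked example, assuming 64 possible CPUs: max_entries = 1000 is
 * rounded up to 1024, i.e. 16 elements per per-CPU LRU list. The
 * rounddown fallback below is only taken if the roundup wrapped the
 * u32 max_entries.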
478 */ 479 htab->map.max_entries = roundup(attr->max_entries, 480 num_possible_cpus()); 481 if (htab->map.max_entries < attr->max_entries) 482 htab->map.max_entries = rounddown(attr->max_entries, 483 num_possible_cpus()); 484 } 485 486 /* hash table size must be power of 2; roundup_pow_of_two() can overflow 487 * into UB on 32-bit arches, so check that first 488 */ 489 err = -E2BIG; 490 if (htab->map.max_entries > 1UL << 31) 491 goto free_htab; 492 493 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); 494 495 htab->elem_size = sizeof(struct htab_elem) + 496 round_up(htab->map.key_size, 8); 497 if (percpu) 498 htab->elem_size += sizeof(void *); 499 else 500 htab->elem_size += round_up(htab->map.value_size, 8); 501 502 /* check for u32 overflow */ 503 if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) 504 goto free_htab; 505 506 err = bpf_map_init_elem_count(&htab->map); 507 if (err) 508 goto free_htab; 509 510 err = -ENOMEM; 511 htab->buckets = bpf_map_area_alloc(htab->n_buckets * 512 sizeof(struct bucket), 513 htab->map.numa_node); 514 if (!htab->buckets) 515 goto free_elem_count; 516 517 if (htab->map.map_flags & BPF_F_ZERO_SEED) 518 htab->hashrnd = 0; 519 else 520 htab->hashrnd = get_random_u32(); 521 522 htab_init_buckets(htab); 523 524 /* compute_batch_value() computes batch value as num_online_cpus() * 2 525 * and __percpu_counter_compare() needs 526 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus() 527 * for percpu_counter to be faster than atomic_t. In practice the average bpf 528 * hash map size is 10k, which means that a system with 64 cpus will fill 529 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore 530 * define our own batch count as 32 then 10k hash map can be filled up to 80%: 531 * 10k - 8k > 32 _batch_ * 64 _cpus_ 532 * and __percpu_counter_compare() will still be fast. At that point hash map 533 * collisions will dominate its performance anyway. Assume that hash map filled 534 * to 50+% isn't going to be O(1) and use the following formula to choose 535 * between percpu_counter and atomic_t. 536 */ 537 #define PERCPU_COUNTER_BATCH 32 538 if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH) 539 htab->use_percpu_counter = true; 540 541 if (htab->use_percpu_counter) { 542 err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL); 543 if (err) 544 goto free_map_locked; 545 } 546 547 if (prealloc) { 548 err = prealloc_init(htab); 549 if (err) 550 goto free_map_locked; 551 552 if (!percpu && !lru) { 553 /* lru itself can remove the least used element, so 554 * there is no need for an extra elem during map_update. 
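 *
 * For the non-LRU prealloc case the extra element acts as a per-CPU
 * spare: when an existing key is updated, alloc_htab_elem() swaps it
 * with the element being replaced instead of touching the freelist
 * under the bucket lock, roughly:
 *
 *     pl_new = this_cpu_ptr(htab->extra_elems);
 *     l_new  = *pl_new;
 *     *pl_new = old_elem;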
555 */ 556 err = alloc_extra_elems(htab); 557 if (err) 558 goto free_prealloc; 559 } 560 } else { 561 err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false); 562 if (err) 563 goto free_map_locked; 564 if (percpu) { 565 err = bpf_mem_alloc_init(&htab->pcpu_ma, 566 round_up(htab->map.value_size, 8), true); 567 if (err) 568 goto free_map_locked; 569 } 570 } 571 572 return &htab->map; 573 574 free_prealloc: 575 prealloc_destroy(htab); 576 free_map_locked: 577 if (htab->use_percpu_counter) 578 percpu_counter_destroy(&htab->pcount); 579 bpf_map_area_free(htab->buckets); 580 bpf_mem_alloc_destroy(&htab->pcpu_ma); 581 bpf_mem_alloc_destroy(&htab->ma); 582 free_elem_count: 583 bpf_map_free_elem_count(&htab->map); 584 free_htab: 585 bpf_map_area_free(htab); 586 return ERR_PTR(err); 587 } 588 589 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) 590 { 591 if (likely(key_len % 4 == 0)) 592 return jhash2(key, key_len / 4, hashrnd); 593 return jhash(key, key_len, hashrnd); 594 } 595 596 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 597 { 598 return &htab->buckets[hash & (htab->n_buckets - 1)]; 599 } 600 601 static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) 602 { 603 return &__select_bucket(htab, hash)->head; 604 } 605 606 /* this lookup function can only be called with bucket lock taken */ 607 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, 608 void *key, u32 key_size) 609 { 610 struct hlist_nulls_node *n; 611 struct htab_elem *l; 612 613 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 614 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 615 return l; 616 617 return NULL; 618 } 619 620 /* can be called without bucket lock. it will repeat the loop in 621 * the unlikely event when elements moved from one bucket into another 622 * while link list is being walked 623 */ 624 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, 625 u32 hash, void *key, 626 u32 key_size, u32 n_buckets) 627 { 628 struct hlist_nulls_node *n; 629 struct htab_elem *l; 630 631 again: 632 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 633 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 634 return l; 635 636 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) 637 goto again; 638 639 return NULL; 640 } 641 642 /* Called from syscall or from eBPF program directly, so 643 * arguments have to match bpf_map_lookup_elem() exactly. 644 * The return value is adjusted by BPF instructions 645 * in htab_map_gen_lookup(). 646 */ 647 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) 648 { 649 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 650 struct hlist_nulls_head *head; 651 struct htab_elem *l; 652 u32 hash, key_size; 653 654 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 655 !rcu_read_lock_bh_held()); 656 657 key_size = map->key_size; 658 659 hash = htab_map_hash(key, key_size, htab->hashrnd); 660 661 head = select_bucket(htab, hash); 662 663 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 664 665 return l; 666 } 667 668 static void *htab_map_lookup_elem(struct bpf_map *map, void *key) 669 { 670 struct htab_elem *l = __htab_map_lookup_elem(map, key); 671 672 if (l) 673 return l->key + round_up(map->key_size, 8); 674 675 return NULL; 676 } 677 678 /* inline bpf_map_lookup_elem() call. 
679 * Instead of: 680 * bpf_prog 681 * bpf_map_lookup_elem 682 * map->ops->map_lookup_elem 683 * htab_map_lookup_elem 684 * __htab_map_lookup_elem 685 * do: 686 * bpf_prog 687 * __htab_map_lookup_elem 688 */ 689 static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 690 { 691 struct bpf_insn *insn = insn_buf; 692 const int ret = BPF_REG_0; 693 694 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 695 (void *(*)(struct bpf_map *map, void *key))NULL)); 696 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 697 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 698 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 699 offsetof(struct htab_elem, key) + 700 round_up(map->key_size, 8)); 701 return insn - insn_buf; 702 } 703 704 static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, 705 void *key, const bool mark) 706 { 707 struct htab_elem *l = __htab_map_lookup_elem(map, key); 708 709 if (l) { 710 if (mark) 711 bpf_lru_node_set_ref(&l->lru_node); 712 return l->key + round_up(map->key_size, 8); 713 } 714 715 return NULL; 716 } 717 718 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) 719 { 720 return __htab_lru_map_lookup_elem(map, key, true); 721 } 722 723 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) 724 { 725 return __htab_lru_map_lookup_elem(map, key, false); 726 } 727 728 static int htab_lru_map_gen_lookup(struct bpf_map *map, 729 struct bpf_insn *insn_buf) 730 { 731 struct bpf_insn *insn = insn_buf; 732 const int ret = BPF_REG_0; 733 const int ref_reg = BPF_REG_1; 734 735 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 736 (void *(*)(struct bpf_map *map, void *key))NULL)); 737 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 738 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); 739 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, 740 offsetof(struct htab_elem, lru_node) + 741 offsetof(struct bpf_lru_node, ref)); 742 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); 743 *insn++ = BPF_ST_MEM(BPF_B, ret, 744 offsetof(struct htab_elem, lru_node) + 745 offsetof(struct bpf_lru_node, ref), 746 1); 747 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 748 offsetof(struct htab_elem, key) + 749 round_up(map->key_size, 8)); 750 return insn - insn_buf; 751 } 752 753 static void check_and_free_fields(struct bpf_htab *htab, 754 struct htab_elem *elem) 755 { 756 if (IS_ERR_OR_NULL(htab->map.record)) 757 return; 758 759 if (htab_is_percpu(htab)) { 760 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 761 int cpu; 762 763 for_each_possible_cpu(cpu) 764 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 765 } else { 766 void *map_value = elem->key + round_up(htab->map.key_size, 8); 767 768 bpf_obj_free_fields(htab->map.record, map_value); 769 } 770 } 771 772 /* It is called from the bpf_lru_list when the LRU needs to delete 773 * older elements from the htab. 
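 *
 * It runs inside the lru_lock -> bucket_lock nesting described at the top
 * of this file and should return true only if @node was unlinked from its
 * bucket, so that the LRU side can treat the node as free for reuse.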
774 */ 775 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 776 { 777 struct bpf_htab *htab = arg; 778 struct htab_elem *l = NULL, *tgt_l; 779 struct hlist_nulls_head *head; 780 struct hlist_nulls_node *n; 781 unsigned long flags; 782 struct bucket *b; 783 int ret; 784 785 tgt_l = container_of(node, struct htab_elem, lru_node); 786 b = __select_bucket(htab, tgt_l->hash); 787 head = &b->head; 788 789 ret = htab_lock_bucket(b, &flags); 790 if (ret) 791 return false; 792 793 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 794 if (l == tgt_l) { 795 hlist_nulls_del_rcu(&l->hash_node); 796 bpf_map_dec_elem_count(&htab->map); 797 break; 798 } 799 800 htab_unlock_bucket(b, flags); 801 802 if (l == tgt_l) 803 check_and_free_fields(htab, l); 804 return l == tgt_l; 805 } 806 807 /* Called from syscall */ 808 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 809 { 810 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 811 struct hlist_nulls_head *head; 812 struct htab_elem *l, *next_l; 813 u32 hash, key_size; 814 int i = 0; 815 816 WARN_ON_ONCE(!rcu_read_lock_held()); 817 818 key_size = map->key_size; 819 820 if (!key) 821 goto find_first_elem; 822 823 hash = htab_map_hash(key, key_size, htab->hashrnd); 824 825 head = select_bucket(htab, hash); 826 827 /* lookup the key */ 828 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 829 830 if (!l) 831 goto find_first_elem; 832 833 /* key was found, get next key in the same bucket */ 834 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 835 struct htab_elem, hash_node); 836 837 if (next_l) { 838 /* if next elem in this hash list is non-zero, just return it */ 839 memcpy(next_key, next_l->key, key_size); 840 return 0; 841 } 842 843 /* no more elements in this hash list, go to the next bucket */ 844 i = hash & (htab->n_buckets - 1); 845 i++; 846 847 find_first_elem: 848 /* iterate over buckets */ 849 for (; i < htab->n_buckets; i++) { 850 head = select_bucket(htab, i); 851 852 /* pick first element in the bucket */ 853 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 854 struct htab_elem, hash_node); 855 if (next_l) { 856 /* if it's not empty, just return it */ 857 memcpy(next_key, next_l->key, key_size); 858 return 0; 859 } 860 } 861 862 /* iterated over all buckets and all elements */ 863 return -ENOENT; 864 } 865 866 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 867 { 868 check_and_free_fields(htab, l); 869 870 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 871 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 872 bpf_mem_cache_free(&htab->ma, l); 873 } 874 875 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) 876 { 877 struct bpf_map *map = &htab->map; 878 void *ptr; 879 880 if (map->ops->map_fd_put_ptr) { 881 ptr = fd_htab_map_get_ptr(map, l); 882 map->ops->map_fd_put_ptr(map, ptr, true); 883 } 884 } 885 886 static bool is_map_full(struct bpf_htab *htab) 887 { 888 if (htab->use_percpu_counter) 889 return __percpu_counter_compare(&htab->pcount, htab->map.max_entries, 890 PERCPU_COUNTER_BATCH) >= 0; 891 return atomic_read(&htab->count) >= htab->map.max_entries; 892 } 893 894 static void inc_elem_count(struct bpf_htab *htab) 895 { 896 bpf_map_inc_elem_count(&htab->map); 897 898 if (htab->use_percpu_counter) 899 percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH); 900 else 901 atomic_inc(&htab->count); 902 } 903 904 static 
void dec_elem_count(struct bpf_htab *htab) 905 { 906 bpf_map_dec_elem_count(&htab->map); 907 908 if (htab->use_percpu_counter) 909 percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH); 910 else 911 atomic_dec(&htab->count); 912 } 913 914 915 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 916 { 917 htab_put_fd_value(htab, l); 918 919 if (htab_is_prealloc(htab)) { 920 bpf_map_dec_elem_count(&htab->map); 921 check_and_free_fields(htab, l); 922 pcpu_freelist_push(&htab->freelist, &l->fnode); 923 } else { 924 dec_elem_count(htab); 925 htab_elem_free(htab, l); 926 } 927 } 928 929 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 930 void *value, bool onallcpus) 931 { 932 if (!onallcpus) { 933 /* copy true value_size bytes */ 934 copy_map_value(&htab->map, this_cpu_ptr(pptr), value); 935 } else { 936 u32 size = round_up(htab->map.value_size, 8); 937 int off = 0, cpu; 938 939 for_each_possible_cpu(cpu) { 940 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); 941 off += size; 942 } 943 } 944 } 945 946 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 947 void *value, bool onallcpus) 948 { 949 /* When not setting the initial value on all cpus, zero-fill element 950 * values for other cpus. Otherwise, bpf program has no way to ensure 951 * known initial values for cpus other than current one 952 * (onallcpus=false always when coming from bpf prog). 953 */ 954 if (!onallcpus) { 955 int current_cpu = raw_smp_processor_id(); 956 int cpu; 957 958 for_each_possible_cpu(cpu) { 959 if (cpu == current_cpu) 960 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 961 else /* Since elem is preallocated, we cannot touch special fields */ 962 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 963 } 964 } else { 965 pcpu_copy_value(htab, pptr, value, onallcpus); 966 } 967 } 968 969 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 970 { 971 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && 972 BITS_PER_LONG == 64; 973 } 974 975 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 976 void *value, u32 key_size, u32 hash, 977 bool percpu, bool onallcpus, 978 struct htab_elem *old_elem) 979 { 980 u32 size = htab->map.value_size; 981 bool prealloc = htab_is_prealloc(htab); 982 struct htab_elem *l_new, **pl_new; 983 void __percpu *pptr; 984 985 if (prealloc) { 986 if (old_elem) { 987 /* if we're updating the existing element, 988 * use per-cpu extra elems to avoid freelist_pop/push 989 */ 990 pl_new = this_cpu_ptr(htab->extra_elems); 991 l_new = *pl_new; 992 *pl_new = old_elem; 993 } else { 994 struct pcpu_freelist_node *l; 995 996 l = __pcpu_freelist_pop(&htab->freelist); 997 if (!l) 998 return ERR_PTR(-E2BIG); 999 l_new = container_of(l, struct htab_elem, fnode); 1000 bpf_map_inc_elem_count(&htab->map); 1001 } 1002 } else { 1003 if (is_map_full(htab)) 1004 if (!old_elem) 1005 /* when map is full and update() is replacing 1006 * old element, it's ok to allocate, since 1007 * old element will be freed immediately. 
1008 * Otherwise return an error 1009 */ 1010 return ERR_PTR(-E2BIG); 1011 inc_elem_count(htab); 1012 l_new = bpf_mem_cache_alloc(&htab->ma); 1013 if (!l_new) { 1014 l_new = ERR_PTR(-ENOMEM); 1015 goto dec_count; 1016 } 1017 } 1018 1019 memcpy(l_new->key, key, key_size); 1020 if (percpu) { 1021 if (prealloc) { 1022 pptr = htab_elem_get_ptr(l_new, key_size); 1023 } else { 1024 /* alloc_percpu zero-fills */ 1025 void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma); 1026 1027 if (!ptr) { 1028 bpf_mem_cache_free(&htab->ma, l_new); 1029 l_new = ERR_PTR(-ENOMEM); 1030 goto dec_count; 1031 } 1032 l_new->ptr_to_pptr = ptr; 1033 pptr = *(void __percpu **)ptr; 1034 } 1035 1036 pcpu_init_value(htab, pptr, value, onallcpus); 1037 1038 if (!prealloc) 1039 htab_elem_set_ptr(l_new, key_size, pptr); 1040 } else if (fd_htab_map_needs_adjust(htab)) { 1041 size = round_up(size, 8); 1042 memcpy(l_new->key + round_up(key_size, 8), value, size); 1043 } else { 1044 copy_map_value(&htab->map, 1045 l_new->key + round_up(key_size, 8), 1046 value); 1047 } 1048 1049 l_new->hash = hash; 1050 return l_new; 1051 dec_count: 1052 dec_elem_count(htab); 1053 return l_new; 1054 } 1055 1056 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, 1057 u64 map_flags) 1058 { 1059 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 1060 /* elem already exists */ 1061 return -EEXIST; 1062 1063 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 1064 /* elem doesn't exist, cannot update it */ 1065 return -ENOENT; 1066 1067 return 0; 1068 } 1069 1070 /* Called from syscall or from eBPF program */ 1071 static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, 1072 u64 map_flags) 1073 { 1074 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1075 struct htab_elem *l_new = NULL, *l_old; 1076 struct hlist_nulls_head *head; 1077 unsigned long flags; 1078 void *old_map_ptr; 1079 struct bucket *b; 1080 u32 key_size, hash; 1081 int ret; 1082 1083 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 1084 /* unknown flags */ 1085 return -EINVAL; 1086 1087 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1088 !rcu_read_lock_bh_held()); 1089 1090 key_size = map->key_size; 1091 1092 hash = htab_map_hash(key, key_size, htab->hashrnd); 1093 1094 b = __select_bucket(htab, hash); 1095 head = &b->head; 1096 1097 if (unlikely(map_flags & BPF_F_LOCK)) { 1098 if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1099 return -EINVAL; 1100 /* find an element without taking the bucket lock */ 1101 l_old = lookup_nulls_elem_raw(head, hash, key, key_size, 1102 htab->n_buckets); 1103 ret = check_flags(htab, l_old, map_flags); 1104 if (ret) 1105 return ret; 1106 if (l_old) { 1107 /* grab the element lock and update value in place */ 1108 copy_map_value_locked(map, 1109 l_old->key + round_up(key_size, 8), 1110 value, false); 1111 return 0; 1112 } 1113 /* fall through, grab the bucket lock and lookup again. 1114 * 99.9% chance that the element won't be found, 1115 * but second lookup under lock has to be done. 1116 */ 1117 } 1118 1119 ret = htab_lock_bucket(b, &flags); 1120 if (ret) 1121 return ret; 1122 1123 l_old = lookup_elem_raw(head, hash, key, key_size); 1124 1125 ret = check_flags(htab, l_old, map_flags); 1126 if (ret) 1127 goto err; 1128 1129 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { 1130 /* first lookup without the bucket lock didn't find the element, 1131 * but second lookup with the bucket lock found it. 
1132 * This case is highly unlikely, but has to be dealt with: 1133 * grab the element lock in addition to the bucket lock 1134 * and update element in place 1135 */ 1136 copy_map_value_locked(map, 1137 l_old->key + round_up(key_size, 8), 1138 value, false); 1139 ret = 0; 1140 goto err; 1141 } 1142 1143 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, 1144 l_old); 1145 if (IS_ERR(l_new)) { 1146 /* all pre-allocated elements are in use or memory exhausted */ 1147 ret = PTR_ERR(l_new); 1148 goto err; 1149 } 1150 1151 /* add new element to the head of the list, so that 1152 * concurrent search will find it before old elem 1153 */ 1154 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1155 if (l_old) { 1156 hlist_nulls_del_rcu(&l_old->hash_node); 1157 1158 /* l_old has already been stashed in htab->extra_elems, free 1159 * its special fields before it is available for reuse. Also 1160 * save the old map pointer in htab of maps before unlock 1161 * and release it after unlock. 1162 */ 1163 old_map_ptr = NULL; 1164 if (htab_is_prealloc(htab)) { 1165 if (map->ops->map_fd_put_ptr) 1166 old_map_ptr = fd_htab_map_get_ptr(map, l_old); 1167 check_and_free_fields(htab, l_old); 1168 } 1169 } 1170 htab_unlock_bucket(b, flags); 1171 if (l_old) { 1172 if (old_map_ptr) 1173 map->ops->map_fd_put_ptr(map, old_map_ptr, true); 1174 if (!htab_is_prealloc(htab)) 1175 free_htab_elem(htab, l_old); 1176 } 1177 return 0; 1178 err: 1179 htab_unlock_bucket(b, flags); 1180 return ret; 1181 } 1182 1183 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) 1184 { 1185 check_and_free_fields(htab, elem); 1186 bpf_map_dec_elem_count(&htab->map); 1187 bpf_lru_push_free(&htab->lru, &elem->lru_node); 1188 } 1189 1190 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, 1191 u64 map_flags) 1192 { 1193 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1194 struct htab_elem *l_new, *l_old = NULL; 1195 struct hlist_nulls_head *head; 1196 unsigned long flags; 1197 struct bucket *b; 1198 u32 key_size, hash; 1199 int ret; 1200 1201 if (unlikely(map_flags > BPF_EXIST)) 1202 /* unknown flags */ 1203 return -EINVAL; 1204 1205 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1206 !rcu_read_lock_bh_held()); 1207 1208 key_size = map->key_size; 1209 1210 hash = htab_map_hash(key, key_size, htab->hashrnd); 1211 1212 b = __select_bucket(htab, hash); 1213 head = &b->head; 1214 1215 /* For LRU, we need to alloc before taking bucket's 1216 * spinlock because getting free nodes from LRU may need 1217 * to remove older elements from htab and this removal 1218 * operation will need a bucket lock. 
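 *
 * Illustrative worst case if the order were reversed (the element picked
 * for eviction may hash into this very bucket):
 *
 *     htab_lock_bucket(b)
 *       bpf_lru_pop_free()
 *         htab_lru_map_delete_node()
 *           htab_lock_bucket(b)          // same bucket again
 *
 * hence l_new is taken from the LRU before the bucket lock below.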
1219 */ 1220 l_new = prealloc_lru_pop(htab, key, hash); 1221 if (!l_new) 1222 return -ENOMEM; 1223 copy_map_value(&htab->map, 1224 l_new->key + round_up(map->key_size, 8), value); 1225 1226 ret = htab_lock_bucket(b, &flags); 1227 if (ret) 1228 goto err_lock_bucket; 1229 1230 l_old = lookup_elem_raw(head, hash, key, key_size); 1231 1232 ret = check_flags(htab, l_old, map_flags); 1233 if (ret) 1234 goto err; 1235 1236 /* add new element to the head of the list, so that 1237 * concurrent search will find it before old elem 1238 */ 1239 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1240 if (l_old) { 1241 bpf_lru_node_set_ref(&l_new->lru_node); 1242 hlist_nulls_del_rcu(&l_old->hash_node); 1243 } 1244 ret = 0; 1245 1246 err: 1247 htab_unlock_bucket(b, flags); 1248 1249 err_lock_bucket: 1250 if (ret) 1251 htab_lru_push_free(htab, l_new); 1252 else if (l_old) 1253 htab_lru_push_free(htab, l_old); 1254 1255 return ret; 1256 } 1257 1258 static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1259 void *value, u64 map_flags, 1260 bool onallcpus) 1261 { 1262 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1263 struct htab_elem *l_new = NULL, *l_old; 1264 struct hlist_nulls_head *head; 1265 unsigned long flags; 1266 struct bucket *b; 1267 u32 key_size, hash; 1268 int ret; 1269 1270 if (unlikely(map_flags > BPF_EXIST)) 1271 /* unknown flags */ 1272 return -EINVAL; 1273 1274 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1275 !rcu_read_lock_bh_held()); 1276 1277 key_size = map->key_size; 1278 1279 hash = htab_map_hash(key, key_size, htab->hashrnd); 1280 1281 b = __select_bucket(htab, hash); 1282 head = &b->head; 1283 1284 ret = htab_lock_bucket(b, &flags); 1285 if (ret) 1286 return ret; 1287 1288 l_old = lookup_elem_raw(head, hash, key, key_size); 1289 1290 ret = check_flags(htab, l_old, map_flags); 1291 if (ret) 1292 goto err; 1293 1294 if (l_old) { 1295 /* per-cpu hash map can update value in-place */ 1296 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1297 value, onallcpus); 1298 } else { 1299 l_new = alloc_htab_elem(htab, key, value, key_size, 1300 hash, true, onallcpus, NULL); 1301 if (IS_ERR(l_new)) { 1302 ret = PTR_ERR(l_new); 1303 goto err; 1304 } 1305 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1306 } 1307 ret = 0; 1308 err: 1309 htab_unlock_bucket(b, flags); 1310 return ret; 1311 } 1312 1313 static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1314 void *value, u64 map_flags, 1315 bool onallcpus) 1316 { 1317 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1318 struct htab_elem *l_new = NULL, *l_old; 1319 struct hlist_nulls_head *head; 1320 unsigned long flags; 1321 struct bucket *b; 1322 u32 key_size, hash; 1323 int ret; 1324 1325 if (unlikely(map_flags > BPF_EXIST)) 1326 /* unknown flags */ 1327 return -EINVAL; 1328 1329 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1330 !rcu_read_lock_bh_held()); 1331 1332 key_size = map->key_size; 1333 1334 hash = htab_map_hash(key, key_size, htab->hashrnd); 1335 1336 b = __select_bucket(htab, hash); 1337 head = &b->head; 1338 1339 /* For LRU, we need to alloc before taking bucket's 1340 * spinlock because LRU's elem alloc may need 1341 * to remove older elem from htab and this removal 1342 * operation will need a bucket lock. 
1343 */ 1344 if (map_flags != BPF_EXIST) { 1345 l_new = prealloc_lru_pop(htab, key, hash); 1346 if (!l_new) 1347 return -ENOMEM; 1348 } 1349 1350 ret = htab_lock_bucket(b, &flags); 1351 if (ret) 1352 goto err_lock_bucket; 1353 1354 l_old = lookup_elem_raw(head, hash, key, key_size); 1355 1356 ret = check_flags(htab, l_old, map_flags); 1357 if (ret) 1358 goto err; 1359 1360 if (l_old) { 1361 bpf_lru_node_set_ref(&l_old->lru_node); 1362 1363 /* per-cpu hash map can update value in-place */ 1364 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1365 value, onallcpus); 1366 } else { 1367 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1368 value, onallcpus); 1369 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1370 l_new = NULL; 1371 } 1372 ret = 0; 1373 err: 1374 htab_unlock_bucket(b, flags); 1375 err_lock_bucket: 1376 if (l_new) { 1377 bpf_map_dec_elem_count(&htab->map); 1378 bpf_lru_push_free(&htab->lru, &l_new->lru_node); 1379 } 1380 return ret; 1381 } 1382 1383 static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1384 void *value, u64 map_flags) 1385 { 1386 return __htab_percpu_map_update_elem(map, key, value, map_flags, false); 1387 } 1388 1389 static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1390 void *value, u64 map_flags) 1391 { 1392 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, 1393 false); 1394 } 1395 1396 /* Called from syscall or from eBPF program */ 1397 static long htab_map_delete_elem(struct bpf_map *map, void *key) 1398 { 1399 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1400 struct hlist_nulls_head *head; 1401 struct bucket *b; 1402 struct htab_elem *l; 1403 unsigned long flags; 1404 u32 hash, key_size; 1405 int ret; 1406 1407 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1408 !rcu_read_lock_bh_held()); 1409 1410 key_size = map->key_size; 1411 1412 hash = htab_map_hash(key, key_size, htab->hashrnd); 1413 b = __select_bucket(htab, hash); 1414 head = &b->head; 1415 1416 ret = htab_lock_bucket(b, &flags); 1417 if (ret) 1418 return ret; 1419 1420 l = lookup_elem_raw(head, hash, key, key_size); 1421 if (l) 1422 hlist_nulls_del_rcu(&l->hash_node); 1423 else 1424 ret = -ENOENT; 1425 1426 htab_unlock_bucket(b, flags); 1427 1428 if (l) 1429 free_htab_elem(htab, l); 1430 return ret; 1431 } 1432 1433 static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) 1434 { 1435 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1436 struct hlist_nulls_head *head; 1437 struct bucket *b; 1438 struct htab_elem *l; 1439 unsigned long flags; 1440 u32 hash, key_size; 1441 int ret; 1442 1443 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1444 !rcu_read_lock_bh_held()); 1445 1446 key_size = map->key_size; 1447 1448 hash = htab_map_hash(key, key_size, htab->hashrnd); 1449 b = __select_bucket(htab, hash); 1450 head = &b->head; 1451 1452 ret = htab_lock_bucket(b, &flags); 1453 if (ret) 1454 return ret; 1455 1456 l = lookup_elem_raw(head, hash, key, key_size); 1457 1458 if (l) 1459 hlist_nulls_del_rcu(&l->hash_node); 1460 else 1461 ret = -ENOENT; 1462 1463 htab_unlock_bucket(b, flags); 1464 if (l) 1465 htab_lru_push_free(htab, l); 1466 return ret; 1467 } 1468 1469 static void delete_all_elements(struct bpf_htab *htab) 1470 { 1471 int i; 1472 1473 /* It's called from a worker thread and migration has been disabled, 1474 * therefore, it is OK to invoke bpf_mem_cache_free() directly. 
1475 */ 1476 for (i = 0; i < htab->n_buckets; i++) { 1477 struct hlist_nulls_head *head = select_bucket(htab, i); 1478 struct hlist_nulls_node *n; 1479 struct htab_elem *l; 1480 1481 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1482 hlist_nulls_del_rcu(&l->hash_node); 1483 htab_elem_free(htab, l); 1484 } 1485 cond_resched(); 1486 } 1487 } 1488 1489 static void htab_free_malloced_timers_and_wq(struct bpf_htab *htab) 1490 { 1491 int i; 1492 1493 rcu_read_lock(); 1494 for (i = 0; i < htab->n_buckets; i++) { 1495 struct hlist_nulls_head *head = select_bucket(htab, i); 1496 struct hlist_nulls_node *n; 1497 struct htab_elem *l; 1498 1499 hlist_nulls_for_each_entry(l, n, head, hash_node) { 1500 /* We only free timer on uref dropping to zero */ 1501 if (btf_record_has_field(htab->map.record, BPF_TIMER)) 1502 bpf_obj_free_timer(htab->map.record, 1503 l->key + round_up(htab->map.key_size, 8)); 1504 if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) 1505 bpf_obj_free_workqueue(htab->map.record, 1506 l->key + round_up(htab->map.key_size, 8)); 1507 } 1508 cond_resched_rcu(); 1509 } 1510 rcu_read_unlock(); 1511 } 1512 1513 static void htab_map_free_timers_and_wq(struct bpf_map *map) 1514 { 1515 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1516 1517 /* We only free timer and workqueue on uref dropping to zero */ 1518 if (btf_record_has_field(htab->map.record, BPF_TIMER | BPF_WORKQUEUE)) { 1519 if (!htab_is_prealloc(htab)) 1520 htab_free_malloced_timers_and_wq(htab); 1521 else 1522 htab_free_prealloced_timers_and_wq(htab); 1523 } 1524 } 1525 1526 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 1527 static void htab_map_free(struct bpf_map *map) 1528 { 1529 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1530 1531 /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. 1532 * bpf_free_used_maps() is called after bpf prog is no longer executing. 1533 * There is no need to synchronize_rcu() here to protect map elements. 1534 */ 1535 1536 /* htab no longer uses call_rcu() directly. bpf_mem_alloc does it 1537 * underneath and is responsible for waiting for callbacks to finish 1538 * during bpf_mem_alloc_destroy(). 
1539 */ 1540 if (!htab_is_prealloc(htab)) { 1541 delete_all_elements(htab); 1542 } else { 1543 htab_free_prealloced_fields(htab); 1544 prealloc_destroy(htab); 1545 } 1546 1547 bpf_map_free_elem_count(map); 1548 free_percpu(htab->extra_elems); 1549 bpf_map_area_free(htab->buckets); 1550 bpf_mem_alloc_destroy(&htab->pcpu_ma); 1551 bpf_mem_alloc_destroy(&htab->ma); 1552 if (htab->use_percpu_counter) 1553 percpu_counter_destroy(&htab->pcount); 1554 bpf_map_area_free(htab); 1555 } 1556 1557 static void htab_map_seq_show_elem(struct bpf_map *map, void *key, 1558 struct seq_file *m) 1559 { 1560 void *value; 1561 1562 rcu_read_lock(); 1563 1564 value = htab_map_lookup_elem(map, key); 1565 if (!value) { 1566 rcu_read_unlock(); 1567 return; 1568 } 1569 1570 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 1571 seq_puts(m, ": "); 1572 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 1573 seq_putc(m, '\n'); 1574 1575 rcu_read_unlock(); 1576 } 1577 1578 static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1579 void *value, bool is_lru_map, 1580 bool is_percpu, u64 flags) 1581 { 1582 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1583 struct hlist_nulls_head *head; 1584 unsigned long bflags; 1585 struct htab_elem *l; 1586 u32 hash, key_size; 1587 struct bucket *b; 1588 int ret; 1589 1590 key_size = map->key_size; 1591 1592 hash = htab_map_hash(key, key_size, htab->hashrnd); 1593 b = __select_bucket(htab, hash); 1594 head = &b->head; 1595 1596 ret = htab_lock_bucket(b, &bflags); 1597 if (ret) 1598 return ret; 1599 1600 l = lookup_elem_raw(head, hash, key, key_size); 1601 if (!l) { 1602 ret = -ENOENT; 1603 goto out_unlock; 1604 } 1605 1606 if (is_percpu) { 1607 u32 roundup_value_size = round_up(map->value_size, 8); 1608 void __percpu *pptr; 1609 int off = 0, cpu; 1610 1611 pptr = htab_elem_get_ptr(l, key_size); 1612 for_each_possible_cpu(cpu) { 1613 copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); 1614 check_and_init_map_value(&htab->map, value + off); 1615 off += roundup_value_size; 1616 } 1617 } else { 1618 u32 roundup_key_size = round_up(map->key_size, 8); 1619 1620 if (flags & BPF_F_LOCK) 1621 copy_map_value_locked(map, value, l->key + 1622 roundup_key_size, 1623 true); 1624 else 1625 copy_map_value(map, value, l->key + 1626 roundup_key_size); 1627 /* Zeroing special fields in the temp buffer */ 1628 check_and_init_map_value(map, value); 1629 } 1630 hlist_nulls_del_rcu(&l->hash_node); 1631 1632 out_unlock: 1633 htab_unlock_bucket(b, bflags); 1634 1635 if (l) { 1636 if (is_lru_map) 1637 htab_lru_push_free(htab, l); 1638 else 1639 free_htab_elem(htab, l); 1640 } 1641 1642 return ret; 1643 } 1644 1645 static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1646 void *value, u64 flags) 1647 { 1648 return __htab_map_lookup_and_delete_elem(map, key, value, false, false, 1649 flags); 1650 } 1651 1652 static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1653 void *key, void *value, 1654 u64 flags) 1655 { 1656 return __htab_map_lookup_and_delete_elem(map, key, value, false, true, 1657 flags); 1658 } 1659 1660 static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1661 void *value, u64 flags) 1662 { 1663 return __htab_map_lookup_and_delete_elem(map, key, value, true, false, 1664 flags); 1665 } 1666 1667 static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1668 void *key, void *value, 1669 u64 flags) 1670 { 1671 return 
__htab_map_lookup_and_delete_elem(map, key, value, true, true, 1672 flags); 1673 } 1674 1675 static int 1676 __htab_map_lookup_and_delete_batch(struct bpf_map *map, 1677 const union bpf_attr *attr, 1678 union bpf_attr __user *uattr, 1679 bool do_delete, bool is_lru_map, 1680 bool is_percpu) 1681 { 1682 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1683 u32 bucket_cnt, total, key_size, value_size, roundup_key_size; 1684 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; 1685 void __user *uvalues = u64_to_user_ptr(attr->batch.values); 1686 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1687 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1688 u32 batch, max_count, size, bucket_size, map_id; 1689 struct htab_elem *node_to_free = NULL; 1690 u64 elem_map_flags, map_flags; 1691 struct hlist_nulls_head *head; 1692 struct hlist_nulls_node *n; 1693 unsigned long flags = 0; 1694 bool locked = false; 1695 struct htab_elem *l; 1696 struct bucket *b; 1697 int ret = 0; 1698 1699 elem_map_flags = attr->batch.elem_flags; 1700 if ((elem_map_flags & ~BPF_F_LOCK) || 1701 ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1702 return -EINVAL; 1703 1704 map_flags = attr->batch.flags; 1705 if (map_flags) 1706 return -EINVAL; 1707 1708 max_count = attr->batch.count; 1709 if (!max_count) 1710 return 0; 1711 1712 if (put_user(0, &uattr->batch.count)) 1713 return -EFAULT; 1714 1715 batch = 0; 1716 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch))) 1717 return -EFAULT; 1718 1719 if (batch >= htab->n_buckets) 1720 return -ENOENT; 1721 1722 key_size = htab->map.key_size; 1723 roundup_key_size = round_up(htab->map.key_size, 8); 1724 value_size = htab->map.value_size; 1725 size = round_up(value_size, 8); 1726 if (is_percpu) 1727 value_size = size * num_possible_cpus(); 1728 total = 0; 1729 /* while experimenting with hash tables with sizes ranging from 10 to 1730 * 1000, it was observed that a bucket can have up to 5 entries. 1731 */ 1732 bucket_size = 5; 1733 1734 alloc: 1735 /* We cannot do copy_from_user or copy_to_user inside 1736 * the rcu_read_lock. Allocate enough space here. 1737 */ 1738 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); 1739 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); 1740 if (!keys || !values) { 1741 ret = -ENOMEM; 1742 goto after_loop; 1743 } 1744 1745 again: 1746 bpf_disable_instrumentation(); 1747 rcu_read_lock(); 1748 again_nocopy: 1749 dst_key = keys; 1750 dst_val = values; 1751 b = &htab->buckets[batch]; 1752 head = &b->head; 1753 /* do not grab the lock unless need it (bucket_cnt > 0). */ 1754 if (locked) { 1755 ret = htab_lock_bucket(b, &flags); 1756 if (ret) { 1757 rcu_read_unlock(); 1758 bpf_enable_instrumentation(); 1759 goto after_loop; 1760 } 1761 } 1762 1763 bucket_cnt = 0; 1764 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1765 bucket_cnt++; 1766 1767 if (bucket_cnt && !locked) { 1768 locked = true; 1769 goto again_nocopy; 1770 } 1771 1772 if (bucket_cnt > (max_count - total)) { 1773 if (total == 0) 1774 ret = -ENOSPC; 1775 /* Note that since bucket_cnt > 0 here, it is implicit 1776 * that the locked was grabbed, so release it. 
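 *
 * (When total != 0 we instead return the partial batch with ret == 0;
 *  user space can resume from the bucket reported in out_batch, possibly
 *  with a larger count.)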
1777 */ 1778 htab_unlock_bucket(b, flags); 1779 rcu_read_unlock(); 1780 bpf_enable_instrumentation(); 1781 goto after_loop; 1782 } 1783 1784 if (bucket_cnt > bucket_size) { 1785 bucket_size = bucket_cnt; 1786 /* Note that since bucket_cnt > 0 here, it is implicit 1787 * that the locked was grabbed, so release it. 1788 */ 1789 htab_unlock_bucket(b, flags); 1790 rcu_read_unlock(); 1791 bpf_enable_instrumentation(); 1792 kvfree(keys); 1793 kvfree(values); 1794 goto alloc; 1795 } 1796 1797 /* Next block is only safe to run if you have grabbed the lock */ 1798 if (!locked) 1799 goto next_batch; 1800 1801 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1802 memcpy(dst_key, l->key, key_size); 1803 1804 if (is_percpu) { 1805 int off = 0, cpu; 1806 void __percpu *pptr; 1807 1808 pptr = htab_elem_get_ptr(l, map->key_size); 1809 for_each_possible_cpu(cpu) { 1810 copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); 1811 check_and_init_map_value(&htab->map, dst_val + off); 1812 off += size; 1813 } 1814 } else { 1815 value = l->key + roundup_key_size; 1816 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 1817 struct bpf_map **inner_map = value; 1818 1819 /* Actual value is the id of the inner map */ 1820 map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); 1821 value = &map_id; 1822 } 1823 1824 if (elem_map_flags & BPF_F_LOCK) 1825 copy_map_value_locked(map, dst_val, value, 1826 true); 1827 else 1828 copy_map_value(map, dst_val, value); 1829 /* Zeroing special fields in the temp buffer */ 1830 check_and_init_map_value(map, dst_val); 1831 } 1832 if (do_delete) { 1833 hlist_nulls_del_rcu(&l->hash_node); 1834 1835 /* bpf_lru_push_free() will acquire lru_lock, which 1836 * may cause deadlock. See comments in function 1837 * prealloc_lru_pop(). Let us do bpf_lru_push_free() 1838 * after releasing the bucket lock. 1839 * 1840 * For htab of maps, htab_put_fd_value() in 1841 * free_htab_elem() may acquire a spinlock with bucket 1842 * lock being held and it violates the lock rule, so 1843 * invoke free_htab_elem() after unlock as well. 1844 */ 1845 l->batch_flink = node_to_free; 1846 node_to_free = l; 1847 } 1848 dst_key += key_size; 1849 dst_val += value_size; 1850 } 1851 1852 htab_unlock_bucket(b, flags); 1853 locked = false; 1854 1855 while (node_to_free) { 1856 l = node_to_free; 1857 node_to_free = node_to_free->batch_flink; 1858 if (is_lru_map) 1859 htab_lru_push_free(htab, l); 1860 else 1861 free_htab_elem(htab, l); 1862 } 1863 1864 next_batch: 1865 /* If we are not copying data, we can go to next bucket and avoid 1866 * unlocking the rcu. 
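 *
 * (In that case the bucket lock was never taken either, since "locked"
 *  is only set once bucket_cnt > 0 was observed.)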
1867 */ 1868 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) { 1869 batch++; 1870 goto again_nocopy; 1871 } 1872 1873 rcu_read_unlock(); 1874 bpf_enable_instrumentation(); 1875 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys, 1876 key_size * bucket_cnt) || 1877 copy_to_user(uvalues + total * value_size, values, 1878 value_size * bucket_cnt))) { 1879 ret = -EFAULT; 1880 goto after_loop; 1881 } 1882 1883 total += bucket_cnt; 1884 batch++; 1885 if (batch >= htab->n_buckets) { 1886 ret = -ENOENT; 1887 goto after_loop; 1888 } 1889 goto again; 1890 1891 after_loop: 1892 if (ret == -EFAULT) 1893 goto out; 1894 1895 /* copy # of entries and next batch */ 1896 ubatch = u64_to_user_ptr(attr->batch.out_batch); 1897 if (copy_to_user(ubatch, &batch, sizeof(batch)) || 1898 put_user(total, &uattr->batch.count)) 1899 ret = -EFAULT; 1900 1901 out: 1902 kvfree(keys); 1903 kvfree(values); 1904 return ret; 1905 } 1906 1907 static int 1908 htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1909 union bpf_attr __user *uattr) 1910 { 1911 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1912 false, true); 1913 } 1914 1915 static int 1916 htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1917 const union bpf_attr *attr, 1918 union bpf_attr __user *uattr) 1919 { 1920 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1921 false, true); 1922 } 1923 1924 static int 1925 htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1926 union bpf_attr __user *uattr) 1927 { 1928 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1929 false, false); 1930 } 1931 1932 static int 1933 htab_map_lookup_and_delete_batch(struct bpf_map *map, 1934 const union bpf_attr *attr, 1935 union bpf_attr __user *uattr) 1936 { 1937 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1938 false, false); 1939 } 1940 1941 static int 1942 htab_lru_percpu_map_lookup_batch(struct bpf_map *map, 1943 const union bpf_attr *attr, 1944 union bpf_attr __user *uattr) 1945 { 1946 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1947 true, true); 1948 } 1949 1950 static int 1951 htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1952 const union bpf_attr *attr, 1953 union bpf_attr __user *uattr) 1954 { 1955 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1956 true, true); 1957 } 1958 1959 static int 1960 htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1961 union bpf_attr __user *uattr) 1962 { 1963 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1964 true, false); 1965 } 1966 1967 static int 1968 htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, 1969 const union bpf_attr *attr, 1970 union bpf_attr __user *uattr) 1971 { 1972 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1973 true, false); 1974 } 1975 1976 struct bpf_iter_seq_hash_map_info { 1977 struct bpf_map *map; 1978 struct bpf_htab *htab; 1979 void *percpu_value_buf; // non-zero means percpu hash 1980 u32 bucket_id; 1981 u32 skip_elems; 1982 }; 1983 1984 static struct htab_elem * 1985 bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, 1986 struct htab_elem *prev_elem) 1987 { 1988 const struct bpf_htab *htab = info->htab; 1989 u32 skip_elems = info->skip_elems; 1990 u32 bucket_id = info->bucket_id; 1991 struct hlist_nulls_head *head; 1992 struct hlist_nulls_node *n; 1993 struct htab_elem *elem; 1994 struct bucket *b; 1995 u32 i, count; 
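	/* Resume bookkeeping: info->bucket_id and info->skip_elems record how far
	 * the previous call got. Each bucket is walked under rcu_read_lock(); when
	 * an element is returned the read lock stays held and is dropped either
	 * when this function moves on to the next bucket or in
	 * bpf_hash_map_seq_stop().
	 */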
1996 1997 if (bucket_id >= htab->n_buckets) 1998 return NULL; 1999 2000 /* try to find next elem in the same bucket */ 2001 if (prev_elem) { 2002 /* no update/deletion on this bucket, prev_elem should be still valid 2003 * and we won't skip elements. 2004 */ 2005 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); 2006 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); 2007 if (elem) 2008 return elem; 2009 2010 /* not found, unlock and go to the next bucket */ 2011 b = &htab->buckets[bucket_id++]; 2012 rcu_read_unlock(); 2013 skip_elems = 0; 2014 } 2015 2016 for (i = bucket_id; i < htab->n_buckets; i++) { 2017 b = &htab->buckets[i]; 2018 rcu_read_lock(); 2019 2020 count = 0; 2021 head = &b->head; 2022 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2023 if (count >= skip_elems) { 2024 info->bucket_id = i; 2025 info->skip_elems = count; 2026 return elem; 2027 } 2028 count++; 2029 } 2030 2031 rcu_read_unlock(); 2032 skip_elems = 0; 2033 } 2034 2035 info->bucket_id = i; 2036 info->skip_elems = 0; 2037 return NULL; 2038 } 2039 2040 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) 2041 { 2042 struct bpf_iter_seq_hash_map_info *info = seq->private; 2043 struct htab_elem *elem; 2044 2045 elem = bpf_hash_map_seq_find_next(info, NULL); 2046 if (!elem) 2047 return NULL; 2048 2049 if (*pos == 0) 2050 ++*pos; 2051 return elem; 2052 } 2053 2054 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2055 { 2056 struct bpf_iter_seq_hash_map_info *info = seq->private; 2057 2058 ++*pos; 2059 ++info->skip_elems; 2060 return bpf_hash_map_seq_find_next(info, v); 2061 } 2062 2063 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) 2064 { 2065 struct bpf_iter_seq_hash_map_info *info = seq->private; 2066 u32 roundup_key_size, roundup_value_size; 2067 struct bpf_iter__bpf_map_elem ctx = {}; 2068 struct bpf_map *map = info->map; 2069 struct bpf_iter_meta meta; 2070 int ret = 0, off = 0, cpu; 2071 struct bpf_prog *prog; 2072 void __percpu *pptr; 2073 2074 meta.seq = seq; 2075 prog = bpf_iter_get_info(&meta, elem == NULL); 2076 if (prog) { 2077 ctx.meta = &meta; 2078 ctx.map = info->map; 2079 if (elem) { 2080 roundup_key_size = round_up(map->key_size, 8); 2081 ctx.key = elem->key; 2082 if (!info->percpu_value_buf) { 2083 ctx.value = elem->key + roundup_key_size; 2084 } else { 2085 roundup_value_size = round_up(map->value_size, 8); 2086 pptr = htab_elem_get_ptr(elem, map->key_size); 2087 for_each_possible_cpu(cpu) { 2088 copy_map_value_long(map, info->percpu_value_buf + off, 2089 per_cpu_ptr(pptr, cpu)); 2090 check_and_init_map_value(map, info->percpu_value_buf + off); 2091 off += roundup_value_size; 2092 } 2093 ctx.value = info->percpu_value_buf; 2094 } 2095 } 2096 ret = bpf_iter_run_prog(prog, &ctx); 2097 } 2098 2099 return ret; 2100 } 2101 2102 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) 2103 { 2104 return __bpf_hash_map_seq_show(seq, v); 2105 } 2106 2107 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) 2108 { 2109 if (!v) 2110 (void)__bpf_hash_map_seq_show(seq, NULL); 2111 else 2112 rcu_read_unlock(); 2113 } 2114 2115 static int bpf_iter_init_hash_map(void *priv_data, 2116 struct bpf_iter_aux_info *aux) 2117 { 2118 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2119 struct bpf_map *map = aux->map; 2120 void *value_buf; 2121 u32 buf_size; 2122 2123 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2124 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2125 
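		/*
		 * The buffer holds one value per possible CPU, each slot
		 * rounded up to 8 bytes, matching the layout that
		 * __bpf_hash_map_seq_show() copies into.  For example
		 * (illustrative only): value_size == 12 with 4 possible CPUs
		 * needs round_up(12, 8) * 4 == 64 bytes.
		 */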
buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 2126 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 2127 if (!value_buf) 2128 return -ENOMEM; 2129 2130 seq_info->percpu_value_buf = value_buf; 2131 } 2132 2133 bpf_map_inc_with_uref(map); 2134 seq_info->map = map; 2135 seq_info->htab = container_of(map, struct bpf_htab, map); 2136 return 0; 2137 } 2138 2139 static void bpf_iter_fini_hash_map(void *priv_data) 2140 { 2141 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2142 2143 bpf_map_put_with_uref(seq_info->map); 2144 kfree(seq_info->percpu_value_buf); 2145 } 2146 2147 static const struct seq_operations bpf_hash_map_seq_ops = { 2148 .start = bpf_hash_map_seq_start, 2149 .next = bpf_hash_map_seq_next, 2150 .stop = bpf_hash_map_seq_stop, 2151 .show = bpf_hash_map_seq_show, 2152 }; 2153 2154 static const struct bpf_iter_seq_info iter_seq_info = { 2155 .seq_ops = &bpf_hash_map_seq_ops, 2156 .init_seq_private = bpf_iter_init_hash_map, 2157 .fini_seq_private = bpf_iter_fini_hash_map, 2158 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), 2159 }; 2160 2161 static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, 2162 void *callback_ctx, u64 flags) 2163 { 2164 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2165 struct hlist_nulls_head *head; 2166 struct hlist_nulls_node *n; 2167 struct htab_elem *elem; 2168 u32 roundup_key_size; 2169 int i, num_elems = 0; 2170 void __percpu *pptr; 2171 struct bucket *b; 2172 void *key, *val; 2173 bool is_percpu; 2174 u64 ret = 0; 2175 2176 cant_migrate(); 2177 2178 if (flags != 0) 2179 return -EINVAL; 2180 2181 is_percpu = htab_is_percpu(htab); 2182 2183 roundup_key_size = round_up(map->key_size, 8); 2184 /* migration has been disabled, so percpu value prepared here will be 2185 * the same as the one seen by the bpf program with 2186 * bpf_map_lookup_elem(). 
2187 */ 2188 for (i = 0; i < htab->n_buckets; i++) { 2189 b = &htab->buckets[i]; 2190 rcu_read_lock(); 2191 head = &b->head; 2192 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2193 key = elem->key; 2194 if (is_percpu) { 2195 /* current cpu value for percpu map */ 2196 pptr = htab_elem_get_ptr(elem, map->key_size); 2197 val = this_cpu_ptr(pptr); 2198 } else { 2199 val = elem->key + roundup_key_size; 2200 } 2201 num_elems++; 2202 ret = callback_fn((u64)(long)map, (u64)(long)key, 2203 (u64)(long)val, (u64)(long)callback_ctx, 0); 2204 /* return value: 0 - continue, 1 - stop and return */ 2205 if (ret) { 2206 rcu_read_unlock(); 2207 goto out; 2208 } 2209 } 2210 rcu_read_unlock(); 2211 } 2212 out: 2213 return num_elems; 2214 } 2215 2216 static u64 htab_map_mem_usage(const struct bpf_map *map) 2217 { 2218 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2219 u32 value_size = round_up(htab->map.value_size, 8); 2220 bool prealloc = htab_is_prealloc(htab); 2221 bool percpu = htab_is_percpu(htab); 2222 bool lru = htab_is_lru(htab); 2223 u64 num_entries; 2224 u64 usage = sizeof(struct bpf_htab); 2225 2226 usage += sizeof(struct bucket) * htab->n_buckets; 2227 usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT; 2228 if (prealloc) { 2229 num_entries = map->max_entries; 2230 if (htab_has_extra_elems(htab)) 2231 num_entries += num_possible_cpus(); 2232 2233 usage += htab->elem_size * num_entries; 2234 2235 if (percpu) 2236 usage += value_size * num_possible_cpus() * num_entries; 2237 else if (!lru) 2238 usage += sizeof(struct htab_elem *) * num_possible_cpus(); 2239 } else { 2240 #define LLIST_NODE_SZ sizeof(struct llist_node) 2241 2242 num_entries = htab->use_percpu_counter ? 2243 percpu_counter_sum(&htab->pcount) : 2244 atomic_read(&htab->count); 2245 usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; 2246 if (percpu) { 2247 usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; 2248 usage += value_size * num_possible_cpus() * num_entries; 2249 } 2250 } 2251 return usage; 2252 } 2253 2254 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2255 const struct bpf_map_ops htab_map_ops = { 2256 .map_meta_equal = bpf_map_meta_equal, 2257 .map_alloc_check = htab_map_alloc_check, 2258 .map_alloc = htab_map_alloc, 2259 .map_free = htab_map_free, 2260 .map_get_next_key = htab_map_get_next_key, 2261 .map_release_uref = htab_map_free_timers_and_wq, 2262 .map_lookup_elem = htab_map_lookup_elem, 2263 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, 2264 .map_update_elem = htab_map_update_elem, 2265 .map_delete_elem = htab_map_delete_elem, 2266 .map_gen_lookup = htab_map_gen_lookup, 2267 .map_seq_show_elem = htab_map_seq_show_elem, 2268 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2269 .map_for_each_callback = bpf_for_each_hash_elem, 2270 .map_mem_usage = htab_map_mem_usage, 2271 BATCH_OPS(htab), 2272 .map_btf_id = &htab_map_btf_ids[0], 2273 .iter_seq_info = &iter_seq_info, 2274 }; 2275 2276 const struct bpf_map_ops htab_lru_map_ops = { 2277 .map_meta_equal = bpf_map_meta_equal, 2278 .map_alloc_check = htab_map_alloc_check, 2279 .map_alloc = htab_map_alloc, 2280 .map_free = htab_map_free, 2281 .map_get_next_key = htab_map_get_next_key, 2282 .map_release_uref = htab_map_free_timers_and_wq, 2283 .map_lookup_elem = htab_lru_map_lookup_elem, 2284 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, 2285 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, 2286 .map_update_elem = htab_lru_map_update_elem, 2287 
.map_delete_elem = htab_lru_map_delete_elem, 2288 .map_gen_lookup = htab_lru_map_gen_lookup, 2289 .map_seq_show_elem = htab_map_seq_show_elem, 2290 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2291 .map_for_each_callback = bpf_for_each_hash_elem, 2292 .map_mem_usage = htab_map_mem_usage, 2293 BATCH_OPS(htab_lru), 2294 .map_btf_id = &htab_map_btf_ids[0], 2295 .iter_seq_info = &iter_seq_info, 2296 }; 2297 2298 /* Called from eBPF program */ 2299 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2300 { 2301 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2302 2303 if (l) 2304 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2305 else 2306 return NULL; 2307 } 2308 2309 /* inline bpf_map_lookup_elem() call for per-CPU hashmap */ 2310 static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 2311 { 2312 struct bpf_insn *insn = insn_buf; 2313 2314 if (!bpf_jit_supports_percpu_insn()) 2315 return -EOPNOTSUPP; 2316 2317 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2318 (void *(*)(struct bpf_map *map, void *key))NULL)); 2319 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2320 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2321 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2322 offsetof(struct htab_elem, key) + roundup(map->key_size, 8)); 2323 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2324 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2325 2326 return insn - insn_buf; 2327 } 2328 2329 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2330 { 2331 struct htab_elem *l; 2332 2333 if (cpu >= nr_cpu_ids) 2334 return NULL; 2335 2336 l = __htab_map_lookup_elem(map, key); 2337 if (l) 2338 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2339 else 2340 return NULL; 2341 } 2342 2343 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2344 { 2345 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2346 2347 if (l) { 2348 bpf_lru_node_set_ref(&l->lru_node); 2349 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2350 } 2351 2352 return NULL; 2353 } 2354 2355 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2356 { 2357 struct htab_elem *l; 2358 2359 if (cpu >= nr_cpu_ids) 2360 return NULL; 2361 2362 l = __htab_map_lookup_elem(map, key); 2363 if (l) { 2364 bpf_lru_node_set_ref(&l->lru_node); 2365 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2366 } 2367 2368 return NULL; 2369 } 2370 2371 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) 2372 { 2373 struct htab_elem *l; 2374 void __percpu *pptr; 2375 int ret = -ENOENT; 2376 int cpu, off = 0; 2377 u32 size; 2378 2379 /* per_cpu areas are zero-filled and bpf programs can only 2380 * access 'value_size' of them, so copying rounded areas 2381 * will not leak any kernel data 2382 */ 2383 size = round_up(map->value_size, 8); 2384 rcu_read_lock(); 2385 l = __htab_map_lookup_elem(map, key); 2386 if (!l) 2387 goto out; 2388 /* We do not mark LRU map element here in order to not mess up 2389 * eviction heuristics when user space does a map walk. 
2390 */ 2391 pptr = htab_elem_get_ptr(l, map->key_size); 2392 for_each_possible_cpu(cpu) { 2393 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2394 check_and_init_map_value(map, value + off); 2395 off += size; 2396 } 2397 ret = 0; 2398 out: 2399 rcu_read_unlock(); 2400 return ret; 2401 } 2402 2403 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2404 u64 map_flags) 2405 { 2406 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2407 int ret; 2408 2409 rcu_read_lock(); 2410 if (htab_is_lru(htab)) 2411 ret = __htab_lru_percpu_map_update_elem(map, key, value, 2412 map_flags, true); 2413 else 2414 ret = __htab_percpu_map_update_elem(map, key, value, map_flags, 2415 true); 2416 rcu_read_unlock(); 2417 2418 return ret; 2419 } 2420 2421 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, 2422 struct seq_file *m) 2423 { 2424 struct htab_elem *l; 2425 void __percpu *pptr; 2426 int cpu; 2427 2428 rcu_read_lock(); 2429 2430 l = __htab_map_lookup_elem(map, key); 2431 if (!l) { 2432 rcu_read_unlock(); 2433 return; 2434 } 2435 2436 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 2437 seq_puts(m, ": {\n"); 2438 pptr = htab_elem_get_ptr(l, map->key_size); 2439 for_each_possible_cpu(cpu) { 2440 seq_printf(m, "\tcpu%d: ", cpu); 2441 btf_type_seq_show(map->btf, map->btf_value_type_id, 2442 per_cpu_ptr(pptr, cpu), m); 2443 seq_putc(m, '\n'); 2444 } 2445 seq_puts(m, "}\n"); 2446 2447 rcu_read_unlock(); 2448 } 2449 2450 const struct bpf_map_ops htab_percpu_map_ops = { 2451 .map_meta_equal = bpf_map_meta_equal, 2452 .map_alloc_check = htab_map_alloc_check, 2453 .map_alloc = htab_map_alloc, 2454 .map_free = htab_map_free, 2455 .map_get_next_key = htab_map_get_next_key, 2456 .map_lookup_elem = htab_percpu_map_lookup_elem, 2457 .map_gen_lookup = htab_percpu_map_gen_lookup, 2458 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, 2459 .map_update_elem = htab_percpu_map_update_elem, 2460 .map_delete_elem = htab_map_delete_elem, 2461 .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, 2462 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2463 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2464 .map_for_each_callback = bpf_for_each_hash_elem, 2465 .map_mem_usage = htab_map_mem_usage, 2466 BATCH_OPS(htab_percpu), 2467 .map_btf_id = &htab_map_btf_ids[0], 2468 .iter_seq_info = &iter_seq_info, 2469 }; 2470 2471 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2472 .map_meta_equal = bpf_map_meta_equal, 2473 .map_alloc_check = htab_map_alloc_check, 2474 .map_alloc = htab_map_alloc, 2475 .map_free = htab_map_free, 2476 .map_get_next_key = htab_map_get_next_key, 2477 .map_lookup_elem = htab_lru_percpu_map_lookup_elem, 2478 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, 2479 .map_update_elem = htab_lru_percpu_map_update_elem, 2480 .map_delete_elem = htab_lru_map_delete_elem, 2481 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, 2482 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2483 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2484 .map_for_each_callback = bpf_for_each_hash_elem, 2485 .map_mem_usage = htab_map_mem_usage, 2486 BATCH_OPS(htab_lru_percpu), 2487 .map_btf_id = &htab_map_btf_ids[0], 2488 .iter_seq_info = &iter_seq_info, 2489 }; 2490 2491 static int fd_htab_map_alloc_check(union bpf_attr *attr) 2492 { 2493 if (attr->value_size != sizeof(u32)) 2494 return -EINVAL; 2495 return htab_map_alloc_check(attr); 2496 } 
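/*
 * Usage sketch (illustrative only, not part of this file): reading one
 * element of a BPF_MAP_TYPE_PERCPU_HASH map from user space.  The syscall
 * path lands in bpf_percpu_hash_copy() above, so the value buffer must
 * provide round_up(value_size, 8) bytes per possible CPU.  The map fd, the
 * key and the 'struct stats' value type are assumptions made for the example.
 *
 *	#include <errno.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <bpf/bpf.h>
 *	#include <bpf/libbpf.h>
 *
 *	struct stats { __u64 packets; __u64 bytes; };	// hypothetical value type
 *
 *	static int read_percpu_stats(int map_fd, __u32 key)
 *	{
 *		int ncpus = libbpf_num_possible_cpus();
 *		struct stats *vals;
 *		int cpu, err;
 *
 *		if (ncpus < 0)
 *			return ncpus;
 *		// sizeof(struct stats) is already a multiple of 8, so no
 *		// extra per-slot rounding is needed for this value type.
 *		vals = calloc(ncpus, sizeof(*vals));
 *		if (!vals)
 *			return -ENOMEM;
 *		// The kernel copies one value per possible CPU into 'vals'.
 *		err = bpf_map_lookup_elem(map_fd, &key, vals);
 *		if (!err)
 *			for (cpu = 0; cpu < ncpus; cpu++)
 *				printf("cpu%d: %llu packets\n", cpu,
 *				       (unsigned long long)vals[cpu].packets);
 *		free(vals);
 *		return err;
 *	}
 */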
2497 2498 static void fd_htab_map_free(struct bpf_map *map) 2499 { 2500 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2501 struct hlist_nulls_node *n; 2502 struct hlist_nulls_head *head; 2503 struct htab_elem *l; 2504 int i; 2505 2506 for (i = 0; i < htab->n_buckets; i++) { 2507 head = select_bucket(htab, i); 2508 2509 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2510 void *ptr = fd_htab_map_get_ptr(map, l); 2511 2512 map->ops->map_fd_put_ptr(map, ptr, false); 2513 } 2514 } 2515 2516 htab_map_free(map); 2517 } 2518 2519 /* only called from syscall */ 2520 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 2521 { 2522 void **ptr; 2523 int ret = 0; 2524 2525 if (!map->ops->map_fd_sys_lookup_elem) 2526 return -ENOTSUPP; 2527 2528 rcu_read_lock(); 2529 ptr = htab_map_lookup_elem(map, key); 2530 if (ptr) 2531 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); 2532 else 2533 ret = -ENOENT; 2534 rcu_read_unlock(); 2535 2536 return ret; 2537 } 2538 2539 /* only called from syscall */ 2540 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 2541 void *key, void *value, u64 map_flags) 2542 { 2543 void *ptr; 2544 int ret; 2545 u32 ufd = *(u32 *)value; 2546 2547 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); 2548 if (IS_ERR(ptr)) 2549 return PTR_ERR(ptr); 2550 2551 /* The htab bucket lock is always held during update operations in fd 2552 * htab map, and the following rcu_read_lock() is only used to avoid 2553 * the WARN_ON_ONCE in htab_map_update_elem(). 2554 */ 2555 rcu_read_lock(); 2556 ret = htab_map_update_elem(map, key, &ptr, map_flags); 2557 rcu_read_unlock(); 2558 if (ret) 2559 map->ops->map_fd_put_ptr(map, ptr, false); 2560 2561 return ret; 2562 } 2563 2564 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) 2565 { 2566 struct bpf_map *map, *inner_map_meta; 2567 2568 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 2569 if (IS_ERR(inner_map_meta)) 2570 return inner_map_meta; 2571 2572 map = htab_map_alloc(attr); 2573 if (IS_ERR(map)) { 2574 bpf_map_meta_free(inner_map_meta); 2575 return map; 2576 } 2577 2578 map->inner_map_meta = inner_map_meta; 2579 2580 return map; 2581 } 2582 2583 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) 2584 { 2585 struct bpf_map **inner_map = htab_map_lookup_elem(map, key); 2586 2587 if (!inner_map) 2588 return NULL; 2589 2590 return READ_ONCE(*inner_map); 2591 } 2592 2593 static int htab_of_map_gen_lookup(struct bpf_map *map, 2594 struct bpf_insn *insn_buf) 2595 { 2596 struct bpf_insn *insn = insn_buf; 2597 const int ret = BPF_REG_0; 2598 2599 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2600 (void *(*)(struct bpf_map *map, void *key))NULL)); 2601 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2602 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); 2603 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 2604 offsetof(struct htab_elem, key) + 2605 round_up(map->key_size, 8)); 2606 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 2607 2608 return insn - insn_buf; 2609 } 2610 2611 static void htab_of_map_free(struct bpf_map *map) 2612 { 2613 bpf_map_meta_free(map->inner_map_meta); 2614 fd_htab_map_free(map); 2615 } 2616 2617 const struct bpf_map_ops htab_of_maps_map_ops = { 2618 .map_alloc_check = fd_htab_map_alloc_check, 2619 .map_alloc = htab_of_map_alloc, 2620 .map_free = htab_of_map_free, 2621 .map_get_next_key = htab_map_get_next_key, 2622 .map_lookup_elem = htab_of_map_lookup_elem, 2623 .map_delete_elem = htab_map_delete_elem, 2624 
.map_fd_get_ptr = bpf_map_fd_get_ptr, 2625 .map_fd_put_ptr = bpf_map_fd_put_ptr, 2626 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2627 .map_gen_lookup = htab_of_map_gen_lookup, 2628 .map_check_btf = map_check_no_btf, 2629 .map_mem_usage = htab_map_mem_usage, 2630 BATCH_OPS(htab), 2631 .map_btf_id = &htab_map_btf_ids[0], 2632 }; 2633
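/*
 * Usage sketch (illustrative only, not part of this file): draining a
 * BPF_MAP_TYPE_HASH map from user space through the batch interface
 * implemented by __htab_map_lookup_and_delete_batch() above.  The kernel
 * hands back an opaque out_batch cursor (internally the next bucket index)
 * which must be passed back as in_batch on the following call; -ENOENT
 * signals that the whole table has been visited.  The __u32/__u64 key and
 * value types and the caller-provided arrays are assumptions for the example.
 *
 *	#include <errno.h>
 *	#include <bpf/bpf.h>
 *
 *	static int drain_hash_map(int map_fd, __u32 *keys, __u64 *vals,
 *				  __u32 batch_sz)
 *	{
 *		__u32 next_bucket = 0, count;
 *		void *in_batch = NULL;	// NULL: start from the first bucket
 *		int err;
 *
 *		do {
 *			count = batch_sz;	// in: room available, out: copied
 *			err = bpf_map_lookup_and_delete_batch(map_fd, in_batch,
 *							      &next_bucket, keys,
 *							      vals, &count, NULL);
 *			if (err && err != -ENOENT)
 *				return err;	// libbpf >= 1.0 returns -errno
 *			// 'count' key/value pairs were copied out and deleted.
 *			in_batch = &next_bucket;
 *		} while (!err);
 *		return 0;
 *	}
 *
 * The kernel never splits a bucket across calls, so batch_sz must be at
 * least as large as the deepest bucket, otherwise a call whose first bucket
 * does not fit fails with -ENOSPC.
 */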