1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 3 * Copyright (c) 2016 Facebook 4 */ 5 #include <linux/bpf.h> 6 #include <linux/btf.h> 7 #include <linux/jhash.h> 8 #include <linux/filter.h> 9 #include <linux/rculist_nulls.h> 10 #include <linux/rcupdate_wait.h> 11 #include <linux/random.h> 12 #include <uapi/linux/btf.h> 13 #include <linux/rcupdate_trace.h> 14 #include <linux/btf_ids.h> 15 #include "percpu_freelist.h" 16 #include "bpf_lru_list.h" 17 #include "map_in_map.h" 18 #include <linux/bpf_mem_alloc.h> 19 20 #define HTAB_CREATE_FLAG_MASK \ 21 (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \ 22 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED) 23 24 #define BATCH_OPS(_name) \ 25 .map_lookup_batch = \ 26 _name##_map_lookup_batch, \ 27 .map_lookup_and_delete_batch = \ 28 _name##_map_lookup_and_delete_batch, \ 29 .map_update_batch = \ 30 generic_map_update_batch, \ 31 .map_delete_batch = \ 32 generic_map_delete_batch 33 34 /* 35 * The bucket lock has two protection scopes: 36 * 37 * 1) Serializing concurrent operations from BPF programs on different 38 * CPUs 39 * 40 * 2) Serializing concurrent operations from BPF programs and sys_bpf() 41 * 42 * BPF programs can execute in any context including perf, kprobes and 43 * tracing. As there are almost no limits where perf, kprobes and tracing 44 * can be invoked from the lock operations need to be protected against 45 * deadlocks. Deadlocks can be caused by recursion and by an invocation in 46 * the lock held section when functions which acquire this lock are invoked 47 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU 48 * variable bpf_prog_active, which prevents BPF programs attached to perf 49 * events, kprobes and tracing to be invoked before the prior invocation 50 * from one of these contexts completed. sys_bpf() uses the same mechanism 51 * by pinning the task to the current CPU and incrementing the recursion 52 * protection across the map operation. 53 * 54 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain 55 * operations like memory allocations (even with GFP_ATOMIC) from atomic 56 * contexts. This is required because even with GFP_ATOMIC the memory 57 * allocator calls into code paths which acquire locks with long held lock 58 * sections. To ensure the deterministic behaviour these locks are regular 59 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only 60 * true atomic contexts on an RT kernel are the low level hardware 61 * handling, scheduling, low level interrupt handling, NMIs etc. None of 62 * these contexts should ever do memory allocations. 63 * 64 * As regular device interrupt handlers and soft interrupts are forced into 65 * thread context, the existing code which does 66 * spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*(); 67 * just works. 68 * 69 * In theory the BPF locks could be converted to regular spinlocks as well, 70 * but the bucket locks and percpu_freelist locks can be taken from 71 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be 72 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc, 73 * it is only safe to use raw spinlock for preallocated hash map on a RT kernel, 74 * because there is no memory allocation within the lock held sections. 
However 75 * after hash map was fully converted to use bpf_mem_alloc, there will be 76 * non-synchronous memory allocation for non-preallocated hash map, so it is 77 * safe to always use raw spinlock for bucket lock. 78 */ 79 struct bucket { 80 struct hlist_nulls_head head; 81 raw_spinlock_t raw_lock; 82 }; 83 84 #define HASHTAB_MAP_LOCK_COUNT 8 85 #define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1) 86 87 struct bpf_htab { 88 struct bpf_map map; 89 struct bpf_mem_alloc ma; 90 struct bpf_mem_alloc pcpu_ma; 91 struct bucket *buckets; 92 void *elems; 93 union { 94 struct pcpu_freelist freelist; 95 struct bpf_lru lru; 96 }; 97 struct htab_elem *__percpu *extra_elems; 98 /* number of elements in non-preallocated hashtable are kept 99 * in either pcount or count 100 */ 101 struct percpu_counter pcount; 102 atomic_t count; 103 bool use_percpu_counter; 104 u32 n_buckets; /* number of hash buckets */ 105 u32 elem_size; /* size of each element in bytes */ 106 u32 hashrnd; 107 struct lock_class_key lockdep_key; 108 int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT]; 109 }; 110 111 /* each htab element is struct htab_elem + key + value */ 112 struct htab_elem { 113 union { 114 struct hlist_nulls_node hash_node; 115 struct { 116 void *padding; 117 union { 118 struct pcpu_freelist_node fnode; 119 struct htab_elem *batch_flink; 120 }; 121 }; 122 }; 123 union { 124 /* pointer to per-cpu pointer */ 125 void *ptr_to_pptr; 126 struct bpf_lru_node lru_node; 127 }; 128 u32 hash; 129 char key[] __aligned(8); 130 }; 131 132 static inline bool htab_is_prealloc(const struct bpf_htab *htab) 133 { 134 return !(htab->map.map_flags & BPF_F_NO_PREALLOC); 135 } 136 137 static void htab_init_buckets(struct bpf_htab *htab) 138 { 139 unsigned int i; 140 141 for (i = 0; i < htab->n_buckets; i++) { 142 INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i); 143 raw_spin_lock_init(&htab->buckets[i].raw_lock); 144 lockdep_set_class(&htab->buckets[i].raw_lock, 145 &htab->lockdep_key); 146 cond_resched(); 147 } 148 } 149 150 static inline int htab_lock_bucket(const struct bpf_htab *htab, 151 struct bucket *b, u32 hash, 152 unsigned long *pflags) 153 { 154 unsigned long flags; 155 156 hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); 157 158 preempt_disable(); 159 local_irq_save(flags); 160 if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) { 161 __this_cpu_dec(*(htab->map_locked[hash])); 162 local_irq_restore(flags); 163 preempt_enable(); 164 return -EBUSY; 165 } 166 167 raw_spin_lock(&b->raw_lock); 168 *pflags = flags; 169 170 return 0; 171 } 172 173 static inline void htab_unlock_bucket(const struct bpf_htab *htab, 174 struct bucket *b, u32 hash, 175 unsigned long flags) 176 { 177 hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1); 178 raw_spin_unlock(&b->raw_lock); 179 __this_cpu_dec(*(htab->map_locked[hash])); 180 local_irq_restore(flags); 181 preempt_enable(); 182 } 183 184 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node); 185 186 static bool htab_is_lru(const struct bpf_htab *htab) 187 { 188 return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH || 189 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 190 } 191 192 static bool htab_is_percpu(const struct bpf_htab *htab) 193 { 194 return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH || 195 htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; 196 } 197 198 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size, 199 void __percpu *pptr) 200 { 201 *(void __percpu **)(l->key + 
key_size) = pptr; 202 } 203 204 static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size) 205 { 206 return *(void __percpu **)(l->key + key_size); 207 } 208 209 static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l) 210 { 211 return *(void **)(l->key + roundup(map->key_size, 8)); 212 } 213 214 static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i) 215 { 216 return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size); 217 } 218 219 static bool htab_has_extra_elems(struct bpf_htab *htab) 220 { 221 return !htab_is_percpu(htab) && !htab_is_lru(htab); 222 } 223 224 static void htab_free_prealloced_timers(struct bpf_htab *htab) 225 { 226 u32 num_entries = htab->map.max_entries; 227 int i; 228 229 if (!btf_record_has_field(htab->map.record, BPF_TIMER)) 230 return; 231 if (htab_has_extra_elems(htab)) 232 num_entries += num_possible_cpus(); 233 234 for (i = 0; i < num_entries; i++) { 235 struct htab_elem *elem; 236 237 elem = get_htab_elem(htab, i); 238 bpf_obj_free_timer(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); 239 cond_resched(); 240 } 241 } 242 243 static void htab_free_prealloced_fields(struct bpf_htab *htab) 244 { 245 u32 num_entries = htab->map.max_entries; 246 int i; 247 248 if (IS_ERR_OR_NULL(htab->map.record)) 249 return; 250 if (htab_has_extra_elems(htab)) 251 num_entries += num_possible_cpus(); 252 for (i = 0; i < num_entries; i++) { 253 struct htab_elem *elem; 254 255 elem = get_htab_elem(htab, i); 256 if (htab_is_percpu(htab)) { 257 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 258 int cpu; 259 260 for_each_possible_cpu(cpu) { 261 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 262 cond_resched(); 263 } 264 } else { 265 bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8)); 266 cond_resched(); 267 } 268 cond_resched(); 269 } 270 } 271 272 static void htab_free_elems(struct bpf_htab *htab) 273 { 274 int i; 275 276 if (!htab_is_percpu(htab)) 277 goto free_elems; 278 279 for (i = 0; i < htab->map.max_entries; i++) { 280 void __percpu *pptr; 281 282 pptr = htab_elem_get_ptr(get_htab_elem(htab, i), 283 htab->map.key_size); 284 free_percpu(pptr); 285 cond_resched(); 286 } 287 free_elems: 288 bpf_map_area_free(htab->elems); 289 } 290 291 /* The LRU list has a lock (lru_lock). Each htab bucket has a lock 292 * (bucket_lock). If both locks need to be acquired together, the lock 293 * order is always lru_lock -> bucket_lock and this only happens in 294 * bpf_lru_list.c logic. For example, certain code path of 295 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(), 296 * will acquire lru_lock first followed by acquiring bucket_lock. 297 * 298 * In hashtab.c, to avoid deadlock, lock acquisition of 299 * bucket_lock followed by lru_lock is not allowed. In such cases, 300 * bucket_lock needs to be released first before acquiring lru_lock. 
301 */ 302 static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key, 303 u32 hash) 304 { 305 struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash); 306 struct htab_elem *l; 307 308 if (node) { 309 bpf_map_inc_elem_count(&htab->map); 310 l = container_of(node, struct htab_elem, lru_node); 311 memcpy(l->key, key, htab->map.key_size); 312 return l; 313 } 314 315 return NULL; 316 } 317 318 static int prealloc_init(struct bpf_htab *htab) 319 { 320 u32 num_entries = htab->map.max_entries; 321 int err = -ENOMEM, i; 322 323 if (htab_has_extra_elems(htab)) 324 num_entries += num_possible_cpus(); 325 326 htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries, 327 htab->map.numa_node); 328 if (!htab->elems) 329 return -ENOMEM; 330 331 if (!htab_is_percpu(htab)) 332 goto skip_percpu_elems; 333 334 for (i = 0; i < num_entries; i++) { 335 u32 size = round_up(htab->map.value_size, 8); 336 void __percpu *pptr; 337 338 pptr = bpf_map_alloc_percpu(&htab->map, size, 8, 339 GFP_USER | __GFP_NOWARN); 340 if (!pptr) 341 goto free_elems; 342 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size, 343 pptr); 344 cond_resched(); 345 } 346 347 skip_percpu_elems: 348 if (htab_is_lru(htab)) 349 err = bpf_lru_init(&htab->lru, 350 htab->map.map_flags & BPF_F_NO_COMMON_LRU, 351 offsetof(struct htab_elem, hash) - 352 offsetof(struct htab_elem, lru_node), 353 htab_lru_map_delete_node, 354 htab); 355 else 356 err = pcpu_freelist_init(&htab->freelist); 357 358 if (err) 359 goto free_elems; 360 361 if (htab_is_lru(htab)) 362 bpf_lru_populate(&htab->lru, htab->elems, 363 offsetof(struct htab_elem, lru_node), 364 htab->elem_size, num_entries); 365 else 366 pcpu_freelist_populate(&htab->freelist, 367 htab->elems + offsetof(struct htab_elem, fnode), 368 htab->elem_size, num_entries); 369 370 return 0; 371 372 free_elems: 373 htab_free_elems(htab); 374 return err; 375 } 376 377 static void prealloc_destroy(struct bpf_htab *htab) 378 { 379 htab_free_elems(htab); 380 381 if (htab_is_lru(htab)) 382 bpf_lru_destroy(&htab->lru); 383 else 384 pcpu_freelist_destroy(&htab->freelist); 385 } 386 387 static int alloc_extra_elems(struct bpf_htab *htab) 388 { 389 struct htab_elem *__percpu *pptr, *l_new; 390 struct pcpu_freelist_node *l; 391 int cpu; 392 393 pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8, 394 GFP_USER | __GFP_NOWARN); 395 if (!pptr) 396 return -ENOMEM; 397 398 for_each_possible_cpu(cpu) { 399 l = pcpu_freelist_pop(&htab->freelist); 400 /* pop will succeed, since prealloc_init() 401 * preallocated extra num_possible_cpus elements 402 */ 403 l_new = container_of(l, struct htab_elem, fnode); 404 *per_cpu_ptr(pptr, cpu) = l_new; 405 } 406 htab->extra_elems = pptr; 407 return 0; 408 } 409 410 /* Called from syscall */ 411 static int htab_map_alloc_check(union bpf_attr *attr) 412 { 413 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 414 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 415 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 416 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 417 /* percpu_lru means each cpu has its own LRU list. 418 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 419 * the map's value itself is percpu. percpu_lru has 420 * nothing to do with the map's value. 
421 */ 422 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 423 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 424 bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED); 425 int numa_node = bpf_map_attr_numa_node(attr); 426 427 BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != 428 offsetof(struct htab_elem, hash_node.pprev)); 429 430 if (zero_seed && !capable(CAP_SYS_ADMIN)) 431 /* Guard against local DoS, and discourage production use. */ 432 return -EPERM; 433 434 if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK || 435 !bpf_map_flags_access_ok(attr->map_flags)) 436 return -EINVAL; 437 438 if (!lru && percpu_lru) 439 return -EINVAL; 440 441 if (lru && !prealloc) 442 return -ENOTSUPP; 443 444 if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru)) 445 return -EINVAL; 446 447 /* check sanity of attributes. 448 * value_size == 0 may be allowed in the future to use map as a set 449 */ 450 if (attr->max_entries == 0 || attr->key_size == 0 || 451 attr->value_size == 0) 452 return -EINVAL; 453 454 if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE - 455 sizeof(struct htab_elem)) 456 /* if key_size + value_size is bigger, the user space won't be 457 * able to access the elements via bpf syscall. This check 458 * also makes sure that the elem_size doesn't overflow and it's 459 * kmalloc-able later in htab_map_update_elem() 460 */ 461 return -E2BIG; 462 463 return 0; 464 } 465 466 static struct bpf_map *htab_map_alloc(union bpf_attr *attr) 467 { 468 bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || 469 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 470 bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH || 471 attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH); 472 /* percpu_lru means each cpu has its own LRU list. 473 * it is different from BPF_MAP_TYPE_PERCPU_HASH where 474 * the map's value itself is percpu. percpu_lru has 475 * nothing to do with the map's value. 476 */ 477 bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU); 478 bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC); 479 struct bpf_htab *htab; 480 int err, i; 481 482 htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE); 483 if (!htab) 484 return ERR_PTR(-ENOMEM); 485 486 lockdep_register_key(&htab->lockdep_key); 487 488 bpf_map_init_from_attr(&htab->map, attr); 489 490 if (percpu_lru) { 491 /* ensure each CPU's lru list has >=1 elements. 492 * since we are at it, make each lru list has the same 493 * number of elements. 
494 */ 495 htab->map.max_entries = roundup(attr->max_entries, 496 num_possible_cpus()); 497 if (htab->map.max_entries < attr->max_entries) 498 htab->map.max_entries = rounddown(attr->max_entries, 499 num_possible_cpus()); 500 } 501 502 /* hash table size must be power of 2; roundup_pow_of_two() can overflow 503 * into UB on 32-bit arches, so check that first 504 */ 505 err = -E2BIG; 506 if (htab->map.max_entries > 1UL << 31) 507 goto free_htab; 508 509 htab->n_buckets = roundup_pow_of_two(htab->map.max_entries); 510 511 htab->elem_size = sizeof(struct htab_elem) + 512 round_up(htab->map.key_size, 8); 513 if (percpu) 514 htab->elem_size += sizeof(void *); 515 else 516 htab->elem_size += round_up(htab->map.value_size, 8); 517 518 /* check for u32 overflow */ 519 if (htab->n_buckets > U32_MAX / sizeof(struct bucket)) 520 goto free_htab; 521 522 err = bpf_map_init_elem_count(&htab->map); 523 if (err) 524 goto free_htab; 525 526 err = -ENOMEM; 527 htab->buckets = bpf_map_area_alloc(htab->n_buckets * 528 sizeof(struct bucket), 529 htab->map.numa_node); 530 if (!htab->buckets) 531 goto free_elem_count; 532 533 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) { 534 htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map, 535 sizeof(int), 536 sizeof(int), 537 GFP_USER); 538 if (!htab->map_locked[i]) 539 goto free_map_locked; 540 } 541 542 if (htab->map.map_flags & BPF_F_ZERO_SEED) 543 htab->hashrnd = 0; 544 else 545 htab->hashrnd = get_random_u32(); 546 547 htab_init_buckets(htab); 548 549 /* compute_batch_value() computes batch value as num_online_cpus() * 2 550 * and __percpu_counter_compare() needs 551 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus() 552 * for percpu_counter to be faster than atomic_t. In practice the average bpf 553 * hash map size is 10k, which means that a system with 64 cpus will fill 554 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore 555 * define our own batch count as 32 then 10k hash map can be filled up to 80%: 556 * 10k - 8k > 32 _batch_ * 64 _cpus_ 557 * and __percpu_counter_compare() will still be fast. At that point hash map 558 * collisions will dominate its performance anyway. Assume that hash map filled 559 * to 50+% isn't going to be O(1) and use the following formula to choose 560 * between percpu_counter and atomic_t. 561 */ 562 #define PERCPU_COUNTER_BATCH 32 563 if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH) 564 htab->use_percpu_counter = true; 565 566 if (htab->use_percpu_counter) { 567 err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL); 568 if (err) 569 goto free_map_locked; 570 } 571 572 if (prealloc) { 573 err = prealloc_init(htab); 574 if (err) 575 goto free_map_locked; 576 577 if (!percpu && !lru) { 578 /* lru itself can remove the least used element, so 579 * there is no need for an extra elem during map_update. 
580 */ 581 err = alloc_extra_elems(htab); 582 if (err) 583 goto free_prealloc; 584 } 585 } else { 586 err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false); 587 if (err) 588 goto free_map_locked; 589 if (percpu) { 590 err = bpf_mem_alloc_init(&htab->pcpu_ma, 591 round_up(htab->map.value_size, 8), true); 592 if (err) 593 goto free_map_locked; 594 } 595 } 596 597 return &htab->map; 598 599 free_prealloc: 600 prealloc_destroy(htab); 601 free_map_locked: 602 if (htab->use_percpu_counter) 603 percpu_counter_destroy(&htab->pcount); 604 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) 605 free_percpu(htab->map_locked[i]); 606 bpf_map_area_free(htab->buckets); 607 bpf_mem_alloc_destroy(&htab->pcpu_ma); 608 bpf_mem_alloc_destroy(&htab->ma); 609 free_elem_count: 610 bpf_map_free_elem_count(&htab->map); 611 free_htab: 612 lockdep_unregister_key(&htab->lockdep_key); 613 bpf_map_area_free(htab); 614 return ERR_PTR(err); 615 } 616 617 static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd) 618 { 619 if (likely(key_len % 4 == 0)) 620 return jhash2(key, key_len / 4, hashrnd); 621 return jhash(key, key_len, hashrnd); 622 } 623 624 static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) 625 { 626 return &htab->buckets[hash & (htab->n_buckets - 1)]; 627 } 628 629 static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash) 630 { 631 return &__select_bucket(htab, hash)->head; 632 } 633 634 /* this lookup function can only be called with bucket lock taken */ 635 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash, 636 void *key, u32 key_size) 637 { 638 struct hlist_nulls_node *n; 639 struct htab_elem *l; 640 641 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 642 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 643 return l; 644 645 return NULL; 646 } 647 648 /* can be called without bucket lock. it will repeat the loop in 649 * the unlikely event when elements moved from one bucket into another 650 * while link list is being walked 651 */ 652 static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head, 653 u32 hash, void *key, 654 u32 key_size, u32 n_buckets) 655 { 656 struct hlist_nulls_node *n; 657 struct htab_elem *l; 658 659 again: 660 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 661 if (l->hash == hash && !memcmp(&l->key, key, key_size)) 662 return l; 663 664 if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1)))) 665 goto again; 666 667 return NULL; 668 } 669 670 /* Called from syscall or from eBPF program directly, so 671 * arguments have to match bpf_map_lookup_elem() exactly. 672 * The return value is adjusted by BPF instructions 673 * in htab_map_gen_lookup(). 
674 */ 675 static void *__htab_map_lookup_elem(struct bpf_map *map, void *key) 676 { 677 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 678 struct hlist_nulls_head *head; 679 struct htab_elem *l; 680 u32 hash, key_size; 681 682 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 683 !rcu_read_lock_bh_held()); 684 685 key_size = map->key_size; 686 687 hash = htab_map_hash(key, key_size, htab->hashrnd); 688 689 head = select_bucket(htab, hash); 690 691 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 692 693 return l; 694 } 695 696 static void *htab_map_lookup_elem(struct bpf_map *map, void *key) 697 { 698 struct htab_elem *l = __htab_map_lookup_elem(map, key); 699 700 if (l) 701 return l->key + round_up(map->key_size, 8); 702 703 return NULL; 704 } 705 706 /* inline bpf_map_lookup_elem() call. 707 * Instead of: 708 * bpf_prog 709 * bpf_map_lookup_elem 710 * map->ops->map_lookup_elem 711 * htab_map_lookup_elem 712 * __htab_map_lookup_elem 713 * do: 714 * bpf_prog 715 * __htab_map_lookup_elem 716 */ 717 static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 718 { 719 struct bpf_insn *insn = insn_buf; 720 const int ret = BPF_REG_0; 721 722 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 723 (void *(*)(struct bpf_map *map, void *key))NULL)); 724 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 725 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 726 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 727 offsetof(struct htab_elem, key) + 728 round_up(map->key_size, 8)); 729 return insn - insn_buf; 730 } 731 732 static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map, 733 void *key, const bool mark) 734 { 735 struct htab_elem *l = __htab_map_lookup_elem(map, key); 736 737 if (l) { 738 if (mark) 739 bpf_lru_node_set_ref(&l->lru_node); 740 return l->key + round_up(map->key_size, 8); 741 } 742 743 return NULL; 744 } 745 746 static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key) 747 { 748 return __htab_lru_map_lookup_elem(map, key, true); 749 } 750 751 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) 752 { 753 return __htab_lru_map_lookup_elem(map, key, false); 754 } 755 756 static int htab_lru_map_gen_lookup(struct bpf_map *map, 757 struct bpf_insn *insn_buf) 758 { 759 struct bpf_insn *insn = insn_buf; 760 const int ret = BPF_REG_0; 761 const int ref_reg = BPF_REG_1; 762 763 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 764 (void *(*)(struct bpf_map *map, void *key))NULL)); 765 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 766 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); 767 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, 768 offsetof(struct htab_elem, lru_node) + 769 offsetof(struct bpf_lru_node, ref)); 770 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); 771 *insn++ = BPF_ST_MEM(BPF_B, ret, 772 offsetof(struct htab_elem, lru_node) + 773 offsetof(struct bpf_lru_node, ref), 774 1); 775 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 776 offsetof(struct htab_elem, key) + 777 round_up(map->key_size, 8)); 778 return insn - insn_buf; 779 } 780 781 static void check_and_free_fields(struct bpf_htab *htab, 782 struct htab_elem *elem) 783 { 784 if (htab_is_percpu(htab)) { 785 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 786 int cpu; 787 788 for_each_possible_cpu(cpu) 789 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 790 } else { 791 void *map_value = elem->key + round_up(htab->map.key_size, 8); 792 793 bpf_obj_free_fields(htab->map.record, map_value); 794 } 
795 } 796 797 /* It is called from the bpf_lru_list when the LRU needs to delete 798 * older elements from the htab. 799 */ 800 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 801 { 802 struct bpf_htab *htab = arg; 803 struct htab_elem *l = NULL, *tgt_l; 804 struct hlist_nulls_head *head; 805 struct hlist_nulls_node *n; 806 unsigned long flags; 807 struct bucket *b; 808 int ret; 809 810 tgt_l = container_of(node, struct htab_elem, lru_node); 811 b = __select_bucket(htab, tgt_l->hash); 812 head = &b->head; 813 814 ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags); 815 if (ret) 816 return false; 817 818 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 819 if (l == tgt_l) { 820 hlist_nulls_del_rcu(&l->hash_node); 821 check_and_free_fields(htab, l); 822 bpf_map_dec_elem_count(&htab->map); 823 break; 824 } 825 826 htab_unlock_bucket(htab, b, tgt_l->hash, flags); 827 828 return l == tgt_l; 829 } 830 831 /* Called from syscall */ 832 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 833 { 834 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 835 struct hlist_nulls_head *head; 836 struct htab_elem *l, *next_l; 837 u32 hash, key_size; 838 int i = 0; 839 840 WARN_ON_ONCE(!rcu_read_lock_held()); 841 842 key_size = map->key_size; 843 844 if (!key) 845 goto find_first_elem; 846 847 hash = htab_map_hash(key, key_size, htab->hashrnd); 848 849 head = select_bucket(htab, hash); 850 851 /* lookup the key */ 852 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 853 854 if (!l) 855 goto find_first_elem; 856 857 /* key was found, get next key in the same bucket */ 858 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 859 struct htab_elem, hash_node); 860 861 if (next_l) { 862 /* if next elem in this hash list is non-zero, just return it */ 863 memcpy(next_key, next_l->key, key_size); 864 return 0; 865 } 866 867 /* no more elements in this hash list, go to the next bucket */ 868 i = hash & (htab->n_buckets - 1); 869 i++; 870 871 find_first_elem: 872 /* iterate over buckets */ 873 for (; i < htab->n_buckets; i++) { 874 head = select_bucket(htab, i); 875 876 /* pick first element in the bucket */ 877 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 878 struct htab_elem, hash_node); 879 if (next_l) { 880 /* if it's not empty, just return it */ 881 memcpy(next_key, next_l->key, key_size); 882 return 0; 883 } 884 } 885 886 /* iterated over all buckets and all elements */ 887 return -ENOENT; 888 } 889 890 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 891 { 892 check_and_free_fields(htab, l); 893 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 894 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 895 bpf_mem_cache_free(&htab->ma, l); 896 } 897 898 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) 899 { 900 struct bpf_map *map = &htab->map; 901 void *ptr; 902 903 if (map->ops->map_fd_put_ptr) { 904 ptr = fd_htab_map_get_ptr(map, l); 905 map->ops->map_fd_put_ptr(map, ptr, true); 906 } 907 } 908 909 static bool is_map_full(struct bpf_htab *htab) 910 { 911 if (htab->use_percpu_counter) 912 return __percpu_counter_compare(&htab->pcount, htab->map.max_entries, 913 PERCPU_COUNTER_BATCH) >= 0; 914 return atomic_read(&htab->count) >= htab->map.max_entries; 915 } 916 917 static void inc_elem_count(struct bpf_htab *htab) 918 { 919 bpf_map_inc_elem_count(&htab->map); 920 921 if (htab->use_percpu_counter) 
922 percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH); 923 else 924 atomic_inc(&htab->count); 925 } 926 927 static void dec_elem_count(struct bpf_htab *htab) 928 { 929 bpf_map_dec_elem_count(&htab->map); 930 931 if (htab->use_percpu_counter) 932 percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH); 933 else 934 atomic_dec(&htab->count); 935 } 936 937 938 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 939 { 940 htab_put_fd_value(htab, l); 941 942 if (htab_is_prealloc(htab)) { 943 bpf_map_dec_elem_count(&htab->map); 944 check_and_free_fields(htab, l); 945 __pcpu_freelist_push(&htab->freelist, &l->fnode); 946 } else { 947 dec_elem_count(htab); 948 htab_elem_free(htab, l); 949 } 950 } 951 952 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 953 void *value, bool onallcpus) 954 { 955 if (!onallcpus) { 956 /* copy true value_size bytes */ 957 copy_map_value(&htab->map, this_cpu_ptr(pptr), value); 958 } else { 959 u32 size = round_up(htab->map.value_size, 8); 960 int off = 0, cpu; 961 962 for_each_possible_cpu(cpu) { 963 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off); 964 off += size; 965 } 966 } 967 } 968 969 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 970 void *value, bool onallcpus) 971 { 972 /* When not setting the initial value on all cpus, zero-fill element 973 * values for other cpus. Otherwise, bpf program has no way to ensure 974 * known initial values for cpus other than current one 975 * (onallcpus=false always when coming from bpf prog). 976 */ 977 if (!onallcpus) { 978 int current_cpu = raw_smp_processor_id(); 979 int cpu; 980 981 for_each_possible_cpu(cpu) { 982 if (cpu == current_cpu) 983 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 984 else /* Since elem is preallocated, we cannot touch special fields */ 985 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 986 } 987 } else { 988 pcpu_copy_value(htab, pptr, value, onallcpus); 989 } 990 } 991 992 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 993 { 994 return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS && 995 BITS_PER_LONG == 64; 996 } 997 998 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 999 void *value, u32 key_size, u32 hash, 1000 bool percpu, bool onallcpus, 1001 struct htab_elem *old_elem) 1002 { 1003 u32 size = htab->map.value_size; 1004 bool prealloc = htab_is_prealloc(htab); 1005 struct htab_elem *l_new, **pl_new; 1006 void __percpu *pptr; 1007 1008 if (prealloc) { 1009 if (old_elem) { 1010 /* if we're updating the existing element, 1011 * use per-cpu extra elems to avoid freelist_pop/push 1012 */ 1013 pl_new = this_cpu_ptr(htab->extra_elems); 1014 l_new = *pl_new; 1015 htab_put_fd_value(htab, old_elem); 1016 *pl_new = old_elem; 1017 } else { 1018 struct pcpu_freelist_node *l; 1019 1020 l = __pcpu_freelist_pop(&htab->freelist); 1021 if (!l) 1022 return ERR_PTR(-E2BIG); 1023 l_new = container_of(l, struct htab_elem, fnode); 1024 bpf_map_inc_elem_count(&htab->map); 1025 } 1026 } else { 1027 if (is_map_full(htab)) 1028 if (!old_elem) 1029 /* when map is full and update() is replacing 1030 * old element, it's ok to allocate, since 1031 * old element will be freed immediately. 
1032 * Otherwise return an error 1033 */ 1034 return ERR_PTR(-E2BIG); 1035 inc_elem_count(htab); 1036 l_new = bpf_mem_cache_alloc(&htab->ma); 1037 if (!l_new) { 1038 l_new = ERR_PTR(-ENOMEM); 1039 goto dec_count; 1040 } 1041 } 1042 1043 memcpy(l_new->key, key, key_size); 1044 if (percpu) { 1045 if (prealloc) { 1046 pptr = htab_elem_get_ptr(l_new, key_size); 1047 } else { 1048 /* alloc_percpu zero-fills */ 1049 pptr = bpf_mem_cache_alloc(&htab->pcpu_ma); 1050 if (!pptr) { 1051 bpf_mem_cache_free(&htab->ma, l_new); 1052 l_new = ERR_PTR(-ENOMEM); 1053 goto dec_count; 1054 } 1055 l_new->ptr_to_pptr = pptr; 1056 pptr = *(void **)pptr; 1057 } 1058 1059 pcpu_init_value(htab, pptr, value, onallcpus); 1060 1061 if (!prealloc) 1062 htab_elem_set_ptr(l_new, key_size, pptr); 1063 } else if (fd_htab_map_needs_adjust(htab)) { 1064 size = round_up(size, 8); 1065 memcpy(l_new->key + round_up(key_size, 8), value, size); 1066 } else { 1067 copy_map_value(&htab->map, 1068 l_new->key + round_up(key_size, 8), 1069 value); 1070 } 1071 1072 l_new->hash = hash; 1073 return l_new; 1074 dec_count: 1075 dec_elem_count(htab); 1076 return l_new; 1077 } 1078 1079 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, 1080 u64 map_flags) 1081 { 1082 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 1083 /* elem already exists */ 1084 return -EEXIST; 1085 1086 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 1087 /* elem doesn't exist, cannot update it */ 1088 return -ENOENT; 1089 1090 return 0; 1091 } 1092 1093 /* Called from syscall or from eBPF program */ 1094 static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, 1095 u64 map_flags) 1096 { 1097 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1098 struct htab_elem *l_new = NULL, *l_old; 1099 struct hlist_nulls_head *head; 1100 unsigned long flags; 1101 struct bucket *b; 1102 u32 key_size, hash; 1103 int ret; 1104 1105 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 1106 /* unknown flags */ 1107 return -EINVAL; 1108 1109 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1110 !rcu_read_lock_bh_held()); 1111 1112 key_size = map->key_size; 1113 1114 hash = htab_map_hash(key, key_size, htab->hashrnd); 1115 1116 b = __select_bucket(htab, hash); 1117 head = &b->head; 1118 1119 if (unlikely(map_flags & BPF_F_LOCK)) { 1120 if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1121 return -EINVAL; 1122 /* find an element without taking the bucket lock */ 1123 l_old = lookup_nulls_elem_raw(head, hash, key, key_size, 1124 htab->n_buckets); 1125 ret = check_flags(htab, l_old, map_flags); 1126 if (ret) 1127 return ret; 1128 if (l_old) { 1129 /* grab the element lock and update value in place */ 1130 copy_map_value_locked(map, 1131 l_old->key + round_up(key_size, 8), 1132 value, false); 1133 return 0; 1134 } 1135 /* fall through, grab the bucket lock and lookup again. 1136 * 99.9% chance that the element won't be found, 1137 * but second lookup under lock has to be done. 1138 */ 1139 } 1140 1141 ret = htab_lock_bucket(htab, b, hash, &flags); 1142 if (ret) 1143 return ret; 1144 1145 l_old = lookup_elem_raw(head, hash, key, key_size); 1146 1147 ret = check_flags(htab, l_old, map_flags); 1148 if (ret) 1149 goto err; 1150 1151 if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { 1152 /* first lookup without the bucket lock didn't find the element, 1153 * but second lookup with the bucket lock found it. 
1154 * This case is highly unlikely, but has to be dealt with: 1155 * grab the element lock in addition to the bucket lock 1156 * and update element in place 1157 */ 1158 copy_map_value_locked(map, 1159 l_old->key + round_up(key_size, 8), 1160 value, false); 1161 ret = 0; 1162 goto err; 1163 } 1164 1165 l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, 1166 l_old); 1167 if (IS_ERR(l_new)) { 1168 /* all pre-allocated elements are in use or memory exhausted */ 1169 ret = PTR_ERR(l_new); 1170 goto err; 1171 } 1172 1173 /* add new element to the head of the list, so that 1174 * concurrent search will find it before old elem 1175 */ 1176 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1177 if (l_old) { 1178 hlist_nulls_del_rcu(&l_old->hash_node); 1179 if (!htab_is_prealloc(htab)) 1180 free_htab_elem(htab, l_old); 1181 else 1182 check_and_free_fields(htab, l_old); 1183 } 1184 ret = 0; 1185 err: 1186 htab_unlock_bucket(htab, b, hash, flags); 1187 return ret; 1188 } 1189 1190 static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem) 1191 { 1192 check_and_free_fields(htab, elem); 1193 bpf_map_dec_elem_count(&htab->map); 1194 bpf_lru_push_free(&htab->lru, &elem->lru_node); 1195 } 1196 1197 static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value, 1198 u64 map_flags) 1199 { 1200 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1201 struct htab_elem *l_new, *l_old = NULL; 1202 struct hlist_nulls_head *head; 1203 unsigned long flags; 1204 struct bucket *b; 1205 u32 key_size, hash; 1206 int ret; 1207 1208 if (unlikely(map_flags > BPF_EXIST)) 1209 /* unknown flags */ 1210 return -EINVAL; 1211 1212 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1213 !rcu_read_lock_bh_held()); 1214 1215 key_size = map->key_size; 1216 1217 hash = htab_map_hash(key, key_size, htab->hashrnd); 1218 1219 b = __select_bucket(htab, hash); 1220 head = &b->head; 1221 1222 /* For LRU, we need to alloc before taking bucket's 1223 * spinlock because getting free nodes from LRU may need 1224 * to remove older elements from htab and this removal 1225 * operation will need a bucket lock. 
1226 */ 1227 l_new = prealloc_lru_pop(htab, key, hash); 1228 if (!l_new) 1229 return -ENOMEM; 1230 copy_map_value(&htab->map, 1231 l_new->key + round_up(map->key_size, 8), value); 1232 1233 ret = htab_lock_bucket(htab, b, hash, &flags); 1234 if (ret) 1235 goto err_lock_bucket; 1236 1237 l_old = lookup_elem_raw(head, hash, key, key_size); 1238 1239 ret = check_flags(htab, l_old, map_flags); 1240 if (ret) 1241 goto err; 1242 1243 /* add new element to the head of the list, so that 1244 * concurrent search will find it before old elem 1245 */ 1246 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1247 if (l_old) { 1248 bpf_lru_node_set_ref(&l_new->lru_node); 1249 hlist_nulls_del_rcu(&l_old->hash_node); 1250 } 1251 ret = 0; 1252 1253 err: 1254 htab_unlock_bucket(htab, b, hash, flags); 1255 1256 err_lock_bucket: 1257 if (ret) 1258 htab_lru_push_free(htab, l_new); 1259 else if (l_old) 1260 htab_lru_push_free(htab, l_old); 1261 1262 return ret; 1263 } 1264 1265 static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1266 void *value, u64 map_flags, 1267 bool onallcpus) 1268 { 1269 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1270 struct htab_elem *l_new = NULL, *l_old; 1271 struct hlist_nulls_head *head; 1272 unsigned long flags; 1273 struct bucket *b; 1274 u32 key_size, hash; 1275 int ret; 1276 1277 if (unlikely(map_flags > BPF_EXIST)) 1278 /* unknown flags */ 1279 return -EINVAL; 1280 1281 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1282 !rcu_read_lock_bh_held()); 1283 1284 key_size = map->key_size; 1285 1286 hash = htab_map_hash(key, key_size, htab->hashrnd); 1287 1288 b = __select_bucket(htab, hash); 1289 head = &b->head; 1290 1291 ret = htab_lock_bucket(htab, b, hash, &flags); 1292 if (ret) 1293 return ret; 1294 1295 l_old = lookup_elem_raw(head, hash, key, key_size); 1296 1297 ret = check_flags(htab, l_old, map_flags); 1298 if (ret) 1299 goto err; 1300 1301 if (l_old) { 1302 /* per-cpu hash map can update value in-place */ 1303 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1304 value, onallcpus); 1305 } else { 1306 l_new = alloc_htab_elem(htab, key, value, key_size, 1307 hash, true, onallcpus, NULL); 1308 if (IS_ERR(l_new)) { 1309 ret = PTR_ERR(l_new); 1310 goto err; 1311 } 1312 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1313 } 1314 ret = 0; 1315 err: 1316 htab_unlock_bucket(htab, b, hash, flags); 1317 return ret; 1318 } 1319 1320 static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1321 void *value, u64 map_flags, 1322 bool onallcpus) 1323 { 1324 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1325 struct htab_elem *l_new = NULL, *l_old; 1326 struct hlist_nulls_head *head; 1327 unsigned long flags; 1328 struct bucket *b; 1329 u32 key_size, hash; 1330 int ret; 1331 1332 if (unlikely(map_flags > BPF_EXIST)) 1333 /* unknown flags */ 1334 return -EINVAL; 1335 1336 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1337 !rcu_read_lock_bh_held()); 1338 1339 key_size = map->key_size; 1340 1341 hash = htab_map_hash(key, key_size, htab->hashrnd); 1342 1343 b = __select_bucket(htab, hash); 1344 head = &b->head; 1345 1346 /* For LRU, we need to alloc before taking bucket's 1347 * spinlock because LRU's elem alloc may need 1348 * to remove older elem from htab and this removal 1349 * operation will need a bucket lock. 
1350 */ 1351 if (map_flags != BPF_EXIST) { 1352 l_new = prealloc_lru_pop(htab, key, hash); 1353 if (!l_new) 1354 return -ENOMEM; 1355 } 1356 1357 ret = htab_lock_bucket(htab, b, hash, &flags); 1358 if (ret) 1359 goto err_lock_bucket; 1360 1361 l_old = lookup_elem_raw(head, hash, key, key_size); 1362 1363 ret = check_flags(htab, l_old, map_flags); 1364 if (ret) 1365 goto err; 1366 1367 if (l_old) { 1368 bpf_lru_node_set_ref(&l_old->lru_node); 1369 1370 /* per-cpu hash map can update value in-place */ 1371 pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size), 1372 value, onallcpus); 1373 } else { 1374 pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size), 1375 value, onallcpus); 1376 hlist_nulls_add_head_rcu(&l_new->hash_node, head); 1377 l_new = NULL; 1378 } 1379 ret = 0; 1380 err: 1381 htab_unlock_bucket(htab, b, hash, flags); 1382 err_lock_bucket: 1383 if (l_new) { 1384 bpf_map_dec_elem_count(&htab->map); 1385 bpf_lru_push_free(&htab->lru, &l_new->lru_node); 1386 } 1387 return ret; 1388 } 1389 1390 static long htab_percpu_map_update_elem(struct bpf_map *map, void *key, 1391 void *value, u64 map_flags) 1392 { 1393 return __htab_percpu_map_update_elem(map, key, value, map_flags, false); 1394 } 1395 1396 static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key, 1397 void *value, u64 map_flags) 1398 { 1399 return __htab_lru_percpu_map_update_elem(map, key, value, map_flags, 1400 false); 1401 } 1402 1403 /* Called from syscall or from eBPF program */ 1404 static long htab_map_delete_elem(struct bpf_map *map, void *key) 1405 { 1406 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1407 struct hlist_nulls_head *head; 1408 struct bucket *b; 1409 struct htab_elem *l; 1410 unsigned long flags; 1411 u32 hash, key_size; 1412 int ret; 1413 1414 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1415 !rcu_read_lock_bh_held()); 1416 1417 key_size = map->key_size; 1418 1419 hash = htab_map_hash(key, key_size, htab->hashrnd); 1420 b = __select_bucket(htab, hash); 1421 head = &b->head; 1422 1423 ret = htab_lock_bucket(htab, b, hash, &flags); 1424 if (ret) 1425 return ret; 1426 1427 l = lookup_elem_raw(head, hash, key, key_size); 1428 1429 if (l) { 1430 hlist_nulls_del_rcu(&l->hash_node); 1431 free_htab_elem(htab, l); 1432 } else { 1433 ret = -ENOENT; 1434 } 1435 1436 htab_unlock_bucket(htab, b, hash, flags); 1437 return ret; 1438 } 1439 1440 static long htab_lru_map_delete_elem(struct bpf_map *map, void *key) 1441 { 1442 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1443 struct hlist_nulls_head *head; 1444 struct bucket *b; 1445 struct htab_elem *l; 1446 unsigned long flags; 1447 u32 hash, key_size; 1448 int ret; 1449 1450 WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 1451 !rcu_read_lock_bh_held()); 1452 1453 key_size = map->key_size; 1454 1455 hash = htab_map_hash(key, key_size, htab->hashrnd); 1456 b = __select_bucket(htab, hash); 1457 head = &b->head; 1458 1459 ret = htab_lock_bucket(htab, b, hash, &flags); 1460 if (ret) 1461 return ret; 1462 1463 l = lookup_elem_raw(head, hash, key, key_size); 1464 1465 if (l) 1466 hlist_nulls_del_rcu(&l->hash_node); 1467 else 1468 ret = -ENOENT; 1469 1470 htab_unlock_bucket(htab, b, hash, flags); 1471 if (l) 1472 htab_lru_push_free(htab, l); 1473 return ret; 1474 } 1475 1476 static void delete_all_elements(struct bpf_htab *htab) 1477 { 1478 int i; 1479 1480 /* It's called from a worker thread, so disable migration here, 1481 * since bpf_mem_cache_free() relies on that. 
1482 */ 1483 migrate_disable(); 1484 for (i = 0; i < htab->n_buckets; i++) { 1485 struct hlist_nulls_head *head = select_bucket(htab, i); 1486 struct hlist_nulls_node *n; 1487 struct htab_elem *l; 1488 1489 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1490 hlist_nulls_del_rcu(&l->hash_node); 1491 htab_elem_free(htab, l); 1492 } 1493 cond_resched(); 1494 } 1495 migrate_enable(); 1496 } 1497 1498 static void htab_free_malloced_timers(struct bpf_htab *htab) 1499 { 1500 int i; 1501 1502 rcu_read_lock(); 1503 for (i = 0; i < htab->n_buckets; i++) { 1504 struct hlist_nulls_head *head = select_bucket(htab, i); 1505 struct hlist_nulls_node *n; 1506 struct htab_elem *l; 1507 1508 hlist_nulls_for_each_entry(l, n, head, hash_node) { 1509 /* We only free timer on uref dropping to zero */ 1510 bpf_obj_free_timer(htab->map.record, l->key + round_up(htab->map.key_size, 8)); 1511 } 1512 cond_resched_rcu(); 1513 } 1514 rcu_read_unlock(); 1515 } 1516 1517 static void htab_map_free_timers(struct bpf_map *map) 1518 { 1519 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1520 1521 /* We only free timer on uref dropping to zero */ 1522 if (!btf_record_has_field(htab->map.record, BPF_TIMER)) 1523 return; 1524 if (!htab_is_prealloc(htab)) 1525 htab_free_malloced_timers(htab); 1526 else 1527 htab_free_prealloced_timers(htab); 1528 } 1529 1530 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 1531 static void htab_map_free(struct bpf_map *map) 1532 { 1533 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1534 int i; 1535 1536 /* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback. 1537 * bpf_free_used_maps() is called after bpf prog is no longer executing. 1538 * There is no need to synchronize_rcu() here to protect map elements. 1539 */ 1540 1541 /* htab no longer uses call_rcu() directly. bpf_mem_alloc does it 1542 * underneath and is reponsible for waiting for callbacks to finish 1543 * during bpf_mem_alloc_destroy(). 
1544 */ 1545 if (!htab_is_prealloc(htab)) { 1546 delete_all_elements(htab); 1547 } else { 1548 htab_free_prealloced_fields(htab); 1549 prealloc_destroy(htab); 1550 } 1551 1552 bpf_map_free_elem_count(map); 1553 free_percpu(htab->extra_elems); 1554 bpf_map_area_free(htab->buckets); 1555 bpf_mem_alloc_destroy(&htab->pcpu_ma); 1556 bpf_mem_alloc_destroy(&htab->ma); 1557 if (htab->use_percpu_counter) 1558 percpu_counter_destroy(&htab->pcount); 1559 for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) 1560 free_percpu(htab->map_locked[i]); 1561 lockdep_unregister_key(&htab->lockdep_key); 1562 bpf_map_area_free(htab); 1563 } 1564 1565 static void htab_map_seq_show_elem(struct bpf_map *map, void *key, 1566 struct seq_file *m) 1567 { 1568 void *value; 1569 1570 rcu_read_lock(); 1571 1572 value = htab_map_lookup_elem(map, key); 1573 if (!value) { 1574 rcu_read_unlock(); 1575 return; 1576 } 1577 1578 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 1579 seq_puts(m, ": "); 1580 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 1581 seq_puts(m, "\n"); 1582 1583 rcu_read_unlock(); 1584 } 1585 1586 static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1587 void *value, bool is_lru_map, 1588 bool is_percpu, u64 flags) 1589 { 1590 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1591 struct hlist_nulls_head *head; 1592 unsigned long bflags; 1593 struct htab_elem *l; 1594 u32 hash, key_size; 1595 struct bucket *b; 1596 int ret; 1597 1598 key_size = map->key_size; 1599 1600 hash = htab_map_hash(key, key_size, htab->hashrnd); 1601 b = __select_bucket(htab, hash); 1602 head = &b->head; 1603 1604 ret = htab_lock_bucket(htab, b, hash, &bflags); 1605 if (ret) 1606 return ret; 1607 1608 l = lookup_elem_raw(head, hash, key, key_size); 1609 if (!l) { 1610 ret = -ENOENT; 1611 } else { 1612 if (is_percpu) { 1613 u32 roundup_value_size = round_up(map->value_size, 8); 1614 void __percpu *pptr; 1615 int off = 0, cpu; 1616 1617 pptr = htab_elem_get_ptr(l, key_size); 1618 for_each_possible_cpu(cpu) { 1619 copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu)); 1620 check_and_init_map_value(&htab->map, value + off); 1621 off += roundup_value_size; 1622 } 1623 } else { 1624 u32 roundup_key_size = round_up(map->key_size, 8); 1625 1626 if (flags & BPF_F_LOCK) 1627 copy_map_value_locked(map, value, l->key + 1628 roundup_key_size, 1629 true); 1630 else 1631 copy_map_value(map, value, l->key + 1632 roundup_key_size); 1633 /* Zeroing special fields in the temp buffer */ 1634 check_and_init_map_value(map, value); 1635 } 1636 1637 hlist_nulls_del_rcu(&l->hash_node); 1638 if (!is_lru_map) 1639 free_htab_elem(htab, l); 1640 } 1641 1642 htab_unlock_bucket(htab, b, hash, bflags); 1643 1644 if (is_lru_map && l) 1645 htab_lru_push_free(htab, l); 1646 1647 return ret; 1648 } 1649 1650 static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1651 void *value, u64 flags) 1652 { 1653 return __htab_map_lookup_and_delete_elem(map, key, value, false, false, 1654 flags); 1655 } 1656 1657 static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1658 void *key, void *value, 1659 u64 flags) 1660 { 1661 return __htab_map_lookup_and_delete_elem(map, key, value, false, true, 1662 flags); 1663 } 1664 1665 static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key, 1666 void *value, u64 flags) 1667 { 1668 return __htab_map_lookup_and_delete_elem(map, key, value, true, false, 1669 flags); 1670 } 1671 1672 static int 
htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, 1673 void *key, void *value, 1674 u64 flags) 1675 { 1676 return __htab_map_lookup_and_delete_elem(map, key, value, true, true, 1677 flags); 1678 } 1679 1680 static int 1681 __htab_map_lookup_and_delete_batch(struct bpf_map *map, 1682 const union bpf_attr *attr, 1683 union bpf_attr __user *uattr, 1684 bool do_delete, bool is_lru_map, 1685 bool is_percpu) 1686 { 1687 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1688 u32 bucket_cnt, total, key_size, value_size, roundup_key_size; 1689 void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val; 1690 void __user *uvalues = u64_to_user_ptr(attr->batch.values); 1691 void __user *ukeys = u64_to_user_ptr(attr->batch.keys); 1692 void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); 1693 u32 batch, max_count, size, bucket_size, map_id; 1694 struct htab_elem *node_to_free = NULL; 1695 u64 elem_map_flags, map_flags; 1696 struct hlist_nulls_head *head; 1697 struct hlist_nulls_node *n; 1698 unsigned long flags = 0; 1699 bool locked = false; 1700 struct htab_elem *l; 1701 struct bucket *b; 1702 int ret = 0; 1703 1704 elem_map_flags = attr->batch.elem_flags; 1705 if ((elem_map_flags & ~BPF_F_LOCK) || 1706 ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))) 1707 return -EINVAL; 1708 1709 map_flags = attr->batch.flags; 1710 if (map_flags) 1711 return -EINVAL; 1712 1713 max_count = attr->batch.count; 1714 if (!max_count) 1715 return 0; 1716 1717 if (put_user(0, &uattr->batch.count)) 1718 return -EFAULT; 1719 1720 batch = 0; 1721 if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch))) 1722 return -EFAULT; 1723 1724 if (batch >= htab->n_buckets) 1725 return -ENOENT; 1726 1727 key_size = htab->map.key_size; 1728 roundup_key_size = round_up(htab->map.key_size, 8); 1729 value_size = htab->map.value_size; 1730 size = round_up(value_size, 8); 1731 if (is_percpu) 1732 value_size = size * num_possible_cpus(); 1733 total = 0; 1734 /* while experimenting with hash tables with sizes ranging from 10 to 1735 * 1000, it was observed that a bucket can have up to 5 entries. 1736 */ 1737 bucket_size = 5; 1738 1739 alloc: 1740 /* We cannot do copy_from_user or copy_to_user inside 1741 * the rcu_read_lock. Allocate enough space here. 1742 */ 1743 keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); 1744 values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); 1745 if (!keys || !values) { 1746 ret = -ENOMEM; 1747 goto after_loop; 1748 } 1749 1750 again: 1751 bpf_disable_instrumentation(); 1752 rcu_read_lock(); 1753 again_nocopy: 1754 dst_key = keys; 1755 dst_val = values; 1756 b = &htab->buckets[batch]; 1757 head = &b->head; 1758 /* do not grab the lock unless need it (bucket_cnt > 0). */ 1759 if (locked) { 1760 ret = htab_lock_bucket(htab, b, batch, &flags); 1761 if (ret) { 1762 rcu_read_unlock(); 1763 bpf_enable_instrumentation(); 1764 goto after_loop; 1765 } 1766 } 1767 1768 bucket_cnt = 0; 1769 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 1770 bucket_cnt++; 1771 1772 if (bucket_cnt && !locked) { 1773 locked = true; 1774 goto again_nocopy; 1775 } 1776 1777 if (bucket_cnt > (max_count - total)) { 1778 if (total == 0) 1779 ret = -ENOSPC; 1780 /* Note that since bucket_cnt > 0 here, it is implicit 1781 * that the locked was grabbed, so release it. 
1782 */ 1783 htab_unlock_bucket(htab, b, batch, flags); 1784 rcu_read_unlock(); 1785 bpf_enable_instrumentation(); 1786 goto after_loop; 1787 } 1788 1789 if (bucket_cnt > bucket_size) { 1790 bucket_size = bucket_cnt; 1791 /* Note that since bucket_cnt > 0 here, it is implicit 1792 * that the locked was grabbed, so release it. 1793 */ 1794 htab_unlock_bucket(htab, b, batch, flags); 1795 rcu_read_unlock(); 1796 bpf_enable_instrumentation(); 1797 kvfree(keys); 1798 kvfree(values); 1799 goto alloc; 1800 } 1801 1802 /* Next block is only safe to run if you have grabbed the lock */ 1803 if (!locked) 1804 goto next_batch; 1805 1806 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 1807 memcpy(dst_key, l->key, key_size); 1808 1809 if (is_percpu) { 1810 int off = 0, cpu; 1811 void __percpu *pptr; 1812 1813 pptr = htab_elem_get_ptr(l, map->key_size); 1814 for_each_possible_cpu(cpu) { 1815 copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu)); 1816 check_and_init_map_value(&htab->map, dst_val + off); 1817 off += size; 1818 } 1819 } else { 1820 value = l->key + roundup_key_size; 1821 if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { 1822 struct bpf_map **inner_map = value; 1823 1824 /* Actual value is the id of the inner map */ 1825 map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); 1826 value = &map_id; 1827 } 1828 1829 if (elem_map_flags & BPF_F_LOCK) 1830 copy_map_value_locked(map, dst_val, value, 1831 true); 1832 else 1833 copy_map_value(map, dst_val, value); 1834 /* Zeroing special fields in the temp buffer */ 1835 check_and_init_map_value(map, dst_val); 1836 } 1837 if (do_delete) { 1838 hlist_nulls_del_rcu(&l->hash_node); 1839 1840 /* bpf_lru_push_free() will acquire lru_lock, which 1841 * may cause deadlock. See comments in function 1842 * prealloc_lru_pop(). Let us do bpf_lru_push_free() 1843 * after releasing the bucket lock. 1844 */ 1845 if (is_lru_map) { 1846 l->batch_flink = node_to_free; 1847 node_to_free = l; 1848 } else { 1849 free_htab_elem(htab, l); 1850 } 1851 } 1852 dst_key += key_size; 1853 dst_val += value_size; 1854 } 1855 1856 htab_unlock_bucket(htab, b, batch, flags); 1857 locked = false; 1858 1859 while (node_to_free) { 1860 l = node_to_free; 1861 node_to_free = node_to_free->batch_flink; 1862 htab_lru_push_free(htab, l); 1863 } 1864 1865 next_batch: 1866 /* If we are not copying data, we can go to next bucket and avoid 1867 * unlocking the rcu. 
1868 */ 1869 if (!bucket_cnt && (batch + 1 < htab->n_buckets)) { 1870 batch++; 1871 goto again_nocopy; 1872 } 1873 1874 rcu_read_unlock(); 1875 bpf_enable_instrumentation(); 1876 if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys, 1877 key_size * bucket_cnt) || 1878 copy_to_user(uvalues + total * value_size, values, 1879 value_size * bucket_cnt))) { 1880 ret = -EFAULT; 1881 goto after_loop; 1882 } 1883 1884 total += bucket_cnt; 1885 batch++; 1886 if (batch >= htab->n_buckets) { 1887 ret = -ENOENT; 1888 goto after_loop; 1889 } 1890 goto again; 1891 1892 after_loop: 1893 if (ret == -EFAULT) 1894 goto out; 1895 1896 /* copy # of entries and next batch */ 1897 ubatch = u64_to_user_ptr(attr->batch.out_batch); 1898 if (copy_to_user(ubatch, &batch, sizeof(batch)) || 1899 put_user(total, &uattr->batch.count)) 1900 ret = -EFAULT; 1901 1902 out: 1903 kvfree(keys); 1904 kvfree(values); 1905 return ret; 1906 } 1907 1908 static int 1909 htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1910 union bpf_attr __user *uattr) 1911 { 1912 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1913 false, true); 1914 } 1915 1916 static int 1917 htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1918 const union bpf_attr *attr, 1919 union bpf_attr __user *uattr) 1920 { 1921 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1922 false, true); 1923 } 1924 1925 static int 1926 htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1927 union bpf_attr __user *uattr) 1928 { 1929 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1930 false, false); 1931 } 1932 1933 static int 1934 htab_map_lookup_and_delete_batch(struct bpf_map *map, 1935 const union bpf_attr *attr, 1936 union bpf_attr __user *uattr) 1937 { 1938 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1939 false, false); 1940 } 1941 1942 static int 1943 htab_lru_percpu_map_lookup_batch(struct bpf_map *map, 1944 const union bpf_attr *attr, 1945 union bpf_attr __user *uattr) 1946 { 1947 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1948 true, true); 1949 } 1950 1951 static int 1952 htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map, 1953 const union bpf_attr *attr, 1954 union bpf_attr __user *uattr) 1955 { 1956 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1957 true, true); 1958 } 1959 1960 static int 1961 htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, 1962 union bpf_attr __user *uattr) 1963 { 1964 return __htab_map_lookup_and_delete_batch(map, attr, uattr, false, 1965 true, false); 1966 } 1967 1968 static int 1969 htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, 1970 const union bpf_attr *attr, 1971 union bpf_attr __user *uattr) 1972 { 1973 return __htab_map_lookup_and_delete_batch(map, attr, uattr, true, 1974 true, false); 1975 } 1976 1977 struct bpf_iter_seq_hash_map_info { 1978 struct bpf_map *map; 1979 struct bpf_htab *htab; 1980 void *percpu_value_buf; // non-zero means percpu hash 1981 u32 bucket_id; 1982 u32 skip_elems; 1983 }; 1984 1985 static struct htab_elem * 1986 bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, 1987 struct htab_elem *prev_elem) 1988 { 1989 const struct bpf_htab *htab = info->htab; 1990 u32 skip_elems = info->skip_elems; 1991 u32 bucket_id = info->bucket_id; 1992 struct hlist_nulls_head *head; 1993 struct hlist_nulls_node *n; 1994 struct htab_elem *elem; 1995 struct bucket *b; 1996 u32 i, count; 
1997 1998 if (bucket_id >= htab->n_buckets) 1999 return NULL; 2000 2001 /* try to find next elem in the same bucket */ 2002 if (prev_elem) { 2003 /* no update/deletion on this bucket, prev_elem should be still valid 2004 * and we won't skip elements. 2005 */ 2006 n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); 2007 elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); 2008 if (elem) 2009 return elem; 2010 2011 /* not found, unlock and go to the next bucket */ 2012 b = &htab->buckets[bucket_id++]; 2013 rcu_read_unlock(); 2014 skip_elems = 0; 2015 } 2016 2017 for (i = bucket_id; i < htab->n_buckets; i++) { 2018 b = &htab->buckets[i]; 2019 rcu_read_lock(); 2020 2021 count = 0; 2022 head = &b->head; 2023 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2024 if (count >= skip_elems) { 2025 info->bucket_id = i; 2026 info->skip_elems = count; 2027 return elem; 2028 } 2029 count++; 2030 } 2031 2032 rcu_read_unlock(); 2033 skip_elems = 0; 2034 } 2035 2036 info->bucket_id = i; 2037 info->skip_elems = 0; 2038 return NULL; 2039 } 2040 2041 static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) 2042 { 2043 struct bpf_iter_seq_hash_map_info *info = seq->private; 2044 struct htab_elem *elem; 2045 2046 elem = bpf_hash_map_seq_find_next(info, NULL); 2047 if (!elem) 2048 return NULL; 2049 2050 if (*pos == 0) 2051 ++*pos; 2052 return elem; 2053 } 2054 2055 static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2056 { 2057 struct bpf_iter_seq_hash_map_info *info = seq->private; 2058 2059 ++*pos; 2060 ++info->skip_elems; 2061 return bpf_hash_map_seq_find_next(info, v); 2062 } 2063 2064 static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) 2065 { 2066 struct bpf_iter_seq_hash_map_info *info = seq->private; 2067 u32 roundup_key_size, roundup_value_size; 2068 struct bpf_iter__bpf_map_elem ctx = {}; 2069 struct bpf_map *map = info->map; 2070 struct bpf_iter_meta meta; 2071 int ret = 0, off = 0, cpu; 2072 struct bpf_prog *prog; 2073 void __percpu *pptr; 2074 2075 meta.seq = seq; 2076 prog = bpf_iter_get_info(&meta, elem == NULL); 2077 if (prog) { 2078 ctx.meta = &meta; 2079 ctx.map = info->map; 2080 if (elem) { 2081 roundup_key_size = round_up(map->key_size, 8); 2082 ctx.key = elem->key; 2083 if (!info->percpu_value_buf) { 2084 ctx.value = elem->key + roundup_key_size; 2085 } else { 2086 roundup_value_size = round_up(map->value_size, 8); 2087 pptr = htab_elem_get_ptr(elem, map->key_size); 2088 for_each_possible_cpu(cpu) { 2089 copy_map_value_long(map, info->percpu_value_buf + off, 2090 per_cpu_ptr(pptr, cpu)); 2091 check_and_init_map_value(map, info->percpu_value_buf + off); 2092 off += roundup_value_size; 2093 } 2094 ctx.value = info->percpu_value_buf; 2095 } 2096 } 2097 ret = bpf_iter_run_prog(prog, &ctx); 2098 } 2099 2100 return ret; 2101 } 2102 2103 static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) 2104 { 2105 return __bpf_hash_map_seq_show(seq, v); 2106 } 2107 2108 static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) 2109 { 2110 if (!v) 2111 (void)__bpf_hash_map_seq_show(seq, NULL); 2112 else 2113 rcu_read_unlock(); 2114 } 2115 2116 static int bpf_iter_init_hash_map(void *priv_data, 2117 struct bpf_iter_aux_info *aux) 2118 { 2119 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2120 struct bpf_map *map = aux->map; 2121 void *value_buf; 2122 u32 buf_size; 2123 2124 if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || 2125 map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { 2126 
buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 2127 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 2128 if (!value_buf) 2129 return -ENOMEM; 2130 2131 seq_info->percpu_value_buf = value_buf; 2132 } 2133 2134 bpf_map_inc_with_uref(map); 2135 seq_info->map = map; 2136 seq_info->htab = container_of(map, struct bpf_htab, map); 2137 return 0; 2138 } 2139 2140 static void bpf_iter_fini_hash_map(void *priv_data) 2141 { 2142 struct bpf_iter_seq_hash_map_info *seq_info = priv_data; 2143 2144 bpf_map_put_with_uref(seq_info->map); 2145 kfree(seq_info->percpu_value_buf); 2146 } 2147 2148 static const struct seq_operations bpf_hash_map_seq_ops = { 2149 .start = bpf_hash_map_seq_start, 2150 .next = bpf_hash_map_seq_next, 2151 .stop = bpf_hash_map_seq_stop, 2152 .show = bpf_hash_map_seq_show, 2153 }; 2154 2155 static const struct bpf_iter_seq_info iter_seq_info = { 2156 .seq_ops = &bpf_hash_map_seq_ops, 2157 .init_seq_private = bpf_iter_init_hash_map, 2158 .fini_seq_private = bpf_iter_fini_hash_map, 2159 .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), 2160 }; 2161 2162 static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn, 2163 void *callback_ctx, u64 flags) 2164 { 2165 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2166 struct hlist_nulls_head *head; 2167 struct hlist_nulls_node *n; 2168 struct htab_elem *elem; 2169 u32 roundup_key_size; 2170 int i, num_elems = 0; 2171 void __percpu *pptr; 2172 struct bucket *b; 2173 void *key, *val; 2174 bool is_percpu; 2175 u64 ret = 0; 2176 2177 if (flags != 0) 2178 return -EINVAL; 2179 2180 is_percpu = htab_is_percpu(htab); 2181 2182 roundup_key_size = round_up(map->key_size, 8); 2183 /* disable migration so percpu value prepared here will be the 2184 * same as the one seen by the bpf program with bpf_map_lookup_elem(). 
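 * With migration disabled, this_cpu_ptr() below keeps resolving to the
 * same CPU's copy for the whole walk, so the callback only ever sees the
 * current CPU's value of a percpu element.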
2185 */ 2186 if (is_percpu) 2187 migrate_disable(); 2188 for (i = 0; i < htab->n_buckets; i++) { 2189 b = &htab->buckets[i]; 2190 rcu_read_lock(); 2191 head = &b->head; 2192 hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { 2193 key = elem->key; 2194 if (is_percpu) { 2195 /* current cpu value for percpu map */ 2196 pptr = htab_elem_get_ptr(elem, map->key_size); 2197 val = this_cpu_ptr(pptr); 2198 } else { 2199 val = elem->key + roundup_key_size; 2200 } 2201 num_elems++; 2202 ret = callback_fn((u64)(long)map, (u64)(long)key, 2203 (u64)(long)val, (u64)(long)callback_ctx, 0); 2204 /* return value: 0 - continue, 1 - stop and return */ 2205 if (ret) { 2206 rcu_read_unlock(); 2207 goto out; 2208 } 2209 } 2210 rcu_read_unlock(); 2211 } 2212 out: 2213 if (is_percpu) 2214 migrate_enable(); 2215 return num_elems; 2216 } 2217 2218 static u64 htab_map_mem_usage(const struct bpf_map *map) 2219 { 2220 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2221 u32 value_size = round_up(htab->map.value_size, 8); 2222 bool prealloc = htab_is_prealloc(htab); 2223 bool percpu = htab_is_percpu(htab); 2224 bool lru = htab_is_lru(htab); 2225 u64 num_entries; 2226 u64 usage = sizeof(struct bpf_htab); 2227 2228 usage += sizeof(struct bucket) * htab->n_buckets; 2229 usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT; 2230 if (prealloc) { 2231 num_entries = map->max_entries; 2232 if (htab_has_extra_elems(htab)) 2233 num_entries += num_possible_cpus(); 2234 2235 usage += htab->elem_size * num_entries; 2236 2237 if (percpu) 2238 usage += value_size * num_possible_cpus() * num_entries; 2239 else if (!lru) 2240 usage += sizeof(struct htab_elem *) * num_possible_cpus(); 2241 } else { 2242 #define LLIST_NODE_SZ sizeof(struct llist_node) 2243 2244 num_entries = htab->use_percpu_counter ? 
2245 percpu_counter_sum(&htab->pcount) : 2246 atomic_read(&htab->count); 2247 usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries; 2248 if (percpu) { 2249 usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries; 2250 usage += value_size * num_possible_cpus() * num_entries; 2251 } 2252 } 2253 return usage; 2254 } 2255 2256 BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab) 2257 const struct bpf_map_ops htab_map_ops = { 2258 .map_meta_equal = bpf_map_meta_equal, 2259 .map_alloc_check = htab_map_alloc_check, 2260 .map_alloc = htab_map_alloc, 2261 .map_free = htab_map_free, 2262 .map_get_next_key = htab_map_get_next_key, 2263 .map_release_uref = htab_map_free_timers, 2264 .map_lookup_elem = htab_map_lookup_elem, 2265 .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem, 2266 .map_update_elem = htab_map_update_elem, 2267 .map_delete_elem = htab_map_delete_elem, 2268 .map_gen_lookup = htab_map_gen_lookup, 2269 .map_seq_show_elem = htab_map_seq_show_elem, 2270 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2271 .map_for_each_callback = bpf_for_each_hash_elem, 2272 .map_mem_usage = htab_map_mem_usage, 2273 BATCH_OPS(htab), 2274 .map_btf_id = &htab_map_btf_ids[0], 2275 .iter_seq_info = &iter_seq_info, 2276 }; 2277 2278 const struct bpf_map_ops htab_lru_map_ops = { 2279 .map_meta_equal = bpf_map_meta_equal, 2280 .map_alloc_check = htab_map_alloc_check, 2281 .map_alloc = htab_map_alloc, 2282 .map_free = htab_map_free, 2283 .map_get_next_key = htab_map_get_next_key, 2284 .map_release_uref = htab_map_free_timers, 2285 .map_lookup_elem = htab_lru_map_lookup_elem, 2286 .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem, 2287 .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys, 2288 .map_update_elem = htab_lru_map_update_elem, 2289 .map_delete_elem = htab_lru_map_delete_elem, 2290 .map_gen_lookup = htab_lru_map_gen_lookup, 2291 .map_seq_show_elem = htab_map_seq_show_elem, 2292 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2293 .map_for_each_callback = bpf_for_each_hash_elem, 2294 .map_mem_usage = htab_map_mem_usage, 2295 BATCH_OPS(htab_lru), 2296 .map_btf_id = &htab_map_btf_ids[0], 2297 .iter_seq_info = &iter_seq_info, 2298 }; 2299 2300 /* Called from eBPF program */ 2301 static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2302 { 2303 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2304 2305 if (l) 2306 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2307 else 2308 return NULL; 2309 } 2310 2311 /* inline bpf_map_lookup_elem() call for per-CPU hashmap */ 2312 static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 2313 { 2314 struct bpf_insn *insn = insn_buf; 2315 2316 if (!bpf_jit_supports_percpu_insn()) 2317 return -EOPNOTSUPP; 2318 2319 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2320 (void *(*)(struct bpf_map *map, void *key))NULL)); 2321 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2322 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3); 2323 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2324 offsetof(struct htab_elem, key) + map->key_size); 2325 *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0); 2326 *insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0); 2327 2328 return insn - insn_buf; 2329 } 2330 2331 static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2332 { 2333 struct htab_elem *l; 2334 2335 if (cpu >= nr_cpu_ids) 2336 return NULL; 2337 2338 l = __htab_map_lookup_elem(map, key); 2339 if (l) 2340 return 
per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2341 else 2342 return NULL; 2343 } 2344 2345 static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key) 2346 { 2347 struct htab_elem *l = __htab_map_lookup_elem(map, key); 2348 2349 if (l) { 2350 bpf_lru_node_set_ref(&l->lru_node); 2351 return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size)); 2352 } 2353 2354 return NULL; 2355 } 2356 2357 static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu) 2358 { 2359 struct htab_elem *l; 2360 2361 if (cpu >= nr_cpu_ids) 2362 return NULL; 2363 2364 l = __htab_map_lookup_elem(map, key); 2365 if (l) { 2366 bpf_lru_node_set_ref(&l->lru_node); 2367 return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu); 2368 } 2369 2370 return NULL; 2371 } 2372 2373 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value) 2374 { 2375 struct htab_elem *l; 2376 void __percpu *pptr; 2377 int ret = -ENOENT; 2378 int cpu, off = 0; 2379 u32 size; 2380 2381 /* per_cpu areas are zero-filled and bpf programs can only 2382 * access 'value_size' of them, so copying rounded areas 2383 * will not leak any kernel data 2384 */ 2385 size = round_up(map->value_size, 8); 2386 rcu_read_lock(); 2387 l = __htab_map_lookup_elem(map, key); 2388 if (!l) 2389 goto out; 2390 /* We do not mark LRU map element here in order to not mess up 2391 * eviction heuristics when user space does a map walk. 2392 */ 2393 pptr = htab_elem_get_ptr(l, map->key_size); 2394 for_each_possible_cpu(cpu) { 2395 copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu)); 2396 check_and_init_map_value(map, value + off); 2397 off += size; 2398 } 2399 ret = 0; 2400 out: 2401 rcu_read_unlock(); 2402 return ret; 2403 } 2404 2405 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, 2406 u64 map_flags) 2407 { 2408 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2409 int ret; 2410 2411 rcu_read_lock(); 2412 if (htab_is_lru(htab)) 2413 ret = __htab_lru_percpu_map_update_elem(map, key, value, 2414 map_flags, true); 2415 else 2416 ret = __htab_percpu_map_update_elem(map, key, value, map_flags, 2417 true); 2418 rcu_read_unlock(); 2419 2420 return ret; 2421 } 2422 2423 static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key, 2424 struct seq_file *m) 2425 { 2426 struct htab_elem *l; 2427 void __percpu *pptr; 2428 int cpu; 2429 2430 rcu_read_lock(); 2431 2432 l = __htab_map_lookup_elem(map, key); 2433 if (!l) { 2434 rcu_read_unlock(); 2435 return; 2436 } 2437 2438 btf_type_seq_show(map->btf, map->btf_key_type_id, key, m); 2439 seq_puts(m, ": {\n"); 2440 pptr = htab_elem_get_ptr(l, map->key_size); 2441 for_each_possible_cpu(cpu) { 2442 seq_printf(m, "\tcpu%d: ", cpu); 2443 btf_type_seq_show(map->btf, map->btf_value_type_id, 2444 per_cpu_ptr(pptr, cpu), m); 2445 seq_puts(m, "\n"); 2446 } 2447 seq_puts(m, "}\n"); 2448 2449 rcu_read_unlock(); 2450 } 2451 2452 const struct bpf_map_ops htab_percpu_map_ops = { 2453 .map_meta_equal = bpf_map_meta_equal, 2454 .map_alloc_check = htab_map_alloc_check, 2455 .map_alloc = htab_map_alloc, 2456 .map_free = htab_map_free, 2457 .map_get_next_key = htab_map_get_next_key, 2458 .map_lookup_elem = htab_percpu_map_lookup_elem, 2459 .map_gen_lookup = htab_percpu_map_gen_lookup, 2460 .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem, 2461 .map_update_elem = htab_percpu_map_update_elem, 2462 .map_delete_elem = htab_map_delete_elem, 2463 .map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem, 2464 
.map_seq_show_elem = htab_percpu_map_seq_show_elem, 2465 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2466 .map_for_each_callback = bpf_for_each_hash_elem, 2467 .map_mem_usage = htab_map_mem_usage, 2468 BATCH_OPS(htab_percpu), 2469 .map_btf_id = &htab_map_btf_ids[0], 2470 .iter_seq_info = &iter_seq_info, 2471 }; 2472 2473 const struct bpf_map_ops htab_lru_percpu_map_ops = { 2474 .map_meta_equal = bpf_map_meta_equal, 2475 .map_alloc_check = htab_map_alloc_check, 2476 .map_alloc = htab_map_alloc, 2477 .map_free = htab_map_free, 2478 .map_get_next_key = htab_map_get_next_key, 2479 .map_lookup_elem = htab_lru_percpu_map_lookup_elem, 2480 .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem, 2481 .map_update_elem = htab_lru_percpu_map_update_elem, 2482 .map_delete_elem = htab_lru_map_delete_elem, 2483 .map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem, 2484 .map_seq_show_elem = htab_percpu_map_seq_show_elem, 2485 .map_set_for_each_callback_args = map_set_for_each_callback_args, 2486 .map_for_each_callback = bpf_for_each_hash_elem, 2487 .map_mem_usage = htab_map_mem_usage, 2488 BATCH_OPS(htab_lru_percpu), 2489 .map_btf_id = &htab_map_btf_ids[0], 2490 .iter_seq_info = &iter_seq_info, 2491 }; 2492 2493 static int fd_htab_map_alloc_check(union bpf_attr *attr) 2494 { 2495 if (attr->value_size != sizeof(u32)) 2496 return -EINVAL; 2497 return htab_map_alloc_check(attr); 2498 } 2499 2500 static void fd_htab_map_free(struct bpf_map *map) 2501 { 2502 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 2503 struct hlist_nulls_node *n; 2504 struct hlist_nulls_head *head; 2505 struct htab_elem *l; 2506 int i; 2507 2508 for (i = 0; i < htab->n_buckets; i++) { 2509 head = select_bucket(htab, i); 2510 2511 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2512 void *ptr = fd_htab_map_get_ptr(map, l); 2513 2514 map->ops->map_fd_put_ptr(map, ptr, false); 2515 } 2516 } 2517 2518 htab_map_free(map); 2519 } 2520 2521 /* only called from syscall */ 2522 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 2523 { 2524 void **ptr; 2525 int ret = 0; 2526 2527 if (!map->ops->map_fd_sys_lookup_elem) 2528 return -ENOTSUPP; 2529 2530 rcu_read_lock(); 2531 ptr = htab_map_lookup_elem(map, key); 2532 if (ptr) 2533 *value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr)); 2534 else 2535 ret = -ENOENT; 2536 rcu_read_unlock(); 2537 2538 return ret; 2539 } 2540 2541 /* only called from syscall */ 2542 int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, 2543 void *key, void *value, u64 map_flags) 2544 { 2545 void *ptr; 2546 int ret; 2547 u32 ufd = *(u32 *)value; 2548 2549 ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); 2550 if (IS_ERR(ptr)) 2551 return PTR_ERR(ptr); 2552 2553 /* The htab bucket lock is always held during update operations in fd 2554 * htab map, and the following rcu_read_lock() is only used to avoid 2555 * the WARN_ON_ONCE in htab_map_update_elem(). 
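 * That warning fires when the update runs without any RCU read-side lock
 * held, even though the bucket lock mentioned above already provides the
 * required protection here.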
2556 */ 2557 rcu_read_lock(); 2558 ret = htab_map_update_elem(map, key, &ptr, map_flags); 2559 rcu_read_unlock(); 2560 if (ret) 2561 map->ops->map_fd_put_ptr(map, ptr, false); 2562 2563 return ret; 2564 } 2565 2566 static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) 2567 { 2568 struct bpf_map *map, *inner_map_meta; 2569 2570 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 2571 if (IS_ERR(inner_map_meta)) 2572 return inner_map_meta; 2573 2574 map = htab_map_alloc(attr); 2575 if (IS_ERR(map)) { 2576 bpf_map_meta_free(inner_map_meta); 2577 return map; 2578 } 2579 2580 map->inner_map_meta = inner_map_meta; 2581 2582 return map; 2583 } 2584 2585 static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) 2586 { 2587 struct bpf_map **inner_map = htab_map_lookup_elem(map, key); 2588 2589 if (!inner_map) 2590 return NULL; 2591 2592 return READ_ONCE(*inner_map); 2593 } 2594 2595 static int htab_of_map_gen_lookup(struct bpf_map *map, 2596 struct bpf_insn *insn_buf) 2597 { 2598 struct bpf_insn *insn = insn_buf; 2599 const int ret = BPF_REG_0; 2600 2601 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 2602 (void *(*)(struct bpf_map *map, void *key))NULL)); 2603 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 2604 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); 2605 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 2606 offsetof(struct htab_elem, key) + 2607 round_up(map->key_size, 8)); 2608 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 2609 2610 return insn - insn_buf; 2611 } 2612 2613 static void htab_of_map_free(struct bpf_map *map) 2614 { 2615 bpf_map_meta_free(map->inner_map_meta); 2616 fd_htab_map_free(map); 2617 } 2618 2619 const struct bpf_map_ops htab_of_maps_map_ops = { 2620 .map_alloc_check = fd_htab_map_alloc_check, 2621 .map_alloc = htab_of_map_alloc, 2622 .map_free = htab_of_map_free, 2623 .map_get_next_key = htab_map_get_next_key, 2624 .map_lookup_elem = htab_of_map_lookup_elem, 2625 .map_delete_elem = htab_map_delete_elem, 2626 .map_fd_get_ptr = bpf_map_fd_get_ptr, 2627 .map_fd_put_ptr = bpf_map_fd_put_ptr, 2628 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 2629 .map_gen_lookup = htab_of_map_gen_lookup, 2630 .map_check_btf = map_check_no_btf, 2631 .map_mem_usage = htab_map_mem_usage, 2632 BATCH_OPS(htab), 2633 .map_btf_id = &htab_map_btf_ids[0], 2634 }; 2635
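/*
 * Illustrative userspace sketch (not part of this file): draining a plain
 * BPF_MAP_TYPE_HASH map through the batch path implemented above, via the
 * raw bpf() syscall.  The u32 key / u64 value layout, BATCH_SZ and the
 * dump_and_flush() name are assumptions made for the example; real code
 * would normally go through the libbpf batch wrappers instead.
 *
 *	#include <errno.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/bpf.h>
 *
 *	#define BATCH_SZ 64				// entries per syscall
 *
 *	static int dump_and_flush(int map_fd)
 *	{
 *		uint32_t keys[BATCH_SZ];
 *		uint64_t values[BATCH_SZ];
 *		uint32_t in_batch = 0, out_batch = 0;	// opaque resume tokens
 *		union bpf_attr attr;
 *		int use_in_batch = 0;
 *
 *		for (;;) {
 *			int err;
 *			uint32_t i;
 *
 *			memset(&attr, 0, sizeof(attr));
 *			attr.batch.map_fd = map_fd;
 *			// a NULL in_batch on the first call starts at bucket 0
 *			attr.batch.in_batch = use_in_batch ?
 *				(uint64_t)(unsigned long)&in_batch : 0;
 *			attr.batch.out_batch = (uint64_t)(unsigned long)&out_batch;
 *			attr.batch.keys = (uint64_t)(unsigned long)keys;
 *			attr.batch.values = (uint64_t)(unsigned long)values;
 *			attr.batch.count = BATCH_SZ;
 *
 *			err = syscall(__NR_bpf, BPF_MAP_LOOKUP_AND_DELETE_BATCH,
 *				      &attr, sizeof(attr));
 *			if (err < 0 && errno != ENOENT)
 *				return -errno;
 *
 *			// attr.batch.count now holds the number of copied entries
 *			for (i = 0; i < attr.batch.count; i++)
 *				printf("key %u -> %llu\n", keys[i],
 *				       (unsigned long long)values[i]);
 *
 *			if (err < 0)	// ENOENT: every bucket was visited
 *				return 0;
 *
 *			in_batch = out_batch;	// resume where the kernel stopped
 *			use_in_batch = 1;
 *		}
 *	}
 */

/*
 * Illustrative BPF-side sketch (also not part of this file) of the
 * map_for_each_callback path served by bpf_for_each_hash_elem() above.
 * The map name, key/value types and the tracepoint section are assumptions
 * made for the example.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counts SEC(".maps");
 *
 *	static long sum_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
 *	{
 *		__u64 *sum = ctx;
 *
 *		*sum += *val;
 *		return 0;	// 0 - continue, 1 - stop, as documented above
 *	}
 *
 *	SEC("tracepoint/syscalls/sys_enter_getpid")
 *	int sum_counts(void *ctx)
 *	{
 *		__u64 sum = 0;
 *
 *		bpf_for_each_map_elem(&counts, sum_cb, &sum, 0);
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */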