// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>
#include <asm/rqspinlock.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits where perf, kprobes and tracing
 * can be invoked from the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing to be invoked before the prior invocation
 * from one of these contexts completed. sys_bpf() uses the same mechanism
 * by pinning the task to the current CPU and incrementing the recursion
 * protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure the deterministic behaviour these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it is only safe to use raw spinlock for preallocated hash map on a RT kernel,
 * because there is no memory allocation within the lock held sections. However
 * after hash map was fully converted to use bpf_mem_alloc, there will be
 * non-synchronous memory allocation for non-preallocated hash map, so it is
 * safe to always use raw spinlock for bucket lock.
 */
struct bucket {
	struct hlist_nulls_head head;
	rqspinlock_t raw_lock;
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bpf_mem_alloc ma;
	struct bpf_mem_alloc pcpu_ma;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	/* number of elements in non-preallocated hashtable are kept
	 * in either pcount or count
	 */
	struct percpu_counter pcount;
	atomic_t count;
	bool use_percpu_counter;
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		/* pointer to per-cpu pointer */
		void *ptr_to_pptr;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

/* true when all elements were preallocated at map creation time, i.e.
 * BPF_F_NO_PREALLOC was not requested.
 */
static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

/* Initialize every bucket's nulls-list head (using the bucket index as the
 * nulls value, so lookups can detect element migration) and its lock.
 */
static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_res_spin_lock_init(&htab->buckets[i].raw_lock);
		cond_resched();
	}
}

/* Acquire the bucket lock with IRQs disabled. Returns 0 and stores the
 * saved flags in *pflags on success; otherwise propagates the nonzero
 * error from raw_res_spin_lock_irqsave() (the resilient lock may refuse
 * the acquisition instead of deadlocking).
 */
static inline int htab_lock_bucket(struct bucket *b, unsigned long *pflags)
{
	unsigned long flags;
	int ret;

	ret = raw_res_spin_lock_irqsave(&b->raw_lock, flags);
	if (ret)
		return ret;
	*pflags = flags;
	return 0;
}

/* Counterpart of htab_lock_bucket(): drop the bucket lock and restore IRQ
 * flags saved at lock time.
 */
static inline void htab_unlock_bucket(struct bucket *b, unsigned long flags)
{
	raw_res_spin_unlock_irqrestore(&b->raw_lock, flags);
}

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

/* true for the two LRU-evicting map flavors */
static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

/* true when the map value is a per-cpu area rather than a flat buffer */
static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline bool is_fd_htab(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS;
}

/* The value area starts right after the key, rounded up to 8 bytes
 * (matching the __aligned(8) of htab_elem::key).
 */
static inline void *htab_elem_value(struct htab_elem *l, u32 key_size)
{
	return l->key + round_up(key_size, 8);
}

/* For percpu maps the "value" slot holds a pointer to the per-cpu area;
 * these two helpers store/load that pointer.
 */
static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)htab_elem_value(l, key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)htab_elem_value(l, key_size);
}

/* For fd-based htabs (hash-of-maps) the value slot holds a plain pointer */
static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)htab_elem_value(l, map->key_size);
}

/* Index into the flat preallocated element array (htab->elems) */
static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}

/* Both percpu and fd htab support in-place update, so no need for
 * extra elem. LRU itself can remove the least used element, so
 * there is no need for an extra elem during map_update.
 */
static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab) && !is_fd_htab(htab);
}

/* Release internal structs (e.g. timers/workqueues recorded in the map
 * value) of every preallocated element, including the per-cpu extra elems.
 */
static void htab_free_prealloced_internal_structs(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		bpf_map_free_internal_structs(&htab->map,
					      htab_elem_value(elem, htab->map.key_size));
		cond_resched();
	}
}

/* Free BTF-managed special fields (per map->record) of every preallocated
 * element; for percpu maps that means every CPU's copy of each value.
 */
static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (IS_ERR_OR_NULL(htab->map.record))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();
	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (htab_is_percpu(htab)) {
			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
			int cpu;

			for_each_possible_cpu(cpu) {
				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
				cond_resched();
			}
		} else {
			bpf_obj_free_fields(htab->map.record,
					    htab_elem_value(elem, htab->map.key_size));
			cond_resched();
		}
		cond_resched();
	}
}

/* Free the flat preallocated element array; for percpu maps, first free
 * each element's per-cpu value area.
 */
static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code path of
 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
 * will acquire lru_lock first followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		bpf_map_inc_elem_count(&htab->map);
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}

/* Allocate and wire up all preallocated elements: the flat element array,
 * per-cpu value areas (for percpu maps), and either the LRU lists or the
 * per-cpu freelist that hands out free elements.
 */
static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
					    GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
		cond_resched();
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}

/* Stash one preallocated element per possible CPU so in-place updates can
 * swap elements without touching the freelist.
 */
static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
				    GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}

/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
	    sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;
	/* percpu map value size is bound by PCPU_MIN_UNIT_SIZE */
	if (percpu && round_up(attr->value_size, 8) > PCPU_MIN_UNIT_SIZE)
		return -E2BIG;

	return 0;
}

/* Allocate and initialize a hash map described by @attr. Sizing, seeding,
 * bucket/lock setup, element counting and (optionally) element
 * preallocation all happen here; attr was validated by
 * htab_map_alloc_check().
 */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu.  percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	int err;

	htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list has the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	err = -E2BIG;
	if (htab->map.max_entries > 1UL << 31)
		goto free_htab;

	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* check for u32 overflow */
	if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	err = bpf_map_init_elem_count(&htab->map);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_elem_count;

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_u32();

	htab_init_buckets(htab);

/* compute_batch_value() computes batch value as num_online_cpus() * 2
 * and __percpu_counter_compare() needs
 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus()
 * for percpu_counter to be faster than atomic_t. In practice the average bpf
 * hash map size is 10k, which means that a system with 64 cpus will fill
 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore
 * define our own batch count as 32 then 10k hash map can be filled up to 80%:
 * 10k - 8k > 32 _batch_ * 64 _cpus_
 * and __percpu_counter_compare() will still be fast. At that point hash map
 * collisions will dominate its performance anyway. Assume that hash map filled
 * to 50+% isn't going to be O(1) and use the following formula to choose
 * between percpu_counter and atomic_t.
 */
#define PERCPU_COUNTER_BATCH 32
	if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH)
		htab->use_percpu_counter = true;

	if (htab->use_percpu_counter) {
		err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL);
		if (err)
			goto free_map_locked;
	}

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_map_locked;

		if (htab_has_extra_elems(htab)) {
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	} else {
		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false);
		if (err)
			goto free_map_locked;
		if (percpu) {
			err = bpf_mem_alloc_init(&htab->pcpu_ma,
						 round_up(htab->map.value_size, 8), true);
			if (err)
				goto free_map_locked;
		}
	}

	return &htab->map;

free_prealloc:
	prealloc_destroy(htab);
free_map_locked:
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
free_elem_count:
	bpf_map_free_elem_count(&htab->map);
free_htab:
	bpf_map_area_free(htab);
	return ERR_PTR(err);
}

/* Seeded hash of the key; jhash2() is the faster word-wise variant usable
 * when the key length is a multiple of 4 bytes.
 */
static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	if (likely(key_len % 4 == 0))
		return jhash2(key, key_len / 4, hashrnd);
	return jhash(key, key_len, hashrnd);
}

/* n_buckets is a power of 2, so masking selects the bucket */
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while link list is being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	/* the nulls value encodes the bucket index; a mismatch means the
	 * element was moved to another bucket mid-walk, so retry
	 */
	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}

/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return htab_elem_value(l, map->key_size);

	return NULL;
}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	/* convert the htab_elem pointer into a value pointer */
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

/* LRU lookup; when @mark is set the element's LRU reference bit is set so
 * eviction prefers colder elements.
 */
static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return htab_elem_value(l, map->key_size);
	}

	return NULL;
}

static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}
727 728 static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key) 729 { 730 return __htab_lru_map_lookup_elem(map, key, false); 731 } 732 733 static int htab_lru_map_gen_lookup(struct bpf_map *map, 734 struct bpf_insn *insn_buf) 735 { 736 struct bpf_insn *insn = insn_buf; 737 const int ret = BPF_REG_0; 738 const int ref_reg = BPF_REG_1; 739 740 BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, 741 (void *(*)(struct bpf_map *map, void *key))NULL)); 742 *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); 743 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4); 744 *insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret, 745 offsetof(struct htab_elem, lru_node) + 746 offsetof(struct bpf_lru_node, ref)); 747 *insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1); 748 *insn++ = BPF_ST_MEM(BPF_B, ret, 749 offsetof(struct htab_elem, lru_node) + 750 offsetof(struct bpf_lru_node, ref), 751 1); 752 *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, 753 offsetof(struct htab_elem, key) + 754 round_up(map->key_size, 8)); 755 return insn - insn_buf; 756 } 757 758 static void check_and_free_fields(struct bpf_htab *htab, 759 struct htab_elem *elem) 760 { 761 if (IS_ERR_OR_NULL(htab->map.record)) 762 return; 763 764 if (htab_is_percpu(htab)) { 765 void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size); 766 int cpu; 767 768 for_each_possible_cpu(cpu) 769 bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu)); 770 } else { 771 void *map_value = htab_elem_value(elem, htab->map.key_size); 772 773 bpf_obj_free_fields(htab->map.record, map_value); 774 } 775 } 776 777 /* It is called from the bpf_lru_list when the LRU needs to delete 778 * older elements from the htab. 
779 */ 780 static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node) 781 { 782 struct bpf_htab *htab = arg; 783 struct htab_elem *l = NULL, *tgt_l; 784 struct hlist_nulls_head *head; 785 struct hlist_nulls_node *n; 786 unsigned long flags; 787 struct bucket *b; 788 int ret; 789 790 tgt_l = container_of(node, struct htab_elem, lru_node); 791 b = __select_bucket(htab, tgt_l->hash); 792 head = &b->head; 793 794 ret = htab_lock_bucket(b, &flags); 795 if (ret) 796 return false; 797 798 hlist_nulls_for_each_entry_rcu(l, n, head, hash_node) 799 if (l == tgt_l) { 800 hlist_nulls_del_rcu(&l->hash_node); 801 bpf_map_dec_elem_count(&htab->map); 802 break; 803 } 804 805 htab_unlock_bucket(b, flags); 806 807 if (l == tgt_l) 808 check_and_free_fields(htab, l); 809 return l == tgt_l; 810 } 811 812 /* Called from syscall */ 813 static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 814 { 815 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 816 struct hlist_nulls_head *head; 817 struct htab_elem *l, *next_l; 818 u32 hash, key_size; 819 int i = 0; 820 821 WARN_ON_ONCE(!rcu_read_lock_held()); 822 823 key_size = map->key_size; 824 825 if (!key) 826 goto find_first_elem; 827 828 hash = htab_map_hash(key, key_size, htab->hashrnd); 829 830 head = select_bucket(htab, hash); 831 832 /* lookup the key */ 833 l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets); 834 835 if (!l) 836 goto find_first_elem; 837 838 /* key was found, get next key in the same bucket */ 839 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)), 840 struct htab_elem, hash_node); 841 842 if (next_l) { 843 /* if next elem in this hash list is non-zero, just return it */ 844 memcpy(next_key, next_l->key, key_size); 845 return 0; 846 } 847 848 /* no more elements in this hash list, go to the next bucket */ 849 i = hash & (htab->n_buckets - 1); 850 i++; 851 852 find_first_elem: 853 /* iterate over buckets */ 854 
for (; i < htab->n_buckets; i++) { 855 head = select_bucket(htab, i); 856 857 /* pick first element in the bucket */ 858 next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)), 859 struct htab_elem, hash_node); 860 if (next_l) { 861 /* if it's not empty, just return it */ 862 memcpy(next_key, next_l->key, key_size); 863 return 0; 864 } 865 } 866 867 /* iterated over all buckets and all elements */ 868 return -ENOENT; 869 } 870 871 static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) 872 { 873 check_and_free_fields(htab, l); 874 875 if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) 876 bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr); 877 bpf_mem_cache_free(&htab->ma, l); 878 } 879 880 static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l) 881 { 882 struct bpf_map *map = &htab->map; 883 void *ptr; 884 885 if (map->ops->map_fd_put_ptr) { 886 ptr = fd_htab_map_get_ptr(map, l); 887 map->ops->map_fd_put_ptr(map, ptr, true); 888 } 889 } 890 891 static bool is_map_full(struct bpf_htab *htab) 892 { 893 if (htab->use_percpu_counter) 894 return __percpu_counter_compare(&htab->pcount, htab->map.max_entries, 895 PERCPU_COUNTER_BATCH) >= 0; 896 return atomic_read(&htab->count) >= htab->map.max_entries; 897 } 898 899 static void inc_elem_count(struct bpf_htab *htab) 900 { 901 bpf_map_inc_elem_count(&htab->map); 902 903 if (htab->use_percpu_counter) 904 percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH); 905 else 906 atomic_inc(&htab->count); 907 } 908 909 static void dec_elem_count(struct bpf_htab *htab) 910 { 911 bpf_map_dec_elem_count(&htab->map); 912 913 if (htab->use_percpu_counter) 914 percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH); 915 else 916 atomic_dec(&htab->count); 917 } 918 919 920 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) 921 { 922 htab_put_fd_value(htab, l); 923 924 if (htab_is_prealloc(htab)) { 925 bpf_map_dec_elem_count(&htab->map); 926 
check_and_free_fields(htab, l); 927 pcpu_freelist_push(&htab->freelist, &l->fnode); 928 } else { 929 dec_elem_count(htab); 930 htab_elem_free(htab, l); 931 } 932 } 933 934 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr, 935 void *value, bool onallcpus, u64 map_flags) 936 { 937 void *ptr; 938 939 if (!onallcpus) { 940 /* copy true value_size bytes */ 941 ptr = this_cpu_ptr(pptr); 942 copy_map_value(&htab->map, ptr, value); 943 bpf_obj_free_fields(htab->map.record, ptr); 944 } else { 945 u32 size = round_up(htab->map.value_size, 8); 946 void *val; 947 int cpu; 948 949 if (map_flags & BPF_F_CPU) { 950 cpu = map_flags >> 32; 951 ptr = per_cpu_ptr(pptr, cpu); 952 copy_map_value(&htab->map, ptr, value); 953 bpf_obj_free_fields(htab->map.record, ptr); 954 return; 955 } 956 957 for_each_possible_cpu(cpu) { 958 ptr = per_cpu_ptr(pptr, cpu); 959 val = (map_flags & BPF_F_ALL_CPUS) ? value : value + size * cpu; 960 copy_map_value(&htab->map, ptr, val); 961 bpf_obj_free_fields(htab->map.record, ptr); 962 } 963 } 964 } 965 966 static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr, 967 void *value, bool onallcpus, u64 map_flags) 968 { 969 /* When not setting the initial value on all cpus, zero-fill element 970 * values for other cpus. Otherwise, bpf program has no way to ensure 971 * known initial values for cpus other than current one 972 * (onallcpus=false always when coming from bpf prog). 
973 */ 974 if (!onallcpus) { 975 int current_cpu = raw_smp_processor_id(); 976 int cpu; 977 978 for_each_possible_cpu(cpu) { 979 if (cpu == current_cpu) 980 copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value); 981 else /* Since elem is preallocated, we cannot touch special fields */ 982 zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu)); 983 } 984 } else { 985 pcpu_copy_value(htab, pptr, value, onallcpus, map_flags); 986 } 987 } 988 989 static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab) 990 { 991 return is_fd_htab(htab) && BITS_PER_LONG == 64; 992 } 993 994 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, 995 void *value, u32 key_size, u32 hash, 996 bool percpu, bool onallcpus, 997 struct htab_elem *old_elem, u64 map_flags) 998 { 999 u32 size = htab->map.value_size; 1000 bool prealloc = htab_is_prealloc(htab); 1001 struct htab_elem *l_new, **pl_new; 1002 void __percpu *pptr; 1003 1004 if (prealloc) { 1005 if (old_elem) { 1006 /* if we're updating the existing element, 1007 * use per-cpu extra elems to avoid freelist_pop/push 1008 */ 1009 pl_new = this_cpu_ptr(htab->extra_elems); 1010 l_new = *pl_new; 1011 *pl_new = old_elem; 1012 } else { 1013 struct pcpu_freelist_node *l; 1014 1015 l = __pcpu_freelist_pop(&htab->freelist); 1016 if (!l) 1017 return ERR_PTR(-E2BIG); 1018 l_new = container_of(l, struct htab_elem, fnode); 1019 bpf_map_inc_elem_count(&htab->map); 1020 } 1021 } else { 1022 if (is_map_full(htab)) 1023 if (!old_elem) 1024 /* when map is full and update() is replacing 1025 * old element, it's ok to allocate, since 1026 * old element will be freed immediately. 
1027 * Otherwise return an error 1028 */ 1029 return ERR_PTR(-E2BIG); 1030 inc_elem_count(htab); 1031 l_new = bpf_mem_cache_alloc(&htab->ma); 1032 if (!l_new) { 1033 l_new = ERR_PTR(-ENOMEM); 1034 goto dec_count; 1035 } 1036 } 1037 1038 memcpy(l_new->key, key, key_size); 1039 if (percpu) { 1040 if (prealloc) { 1041 pptr = htab_elem_get_ptr(l_new, key_size); 1042 } else { 1043 /* alloc_percpu zero-fills */ 1044 void *ptr = bpf_mem_cache_alloc(&htab->pcpu_ma); 1045 1046 if (!ptr) { 1047 bpf_mem_cache_free(&htab->ma, l_new); 1048 l_new = ERR_PTR(-ENOMEM); 1049 goto dec_count; 1050 } 1051 l_new->ptr_to_pptr = ptr; 1052 pptr = *(void __percpu **)ptr; 1053 } 1054 1055 pcpu_init_value(htab, pptr, value, onallcpus, map_flags); 1056 1057 if (!prealloc) 1058 htab_elem_set_ptr(l_new, key_size, pptr); 1059 } else if (fd_htab_map_needs_adjust(htab)) { 1060 size = round_up(size, 8); 1061 memcpy(htab_elem_value(l_new, key_size), value, size); 1062 } else { 1063 copy_map_value(&htab->map, htab_elem_value(l_new, key_size), value); 1064 } 1065 1066 l_new->hash = hash; 1067 return l_new; 1068 dec_count: 1069 dec_elem_count(htab); 1070 return l_new; 1071 } 1072 1073 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, 1074 u64 map_flags) 1075 { 1076 if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 1077 /* elem already exists */ 1078 return -EEXIST; 1079 1080 if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 1081 /* elem doesn't exist, cannot update it */ 1082 return -ENOENT; 1083 1084 return 0; 1085 } 1086 1087 /* Called from syscall or from eBPF program */ 1088 static long htab_map_update_elem(struct bpf_map *map, void *key, void *value, 1089 u64 map_flags) 1090 { 1091 struct bpf_htab *htab = container_of(map, struct bpf_htab, map); 1092 struct htab_elem *l_new, *l_old; 1093 struct hlist_nulls_head *head; 1094 unsigned long flags; 1095 struct bucket *b; 1096 u32 key_size, hash; 1097 int ret; 1098 1099 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	if (unlikely(map_flags & BPF_F_LOCK)) {
		if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
			return -EINVAL;
		/* find an element without taking the bucket lock */
		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
					      htab->n_buckets);
		ret = check_flags(htab, l_old, map_flags);
		if (ret)
			return ret;
		if (l_old) {
			/* grab the element lock and update value in place */
			copy_map_value_locked(map,
					      htab_elem_value(l_old, key_size),
					      value, false);
			return 0;
		}
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
	}

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
		copy_map_value_locked(map,
				      htab_elem_value(l_old, key_size),
				      value, false);
		ret = 0;
		goto err;
	}

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				l_old, map_flags);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_nulls_del_rcu(&l_old->hash_node);

		/* l_old has already been stashed in htab->extra_elems, free
		 * its special fields before it is available for reuse.
		 */
		if (htab_is_prealloc(htab))
			check_and_free_fields(htab, l_old);
	}
	htab_unlock_bucket(b, flags);
	/* Non-prealloc elems are freed outside the bucket lock. */
	if (l_old && !htab_is_prealloc(htab))
		free_htab_elem(htab, l_old);
	return 0;
err:
	htab_unlock_bucket(b, flags);
	return ret;
}

/* Free an LRU elem's special fields and return it to the LRU free list.
 * Must not be called with the bucket lock held (bpf_lru_push_free() takes
 * the lru_lock).
 */
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
	check_and_free_fields(htab, elem);
	bpf_map_dec_elem_count(&htab->map);
	bpf_lru_push_free(&htab->lru, &elem->lru_node);
}

/* Update path for LRU hash maps: an elem is popped from the LRU before the
 * bucket lock is taken (the pop may itself need bucket locks for eviction).
 */
static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
				     u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash =
	       htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
	l_new = prealloc_lru_pop(htab, key, hash);
	if (!l_new)
		return -ENOMEM;
	copy_map_value(&htab->map, htab_elem_value(l_new, map->key_size), value);

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
	}
	ret = 0;

err:
	htab_unlock_bucket(b, flags);

err_lock_bucket:
	/* On failure the popped elem goes back to the LRU; on success the
	 * replaced elem (if any) does. Both after dropping the bucket lock.
	 */
	if (ret)
		htab_lru_push_free(htab, l_new);
	else if (l_old)
		htab_lru_push_free(htab, l_old);

	return ret;
}

/* Flag validation shared by the in-place update paths. Percpu updates from
 * syscall (onallcpus) may carry BPF_F_CPU/BPF_F_ALL_CPUS but not BPF_F_LOCK.
 */
static int htab_map_check_update_flags(bool onallcpus, u64 map_flags)
{
	if (unlikely(!onallcpus && map_flags > BPF_EXIST))
		return -EINVAL;
	if (unlikely(onallcpus && ((map_flags & BPF_F_LOCK) || (u32)map_flags > BPF_F_ALL_CPUS)))
		return -EINVAL;
	return 0;
}

/* Update that overwrites the existing element's value in place (percpu
 * maps and fd-based htab-of-maps) instead of swapping in a new element.
 * For htab-of-maps the old inner map pointer is released after unlock via
 * map_fd_put_ptr().
 */
static long htab_map_update_elem_in_place(struct bpf_map *map, void *key,
					  void *value, u64 map_flags,
					  bool percpu, bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old;
	struct hlist_nulls_head *head;
	void *old_map_ptr = NULL;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	ret = htab_map_check_update_flags(onallcpus, map_flags);
	if (unlikely(ret))
		return ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		/* Update value in-place */
		if (percpu) {
			pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
					value, onallcpus, map_flags);
		} else {
			void **inner_map_pptr = htab_elem_value(l_old, key_size);

			old_map_ptr = *inner_map_pptr;
			WRITE_ONCE(*inner_map_pptr, *(void **)value);
		}
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, percpu, onallcpus, NULL, map_flags);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	}
err:
	htab_unlock_bucket(b, flags);
	if (old_map_ptr)
		map->ops->map_fd_put_ptr(map, old_map_ptr, true);
	return ret;
}

/* LRU percpu update: like the LRU update above, but the value is copied
 * into per-CPU storage and an elem is only popped when one may be needed
 * (map_flags != BPF_EXIST).
 */
static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					      void *value, u64 map_flags,
					      bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	ret = htab_map_check_update_flags(onallcpus, map_flags);
	if (unlikely(ret))
		return ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For
	   LRU, we need to alloc before taking bucket's
	 * spinlock because LRU's elem alloc may need
	 * to remove older elem from htab and this removal
	 * operation will need a bucket lock.
	 */
	if (map_flags != BPF_EXIST) {
		l_new = prealloc_lru_pop(htab, key, hash);
		if (!l_new)
			return -ENOMEM;
	}

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		bpf_lru_node_set_ref(&l_old->lru_node);

		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
				value, onallcpus, map_flags);
	} else {
		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
				value, onallcpus, map_flags);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		/* ownership transferred to the hash list */
		l_new = NULL;
	}
	ret = 0;
err:
	htab_unlock_bucket(b, flags);
err_lock_bucket:
	/* Unused popped elem goes back to the LRU after unlock. */
	if (l_new) {
		bpf_map_dec_elem_count(&htab->map);
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
	}
	return ret;
}

static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 map_flags)
{
	return htab_map_update_elem_in_place(map, key, value, map_flags, true, false);
}

static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					    void *value, u64 map_flags)
{
	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
						 false);
}

/* Called from syscall or from eBPF program */
static long htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (l)
		hlist_nulls_del_rcu(&l->hash_node);
	else
		ret = -ENOENT;

	htab_unlock_bucket(b, flags);

	/* free outside the bucket lock */
	if (l)
		free_htab_elem(htab, l);
	return ret;
}

/* LRU variant: the unlinked elem is pushed back onto the LRU free list
 * (after unlock) instead of being freed.
 */
static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l)
		hlist_nulls_del_rcu(&l->hash_node);
	else
		ret = -ENOENT;

	htab_unlock_bucket(b, flags);
	if (l)
		htab_lru_push_free(htab, l);
	return ret;
}

/* Unlink and free every element of a non-preallocated htab during map
 * destruction; no bucket locks are needed, nothing else can touch the map.
 */
static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	/* It's called from a worker thread and migration has been disabled,
	 * therefore, it is OK to invoke bpf_mem_cache_free() directly.
	 */
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_nulls_del_rcu(&l->hash_node);
			htab_elem_free(htab, l);
		}
		cond_resched();
	}
}

/* Walk all elements of a non-preallocated htab and release the internal
 * structs stored in their values. Elements themselves stay in the table.
 */
static void htab_free_malloced_internal_structs(struct bpf_htab *htab)
{
	int i;

	rcu_read_lock();
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry(l, n, head, hash_node) {
			/* We only free internal structs on uref dropping to zero */
			bpf_map_free_internal_structs(&htab->map,
						      htab_elem_value(l, htab->map.key_size));
		}
		cond_resched_rcu();
	}
	rcu_read_unlock();
}

/* map_release_uref callback: dispatch to the prealloc or malloc'ed walker. */
static void htab_map_free_internal_structs(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* We only free internal structs on uref dropping to zero */
	if (!bpf_map_has_internal_structs(map))
		return;

	if (htab_is_prealloc(htab))
		htab_free_prealloced_internal_structs(htab);
	else
		htab_free_malloced_internal_structs(htab);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
	 * There is no need to synchronize_rcu() here to protect map elements.
	 */

	/* htab no longer uses call_rcu() directly. bpf_mem_alloc does it
	 * underneath and is responsible for waiting for callbacks to finish
	 * during bpf_mem_alloc_destroy().
	 */
	if (!htab_is_prealloc(htab)) {
		delete_all_elements(htab);
	} else {
		htab_free_prealloced_fields(htab);
		prealloc_destroy(htab);
	}

	bpf_map_free_elem_count(map);
	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	bpf_map_area_free(htab);
}

/* Print "key: value" for one element via BTF (used by map dumps). */
static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
				   struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = htab_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": ");
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_putc(m, '\n');

	rcu_read_unlock();
}

/* Atomically copy out an element's value and unlink it from the table,
 * covering the four regular/LRU x scalar/percpu variants. The unlinked
 * elem is freed (or LRU-pushed) only after the bucket lock is dropped.
 */
static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	unsigned long bflags;
	struct htab_elem *l;
	u32 hash, key_size;
	struct bucket *b;
	int ret;

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(b, &bflags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (!l) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (is_percpu) {
		u32 roundup_value_size = round_up(map->value_size, 8);
		void __percpu *pptr;
		int off = 0, cpu;

		pptr =
		       htab_elem_get_ptr(l, key_size);
		/* Concatenate all per-CPU copies into the user buffer. */
		for_each_possible_cpu(cpu) {
			copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
			check_and_init_map_value(&htab->map, value + off);
			off += roundup_value_size;
		}
	} else {
		void *src = htab_elem_value(l, map->key_size);

		if (flags & BPF_F_LOCK)
			copy_map_value_locked(map, value, src, true);
		else
			copy_map_value(map, value, src);
		/* Zeroing special fields in the temp buffer */
		check_and_init_map_value(map, value);
	}
	hlist_nulls_del_rcu(&l->hash_node);

out_unlock:
	htab_unlock_bucket(b, bflags);

	/* free/recycle outside the bucket lock */
	if (l) {
		if (is_lru_map)
			htab_lru_push_free(htab, l);
		else
			free_htab_elem(htab, l);
	}

	return ret;
}

static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
						 flags);
}

static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						  void *key, void *value,
						  u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
						 flags);
}

static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
						 flags);
}

static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						      void *key, void *value,
						      u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
						 flags);
}

/* Batched lookup (and optional delete) over whole buckets for the
 * BPF_MAP_LOOKUP_BATCH family of syscalls. Keys/values are staged in
 * kernel buffers sized for the largest bucket seen so far, because
 * copy_{to,from}_user cannot run under rcu_read_lock.
 */
static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr,
				   bool do_delete, bool is_lru_map,
				   bool is_percpu)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	void *keys = NULL,
	     *values = NULL, *value, *dst_key, *dst_val;
	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	u32 batch, max_count, size, bucket_size, map_id;
	u64 elem_map_flags, map_flags, allowed_flags;
	u32 bucket_cnt, total, key_size, value_size;
	struct htab_elem *node_to_free = NULL;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags = 0;
	bool locked = false;
	struct htab_elem *l;
	struct bucket *b;
	int ret = 0;

	elem_map_flags = attr->batch.elem_flags;
	allowed_flags = BPF_F_LOCK;
	/* BPF_F_CPU selects a single CPU's value; only valid for percpu lookup */
	if (!do_delete && is_percpu)
		allowed_flags |= BPF_F_CPU;
	ret = bpf_map_check_op_flags(map, elem_map_flags, allowed_flags);
	if (ret)
		return ret;

	map_flags = attr->batch.flags;
	if (map_flags)
		return -EINVAL;

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	/* in_batch carries the bucket index to resume from */
	batch = 0;
	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
		return -EFAULT;

	if (batch >= htab->n_buckets)
		return -ENOENT;

	key_size = htab->map.key_size;
	value_size = htab->map.value_size;
	size = round_up(value_size, 8);
	if (is_percpu && !(elem_map_flags & BPF_F_CPU))
		value_size = size * num_possible_cpus();
	total = 0;
	/* while experimenting with hash tables with sizes ranging from 10 to
	 * 1000, it was observed that a bucket can have up to 5 entries.
	 */
	bucket_size = 5;

alloc:
	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
	keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
	values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
	if (!keys || !values) {
		ret = -ENOMEM;
		goto after_loop;
	}

again:
	bpf_disable_instrumentation();
	rcu_read_lock();
again_nocopy:
	dst_key = keys;
	dst_val = values;
	b = &htab->buckets[batch];
	head = &b->head;
	/* do not grab the lock unless need it (bucket_cnt > 0). */
	if (locked) {
		ret = htab_lock_bucket(b, &flags);
		if (ret) {
			rcu_read_unlock();
			bpf_enable_instrumentation();
			goto after_loop;
		}
	}

	bucket_cnt = 0;
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		bucket_cnt++;

	/* non-empty bucket seen without the lock: retry with it held */
	if (bucket_cnt && !locked) {
		locked = true;
		goto again_nocopy;
	}

	if (bucket_cnt > (max_count - total)) {
		if (total == 0)
			ret = -ENOSPC;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the locked was grabbed, so release it.
		 */
		htab_unlock_bucket(b, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		goto after_loop;
	}

	if (bucket_cnt > bucket_size) {
		bucket_size = bucket_cnt;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the locked was grabbed, so release it.
		 */
		htab_unlock_bucket(b, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		kvfree(keys);
		kvfree(values);
		goto alloc;
	}

	/* Next block is only safe to run if you have grabbed the lock */
	if (!locked)
		goto next_batch;

	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
		memcpy(dst_key, l->key, key_size);

		if (is_percpu) {
			int off = 0, cpu;
			void __percpu *pptr;

			pptr = htab_elem_get_ptr(l, map->key_size);
			if (elem_map_flags & BPF_F_CPU) {
				/* requested CPU index lives in the upper 32 bits */
				cpu = elem_map_flags >> 32;
				copy_map_value(&htab->map, dst_val, per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(&htab->map, dst_val);
			} else {
				for_each_possible_cpu(cpu) {
					copy_map_value_long(&htab->map, dst_val + off,
							    per_cpu_ptr(pptr, cpu));
					check_and_init_map_value(&htab->map, dst_val + off);
					off += size;
				}
			}
		} else {
			value = htab_elem_value(l, key_size);
			if (is_fd_htab(htab)) {
				struct bpf_map **inner_map = value;

				/* Actual value is the id of the inner map */
				map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
				value = &map_id;
			}

			if (elem_map_flags & BPF_F_LOCK)
				copy_map_value_locked(map, dst_val, value,
						      true);
			else
				copy_map_value(map, dst_val, value);
			/* Zeroing special fields in the temp buffer */
			check_and_init_map_value(map, dst_val);
		}
		if (do_delete) {
			hlist_nulls_del_rcu(&l->hash_node);

			/* bpf_lru_push_free() will acquire lru_lock, which
			 * may cause deadlock. See comments in function
			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
			 * after releasing the bucket lock.
			 *
			 * For htab of maps, htab_put_fd_value() in
			 * free_htab_elem() may acquire a spinlock with bucket
			 * lock being held and it violates the lock rule, so
			 * invoke free_htab_elem() after unlock as well.
			 */
			l->batch_flink = node_to_free;
			node_to_free = l;
		}
		dst_key += key_size;
		dst_val += value_size;
	}

	htab_unlock_bucket(b, flags);
	locked = false;

	/* deferred frees, now that the bucket lock is dropped */
	while (node_to_free) {
		l = node_to_free;
		node_to_free = node_to_free->batch_flink;
		if (is_lru_map)
			htab_lru_push_free(htab, l);
		else
			free_htab_elem(htab, l);
	}

next_batch:
	/* If we are not copying data, we can go to next bucket and avoid
	 * unlocking the rcu.
	 */
	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
		batch++;
		goto again_nocopy;
	}

	rcu_read_unlock();
	bpf_enable_instrumentation();
	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
	    key_size * bucket_cnt) ||
	    copy_to_user(uvalues + total * value_size, values,
	    value_size * bucket_cnt))) {
		ret = -EFAULT;
		goto after_loop;
	}

	total += bucket_cnt;
	batch++;
	if (batch >= htab->n_buckets) {
		ret = -ENOENT;
		goto after_loop;
	}
	goto again;

after_loop:
	if (ret == -EFAULT)
		goto out;

	/* copy # of entries and next batch */
	ubatch = u64_to_user_ptr(attr->batch.out_batch);
	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
	    put_user(total, &uattr->batch.count))
		ret = -EFAULT;

out:
	kvfree(keys);
	kvfree(values);
	return ret;
}

static int
htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, true);
}

static int
htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					const union bpf_attr *attr,
					union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, true);
}

static int
htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
		      union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, false);
}

static int
htab_map_lookup_and_delete_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, false);
}

static int
htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, true);
}

static int
htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					    const union bpf_attr *attr,
					    union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, true);
}

static int
htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, false);
}

static int
htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
				     const union bpf_attr *attr,
				     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, false);
}

/* Per-iterator state for the bpf_iter seq_file over a hash map. */
struct bpf_iter_seq_hash_map_info {
	struct bpf_map *map;
	struct bpf_htab *htab;
	void *percpu_value_buf; // non-zero means percpu hash
	u32 bucket_id;		// bucket to resume iteration from
	u32 skip_elems;		// elems already emitted in that bucket
};

/* Find the element following @prev_elem (or the first element when NULL),
 * resuming from info->bucket_id/skip_elems. Holds rcu_read_lock across the
 * bucket that contains the returned element; seq_stop() drops it.
 */
static struct htab_elem *
bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
			   struct htab_elem *prev_elem)
{
	const struct bpf_htab *htab = info->htab;
	u32 skip_elems = info->skip_elems;
	u32 bucket_id = info->bucket_id;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	struct htab_elem *elem;
	struct bucket *b;
	u32 i, count;

	if (bucket_id >= htab->n_buckets)
		return NULL;

	/* try to find next elem in the same bucket */
	if (prev_elem) {
		/* no update/deletion on this bucket, prev_elem should be still valid
		 * and we won't skip elements.
		 */
		n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
		elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
		if (elem)
			return elem;

		/* not found, unlock and go to the next bucket */
		b = &htab->buckets[bucket_id++];
		rcu_read_unlock();
		skip_elems = 0;
	}

	for (i = bucket_id; i < htab->n_buckets; i++) {
		b = &htab->buckets[i];
		rcu_read_lock();

		count = 0;
		head = &b->head;
		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
			if (count >= skip_elems) {
				/* return with rcu_read_lock still held */
				info->bucket_id = i;
				info->skip_elems = count;
				return elem;
			}
			count++;
		}

		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;
	struct htab_elem *elem;

	elem = bpf_hash_map_seq_find_next(info, NULL);
	if (!elem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return elem;
}

static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_hash_map_seq_find_next(info, v);
}

/* Run the attached iterator prog on one element (or once more with
 * elem == NULL to signal end-of-iteration). Percpu values are flattened
 * into the preallocated percpu_value_buf.
 */
static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	int ret = 0, off = 0, cpu;
	u32 roundup_value_size;
	struct bpf_prog *prog;
	void __percpu *pptr;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, elem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (elem) {
			ctx.key = elem->key;
			if (!info->percpu_value_buf) {
				ctx.value = htab_elem_value(elem, map->key_size);
			} else {
				roundup_value_size = round_up(map->value_size, 8);
				pptr = htab_elem_get_ptr(elem, map->key_size);
				for_each_possible_cpu(cpu) {
					copy_map_value_long(map, info->percpu_value_buf + off,
							    per_cpu_ptr(pptr, cpu));
					check_and_init_map_value(map, info->percpu_value_buf + off);
					off += roundup_value_size;
				}
				ctx.value = info->percpu_value_buf;
			}
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_hash_map_seq_show(seq, v);
}

static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		/* end of iteration: give the prog a final NULL-elem call */
		(void)__bpf_hash_map_seq_show(seq, NULL);
	else
		/* drop the rcu_read_lock taken by seq_find_next() */
		rcu_read_unlock();
}

/* Allocate iterator state; percpu maps get a buffer large enough to hold
 * one flattened value per possible CPU.
 */
static int bpf_iter_init_hash_map(void *priv_data,
				  struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	seq_info->htab = container_of(map, struct bpf_htab, map);
	return 0;
}

static void
bpf_iter_fini_hash_map(void *priv_data)
{
	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_hash_map_seq_ops = {
	.start = bpf_hash_map_seq_start,
	.next = bpf_hash_map_seq_next,
	.stop = bpf_hash_map_seq_stop,
	.show = bpf_hash_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_hash_map_seq_ops,
	.init_seq_private = bpf_iter_init_hash_map,
	.fini_seq_private = bpf_iter_fini_hash_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};

/* bpf_for_each_map_elem() helper: invoke @callback_fn on every element.
 * Returns the number of elements visited; a non-zero callback return
 * stops the walk early.
 */
static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				   void *callback_ctx, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	struct htab_elem *elem;
	int i, num_elems = 0;
	void __percpu *pptr;
	struct bucket *b;
	void *key, *val;
	bool is_percpu;
	u64 ret = 0;

	cant_migrate();

	if (flags != 0)
		return -EINVAL;

	is_percpu = htab_is_percpu(htab);

	/* migration has been disabled, so percpu value prepared here will be
	 * the same as the one seen by the bpf program with
	 * bpf_map_lookup_elem().
	 */
	for (i = 0; i < htab->n_buckets; i++) {
		b = &htab->buckets[i];
		rcu_read_lock();
		head = &b->head;
		hlist_nulls_for_each_entry_safe(elem, n, head, hash_node) {
			key = elem->key;
			if (is_percpu) {
				/* current cpu value for percpu map */
				pptr = htab_elem_get_ptr(elem, map->key_size);
				val = this_cpu_ptr(pptr);
			} else {
				val = htab_elem_value(elem, map->key_size);
			}
			num_elems++;
			ret = callback_fn((u64)(long)map, (u64)(long)key,
					  (u64)(long)val, (u64)(long)callback_ctx, 0);
			/* return value: 0 - continue, 1 - stop and return */
			if (ret) {
				rcu_read_unlock();
				goto out;
			}
		}
		rcu_read_unlock();
	}
out:
	return num_elems;
}

/* Estimate the map's total memory footprint for map_mem_usage. */
static u64 htab_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 value_size = round_up(htab->map.value_size, 8);
	bool prealloc = htab_is_prealloc(htab);
	bool percpu = htab_is_percpu(htab);
	bool lru = htab_is_lru(htab);
	u64 num_entries;
	u64 usage = sizeof(struct bpf_htab);

	usage += sizeof(struct bucket) * htab->n_buckets;
	usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT;
	if (prealloc) {
		num_entries = map->max_entries;
		if (htab_has_extra_elems(htab))
			num_entries += num_possible_cpus();

		usage += htab->elem_size * num_entries;

		if (percpu)
			usage += value_size * num_possible_cpus() * num_entries;
		else if (!lru)
			usage += sizeof(struct htab_elem *) * num_possible_cpus();
	} else {
#define LLIST_NODE_SZ sizeof(struct llist_node)

		num_entries = htab->use_percpu_counter ?
				percpu_counter_sum(&htab->pcount) :
				atomic_read(&htab->count);
		/* Each dynamically allocated element carries an extra
		 * llist_node in front of it.
		 */
		usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries;
		if (percpu) {
			usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries;
			usage += value_size * num_possible_cpus() * num_entries;
		}
	}
	return usage;
}

BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
const struct bpf_map_ops htab_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_release_uref = htab_map_free_internal_structs,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_gen_lookup = htab_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops htab_lru_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_release_uref = htab_map_free_internal_structs,
	.map_lookup_elem = htab_lru_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
	/* sys_bpf() lookups must not disturb the LRU state; see the
	 * dedicated sys-only lookup below.
	 */
	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
	.map_update_elem = htab_lru_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_gen_lookup = htab_lru_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_lru),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	/* Return the current CPU's slot of the percpu value. */
	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	else
		return NULL;
}

/* inline bpf_map_lookup_elem() call for per-CPU hashmap */
static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	if (!bpf_jit_supports_percpu_insn())
		return -EOPNOTSUPP;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
				  (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	/* On a miss (R0 == NULL) skip the three fixup instructions below. */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3);
	/* Advance R0 past the key to the stored percpu pointer ... */
	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
				offsetof(struct htab_elem, key) + roundup(map->key_size, 8));
	/* ... load it and convert it to this CPU's address. */
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
	*insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);

	return insn - insn_buf;
}

/* Lookup the value slot of @key on an explicitly chosen CPU. */
static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct htab_elem *l;

	if (cpu >= nr_cpu_ids)
		return NULL;

	l = __htab_map_lookup_elem(map, key);
	if (l)
		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
	else
		return NULL;
}

static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		/* Set the LRU reference bit on a hit. */
		bpf_lru_node_set_ref(&l->lru_node);
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	}

	return NULL;
}

static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct htab_elem *l;

	if (cpu >= nr_cpu_ids)
		return NULL;

	l = __htab_map_lookup_elem(map, key);
	if (l) {
		/* Set the LRU reference bit on a hit. */
		bpf_lru_node_set_ref(&l->lru_node);
		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
	}

	return NULL;
}

/* Copy the percpu value(s) of an element out to user-supplied memory.
 * With BPF_F_CPU set in map_flags, the upper 32 bits of map_flags select
 * a single CPU and only that CPU's value is copied; otherwise the values
 * of all possible CPUs are copied back-to-back, each slot rounded up to
 * 8 bytes.  Returns 0 on success or -ENOENT if the key is absent.
 */
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value, u64 map_flags)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	ret = 0;
	/* We do not mark LRU map element here in order to not mess up
	 * eviction heuristics when user space does a map walk.
	 */
	pptr = htab_elem_get_ptr(l, map->key_size);
	if (map_flags & BPF_F_CPU) {
		/* Target CPU is carried in the upper 32 bits of map_flags. */
		cpu = map_flags >> 32;
		copy_map_value(map, value, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value);
		goto out;
	}
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
out:
	rcu_read_unlock();
	return ret;
}

/* Update the percpu value(s) of an element, dispatching to the LRU or
 * the regular percpu update path under a single RCU read section.
 */
int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	int ret;

	rcu_read_lock();
	if (htab_is_lru(htab))
		ret = __htab_lru_percpu_map_update_elem(map, key, value,
							map_flags, true);
	else
		ret = htab_map_update_elem_in_place(map, key, value, map_flags,
						    true, true);
	rcu_read_unlock();

	return ret;
}

/* Pretty-print one percpu element (the key plus every CPU's value) via
 * BTF for map dumps through the filesystem.
 */
static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
					  struct seq_file *m)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	l = __htab_map_lookup_elem(map, key);
	if (!l) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": {\n");
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_putc(m, '\n');
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

const struct bpf_map_ops htab_percpu_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_gen_lookup = htab_percpu_map_gen_lookup,
	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_percpu),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops htab_lru_percpu_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_lru_percpu_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_lru_percpu),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

/* fd-based hash maps store a 32-bit file descriptor as the value (used
 * below by hash-of-maps); reject any other value size.
 */
static int fd_htab_map_alloc_check(union bpf_attr *attr)
{
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return htab_map_alloc_check(attr);
}

/* Drop the reference on every object stashed in the table, then free the
 * hash table itself.
 */
static void fd_htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_node *n;
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			void *ptr = fd_htab_map_get_ptr(map, l);

			map->ops->map_fd_put_ptr(map, ptr, false);
		}
	}

	htab_map_free(map);
}

/* only called from syscall */
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	ptr = htab_map_lookup_elem(map, key);
	if (ptr)
		/* Translate the stashed object back into an id for user space. */
		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* Only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
				void *key, void *value, u64 map_flags)
{
	void *ptr;
	int ret;

	/* Resolve the fd in *value to a referenced object pointer. */
	ptr = map->ops->map_fd_get_ptr(map, map_file, *(int *)value);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	/* The htab bucket lock is always held during update operations in fd
	 * htab map, and the following rcu_read_lock() is only used to avoid
	 * the WARN_ON_ONCE in htab_map_update_elem_in_place().
	 */
	rcu_read_lock();
	ret = htab_map_update_elem_in_place(map, key, &ptr, map_flags, false, false);
	rcu_read_unlock();
	/* On failure, drop the reference taken by map_fd_get_ptr() above. */
	if (ret)
		map->ops->map_fd_put_ptr(map, ptr, false);

	return ret;
}

/* Allocate a hash-of-maps: build the inner map template from the given
 * inner_map_fd first, then create the hash table and attach the template.
 */
static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = htab_map_alloc(attr);
	if (IS_ERR(map)) {
		/* Don't leak the template when table creation fails. */
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

/* Lookup returns the inner map itself rather than the stored slot. */
static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = htab_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

/* Inline the inner-map lookup: call __htab_map_lookup_elem(), and on a
 * hit advance R0 past the key storage and dereference the stored inner
 * map pointer.
 */
static int htab_of_map_gen_lookup(struct bpf_map *map,
				  struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
				  (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	/* On a miss (R0 == NULL) skip the two fixup instructions below. */
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);

	return insn - insn_buf;
}

static void htab_of_map_free(struct bpf_map *map)
{
	/* Free the inner map template, then release all stashed inner maps
	 * and the table itself.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	fd_htab_map_free(map);
}

const struct bpf_map_ops htab_of_maps_map_ops = {
	.map_alloc_check = fd_htab_map_alloc_check,
	.map_alloc = htab_of_map_alloc,
	.map_free = htab_of_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_of_map_lookup_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = htab_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab),
	.map_btf_id = &htab_map_btf_ids[0],
};