// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/rcupdate_wait.h>
#include <linux/random.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"
#include "bpf_lru_list.h"
#include "map_in_map.h"
#include <linux/bpf_mem_alloc.h>

#define HTAB_CREATE_FLAG_MASK						\
	(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |	\
	 BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)

#define BATCH_OPS(_name)			\
	.map_lookup_batch =			\
	_name##_map_lookup_batch,		\
	.map_lookup_and_delete_batch =		\
	_name##_map_lookup_and_delete_batch,	\
	.map_update_batch =			\
	generic_map_update_batch,		\
	.map_delete_batch =			\
	generic_map_delete_batch

/*
 * The bucket lock has two protection scopes:
 *
 * 1) Serializing concurrent operations from BPF programs on different
 *    CPUs
 *
 * 2) Serializing concurrent operations from BPF programs and sys_bpf()
 *
 * BPF programs can execute in any context including perf, kprobes and
 * tracing. As there are almost no limits where perf, kprobes and tracing
 * can be invoked from, the lock operations need to be protected against
 * deadlocks. Deadlocks can be caused by recursion and by an invocation in
 * the lock held section when functions which acquire this lock are invoked
 * from sys_bpf(). BPF recursion is prevented by incrementing the per CPU
 * variable bpf_prog_active, which prevents BPF programs attached to perf
 * events, kprobes and tracing from being invoked before the prior
 * invocation from one of these contexts completed. sys_bpf() uses the same
 * mechanism by pinning the task to the current CPU and incrementing the
 * recursion protection across the map operation.
 *
 * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
 * operations like memory allocations (even with GFP_ATOMIC) from atomic
 * contexts. This is required because even with GFP_ATOMIC the memory
 * allocator calls into code paths which acquire locks with long held lock
 * sections. To ensure the deterministic behaviour these locks are regular
 * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
 * true atomic contexts on an RT kernel are the low level hardware
 * handling, scheduling, low level interrupt handling, NMIs etc. None of
 * these contexts should ever do memory allocations.
 *
 * As regular device interrupt handlers and soft interrupts are forced into
 * thread context, the existing code which does
 *   spin_lock*(); alloc(GFP_ATOMIC); spin_unlock*();
 * just works.
 *
 * In theory the BPF locks could be converted to regular spinlocks as well,
 * but the bucket locks and percpu_freelist locks can be taken from
 * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
 * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
 * it was only safe to use raw spinlocks for preallocated hash maps on an
 * RT kernel, because there is no memory allocation within the lock held
 * sections. However, after the hash map was fully converted to use
 * bpf_mem_alloc, only non-synchronous memory allocation is used for
 * non-preallocated hash maps, so it is safe to always use a raw spinlock
 * for the bucket lock.
 */
struct bucket {
	struct hlist_nulls_head head;
	raw_spinlock_t raw_lock;
};

#define HASHTAB_MAP_LOCK_COUNT 8
#define HASHTAB_MAP_LOCK_MASK (HASHTAB_MAP_LOCK_COUNT - 1)

struct bpf_htab {
	struct bpf_map map;
	struct bpf_mem_alloc ma;
	struct bpf_mem_alloc pcpu_ma;
	struct bucket *buckets;
	void *elems;
	union {
		struct pcpu_freelist freelist;
		struct bpf_lru lru;
	};
	struct htab_elem *__percpu *extra_elems;
	/* number of elements in non-preallocated hashtable is kept
	 * in either pcount or count
	 */
	struct percpu_counter pcount;
	atomic_t count;
	bool use_percpu_counter;
	u32 n_buckets;	/* number of hash buckets */
	u32 elem_size;	/* size of each element in bytes */
	u32 hashrnd;
	struct lock_class_key lockdep_key;
	int __percpu *map_locked[HASHTAB_MAP_LOCK_COUNT];
};

/* each htab element is struct htab_elem + key + value */
struct htab_elem {
	union {
		struct hlist_nulls_node hash_node;
		struct {
			void *padding;
			union {
				struct pcpu_freelist_node fnode;
				struct htab_elem *batch_flink;
			};
		};
	};
	union {
		/* pointer to per-cpu pointer */
		void *ptr_to_pptr;
		struct bpf_lru_node lru_node;
	};
	u32 hash;
	char key[] __aligned(8);
};

static inline bool htab_is_prealloc(const struct bpf_htab *htab)
{
	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
}

static void htab_init_buckets(struct bpf_htab *htab)
{
	unsigned int i;

	for (i = 0; i < htab->n_buckets; i++) {
		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
		raw_spin_lock_init(&htab->buckets[i].raw_lock);
		lockdep_set_class(&htab->buckets[i].raw_lock,
				  &htab->lockdep_key);
		cond_resched();
	}
}

static inline int htab_lock_bucket(const struct bpf_htab *htab,
				   struct bucket *b, u32 hash,
				   unsigned long *pflags)
{
	unsigned long flags;

	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);

	preempt_disable();
	local_irq_save(flags);
	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
		__this_cpu_dec(*(htab->map_locked[hash]));
		local_irq_restore(flags);
		preempt_enable();
		return -EBUSY;
	}

	raw_spin_lock(&b->raw_lock);
	*pflags = flags;

	return 0;
}

static inline void htab_unlock_bucket(const struct bpf_htab *htab,
				      struct bucket *b, u32 hash,
				      unsigned long flags)
{
	hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
	raw_spin_unlock(&b->raw_lock);
	__this_cpu_dec(*(htab->map_locked[hash]));
	local_irq_restore(flags);
	preempt_enable();
}
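
/* Note on the map_locked counters used above: they form a per-CPU, per
 * lock-bucket recursion guard rather than a lock. If a BPF program running
 * on this CPU re-enters a map operation that maps to the same lock bucket
 * (e.g. from a tracing program that fires inside the allocator or the LRU
 * code), htab_lock_bucket() returns -EBUSY instead of deadlocking on the
 * raw spinlock it already holds, and the caller propagates that error to
 * the nested update/delete.
 *
 * A minimal sketch of the calling pattern used throughout this file:
 *
 *	ret = htab_lock_bucket(htab, b, hash, &flags);
 *	if (ret)
 *		return ret;	// -EBUSY on same-CPU re-entry
 *	... modify the bucket's hlist_nulls ...
 *	htab_unlock_bucket(htab, b, hash, flags);
 */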

static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);

static bool htab_is_lru(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_LRU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static bool htab_is_percpu(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}

static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
				     void __percpu *pptr)
{
	*(void __percpu **)(l->key + key_size) = pptr;
}

static inline void __percpu *htab_elem_get_ptr(struct htab_elem *l, u32 key_size)
{
	return *(void __percpu **)(l->key + key_size);
}

static void *fd_htab_map_get_ptr(const struct bpf_map *map, struct htab_elem *l)
{
	return *(void **)(l->key + roundup(map->key_size, 8));
}

static struct htab_elem *get_htab_elem(struct bpf_htab *htab, int i)
{
	return (struct htab_elem *) (htab->elems + i * (u64)htab->elem_size);
}

static bool htab_has_extra_elems(struct bpf_htab *htab)
{
	return !htab_is_percpu(htab) && !htab_is_lru(htab);
}

static void htab_free_prealloced_timers(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (!btf_record_has_field(htab->map.record, BPF_TIMER))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		bpf_obj_free_timer(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
		cond_resched();
	}
}

static void htab_free_prealloced_wq(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (!btf_record_has_field(htab->map.record, BPF_WORKQUEUE))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		bpf_obj_free_workqueue(htab->map.record,
				       elem->key + round_up(htab->map.key_size, 8));
		cond_resched();
	}
}

static void htab_free_prealloced_fields(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int i;

	if (IS_ERR_OR_NULL(htab->map.record))
		return;
	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();
	for (i = 0; i < num_entries; i++) {
		struct htab_elem *elem;

		elem = get_htab_elem(htab, i);
		if (htab_is_percpu(htab)) {
			void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
			int cpu;

			for_each_possible_cpu(cpu) {
				bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
				cond_resched();
			}
		} else {
			bpf_obj_free_fields(htab->map.record, elem->key + round_up(htab->map.key_size, 8));
			cond_resched();
		}
		cond_resched();
	}
}

static void htab_free_elems(struct bpf_htab *htab)
{
	int i;

	if (!htab_is_percpu(htab))
		goto free_elems;

	for (i = 0; i < htab->map.max_entries; i++) {
		void __percpu *pptr;

		pptr = htab_elem_get_ptr(get_htab_elem(htab, i),
					 htab->map.key_size);
		free_percpu(pptr);
		cond_resched();
	}
free_elems:
	bpf_map_area_free(htab->elems);
}

/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
 * (bucket_lock). If both locks need to be acquired together, the lock
 * order is always lru_lock -> bucket_lock and this only happens in
 * bpf_lru_list.c logic. For example, certain code path of
 * bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
 * will acquire lru_lock first followed by acquiring bucket_lock.
 *
 * In hashtab.c, to avoid deadlock, lock acquisition of
 * bucket_lock followed by lru_lock is not allowed. In such cases,
 * bucket_lock needs to be released first before acquiring lru_lock.
 */
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
					  u32 hash)
{
	struct bpf_lru_node *node = bpf_lru_pop_free(&htab->lru, hash);
	struct htab_elem *l;

	if (node) {
		bpf_map_inc_elem_count(&htab->map);
		l = container_of(node, struct htab_elem, lru_node);
		memcpy(l->key, key, htab->map.key_size);
		return l;
	}

	return NULL;
}

static int prealloc_init(struct bpf_htab *htab)
{
	u32 num_entries = htab->map.max_entries;
	int err = -ENOMEM, i;

	if (htab_has_extra_elems(htab))
		num_entries += num_possible_cpus();

	htab->elems = bpf_map_area_alloc((u64)htab->elem_size * num_entries,
					 htab->map.numa_node);
	if (!htab->elems)
		return -ENOMEM;

	if (!htab_is_percpu(htab))
		goto skip_percpu_elems;

	for (i = 0; i < num_entries; i++) {
		u32 size = round_up(htab->map.value_size, 8);
		void __percpu *pptr;

		pptr = bpf_map_alloc_percpu(&htab->map, size, 8,
					    GFP_USER | __GFP_NOWARN);
		if (!pptr)
			goto free_elems;
		htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
				  pptr);
		cond_resched();
	}

skip_percpu_elems:
	if (htab_is_lru(htab))
		err = bpf_lru_init(&htab->lru,
				   htab->map.map_flags & BPF_F_NO_COMMON_LRU,
				   offsetof(struct htab_elem, hash) -
				   offsetof(struct htab_elem, lru_node),
				   htab_lru_map_delete_node,
				   htab);
	else
		err = pcpu_freelist_init(&htab->freelist);

	if (err)
		goto free_elems;

	if (htab_is_lru(htab))
		bpf_lru_populate(&htab->lru, htab->elems,
				 offsetof(struct htab_elem, lru_node),
				 htab->elem_size, num_entries);
	else
		pcpu_freelist_populate(&htab->freelist,
				       htab->elems + offsetof(struct htab_elem, fnode),
				       htab->elem_size, num_entries);

	return 0;

free_elems:
	htab_free_elems(htab);
	return err;
}

static void prealloc_destroy(struct bpf_htab *htab)
{
	htab_free_elems(htab);

	if (htab_is_lru(htab))
		bpf_lru_destroy(&htab->lru);
	else
		pcpu_freelist_destroy(&htab->freelist);
}

static int alloc_extra_elems(struct bpf_htab *htab)
{
	struct htab_elem *__percpu *pptr, *l_new;
	struct pcpu_freelist_node *l;
	int cpu;

	pptr = bpf_map_alloc_percpu(&htab->map, sizeof(struct htab_elem *), 8,
				    GFP_USER | __GFP_NOWARN);
	if (!pptr)
		return -ENOMEM;

	for_each_possible_cpu(cpu) {
		l = pcpu_freelist_pop(&htab->freelist);
		/* pop will succeed, since prealloc_init()
		 * preallocated extra num_possible_cpus elements
		 */
		l_new = container_of(l, struct htab_elem, fnode);
		*per_cpu_ptr(pptr, cpu) = l_new;
	}
	htab->extra_elems = pptr;
	return 0;
}
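
/* A rough sketch of the preallocated memory layout used above:
 *
 * prealloc_init() carves every element out of a single
 * bpf_map_area_alloc()'d region of elem_size * max_entries bytes; for maps
 * that are neither per-cpu nor LRU, num_possible_cpus() extra elements are
 * appended and parked in the per-CPU extra_elems pointers, so that an
 * update of an existing key can swap the new element for the old one
 * without touching the percpu freelist. Per-cpu maps instead store a
 * pointer to a separately allocated percpu value area right behind the key
 * (see htab_elem_set_ptr()/htab_elem_get_ptr()).
 */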

/* Called from syscall */
static int htab_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu. percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	bool zero_seed = (attr->map_flags & BPF_F_ZERO_SEED);
	int numa_node = bpf_map_attr_numa_node(attr);

	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
		     offsetof(struct htab_elem, hash_node.pprev));

	if (zero_seed && !capable(CAP_SYS_ADMIN))
		/* Guard against local DoS, and discourage production use. */
		return -EPERM;

	if (attr->map_flags & ~HTAB_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (!lru && percpu_lru)
		return -EINVAL;

	if (lru && !prealloc)
		return -ENOTSUPP;

	if (numa_node != NUMA_NO_NODE && (percpu || percpu_lru))
		return -EINVAL;

	/* check sanity of attributes.
	 * value_size == 0 may be allowed in the future to use map as a set
	 */
	if (attr->max_entries == 0 || attr->key_size == 0 ||
	    attr->value_size == 0)
		return -EINVAL;

	if ((u64)attr->key_size + attr->value_size >= KMALLOC_MAX_SIZE -
	    sizeof(struct htab_elem))
		/* if key_size + value_size is bigger, the user space won't be
		 * able to access the elements via bpf syscall. This check
		 * also makes sure that the elem_size doesn't overflow and it's
		 * kmalloc-able later in htab_map_update_elem()
		 */
		return -E2BIG;

	return 0;
}
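
/* A worked example of the sizing rules checked above and used by
 * htab_map_alloc() below (illustrative numbers only): for a regular hash
 * map with key_size = 4 and value_size = 64, each element occupies
 *
 *	elem_size = sizeof(struct htab_elem)
 *		    + round_up(4, 8) + round_up(64, 8)
 *		  = sizeof(struct htab_elem) + 8 + 64 bytes,
 *
 * while a per-cpu map stores only a pointer (sizeof(void *)) in place of
 * the inline value. The KMALLOC_MAX_SIZE bound above also guarantees this
 * sum cannot overflow the u32 elem_size.
 */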

static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
	bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
		       attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	bool lru = (attr->map_type == BPF_MAP_TYPE_LRU_HASH ||
		    attr->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH);
	/* percpu_lru means each cpu has its own LRU list.
	 * it is different from BPF_MAP_TYPE_PERCPU_HASH where
	 * the map's value itself is percpu. percpu_lru has
	 * nothing to do with the map's value.
	 */
	bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
	bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
	struct bpf_htab *htab;
	int err, i;

	htab = bpf_map_area_alloc(sizeof(*htab), NUMA_NO_NODE);
	if (!htab)
		return ERR_PTR(-ENOMEM);

	lockdep_register_key(&htab->lockdep_key);

	bpf_map_init_from_attr(&htab->map, attr);

	if (percpu_lru) {
		/* ensure each CPU's lru list has >=1 elements.
		 * since we are at it, make each lru list have the same
		 * number of elements.
		 */
		htab->map.max_entries = roundup(attr->max_entries,
						num_possible_cpus());
		if (htab->map.max_entries < attr->max_entries)
			htab->map.max_entries = rounddown(attr->max_entries,
							  num_possible_cpus());
	}

	/* hash table size must be power of 2; roundup_pow_of_two() can overflow
	 * into UB on 32-bit arches, so check that first
	 */
	err = -E2BIG;
	if (htab->map.max_entries > 1UL << 31)
		goto free_htab;

	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);

	htab->elem_size = sizeof(struct htab_elem) +
			  round_up(htab->map.key_size, 8);
	if (percpu)
		htab->elem_size += sizeof(void *);
	else
		htab->elem_size += round_up(htab->map.value_size, 8);

	/* check for u32 overflow */
	if (htab->n_buckets > U32_MAX / sizeof(struct bucket))
		goto free_htab;

	err = bpf_map_init_elem_count(&htab->map);
	if (err)
		goto free_htab;

	err = -ENOMEM;
	htab->buckets = bpf_map_area_alloc(htab->n_buckets *
					   sizeof(struct bucket),
					   htab->map.numa_node);
	if (!htab->buckets)
		goto free_elem_count;

	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
		htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
							   sizeof(int),
							   sizeof(int),
							   GFP_USER);
		if (!htab->map_locked[i])
			goto free_map_locked;
	}

	if (htab->map.map_flags & BPF_F_ZERO_SEED)
		htab->hashrnd = 0;
	else
		htab->hashrnd = get_random_u32();

	htab_init_buckets(htab);

	/* compute_batch_value() computes batch value as num_online_cpus() * 2
	 * and __percpu_counter_compare() needs
	 * htab->max_entries - cur_number_of_elems to be more than batch * num_online_cpus()
	 * for percpu_counter to be faster than atomic_t. In practice the average bpf
	 * hash map size is 10k, which means that a system with 64 cpus will fill
	 * hashmap to 20% of 10k before percpu_counter becomes ineffective. Therefore
	 * define our own batch count as 32 then 10k hash map can be filled up to 80%:
	 * 10k - 8k > 32 _batch_ * 64 _cpus_
	 * and __percpu_counter_compare() will still be fast. At that point hash map
	 * collisions will dominate its performance anyway. Assume that hash map filled
	 * to 50+% isn't going to be O(1) and use the following formula to choose
	 * between percpu_counter and atomic_t.
	 */
#define PERCPU_COUNTER_BATCH 32
	if (attr->max_entries / 2 > num_online_cpus() * PERCPU_COUNTER_BATCH)
		htab->use_percpu_counter = true;

	if (htab->use_percpu_counter) {
		err = percpu_counter_init(&htab->pcount, 0, GFP_KERNEL);
		if (err)
			goto free_map_locked;
	}

	if (prealloc) {
		err = prealloc_init(htab);
		if (err)
			goto free_map_locked;

		if (!percpu && !lru) {
			/* lru itself can remove the least used element, so
			 * there is no need for an extra elem during map_update.
			 */
			err = alloc_extra_elems(htab);
			if (err)
				goto free_prealloc;
		}
	} else {
		err = bpf_mem_alloc_init(&htab->ma, htab->elem_size, false);
		if (err)
			goto free_map_locked;
		if (percpu) {
			err = bpf_mem_alloc_init(&htab->pcpu_ma,
						 round_up(htab->map.value_size, 8), true);
			if (err)
				goto free_map_locked;
		}
	}

	return &htab->map;

free_prealloc:
	prealloc_destroy(htab);
free_map_locked:
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
		free_percpu(htab->map_locked[i]);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
free_elem_count:
	bpf_map_free_elem_count(&htab->map);
free_htab:
	lockdep_unregister_key(&htab->lockdep_key);
	bpf_map_area_free(htab);
	return ERR_PTR(err);
}

static inline u32 htab_map_hash(const void *key, u32 key_len, u32 hashrnd)
{
	if (likely(key_len % 4 == 0))
		return jhash2(key, key_len / 4, hashrnd);
	return jhash(key, key_len, hashrnd);
}

static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &htab->buckets[hash & (htab->n_buckets - 1)];
}

static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	return &__select_bucket(htab, hash)->head;
}

/* this lookup function can only be called with bucket lock taken */
static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
					 void *key, u32 key_size)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	return NULL;
}

/* can be called without bucket lock. it will repeat the loop in
 * the unlikely event when elements moved from one bucket into another
 * while link list is being walked
 */
static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
					       u32 hash, void *key,
					       u32 key_size, u32 n_buckets)
{
	struct hlist_nulls_node *n;
	struct htab_elem *l;

again:
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l->hash == hash && !memcmp(&l->key, key, key_size))
			return l;

	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
		goto again;

	return NULL;
}
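
/* Why the retry above works: each bucket's nulls list is initialised with
 * the bucket index as its 'nulls' value (see htab_init_buckets()). If an
 * element is freed and recycled into a different bucket while a lockless
 * walker traverses the list, the walk can end up on another chain.
 * Comparing the terminating nulls value against 'hash & (n_buckets - 1)'
 * detects that case and restarts the walk, so a lockless lookup does not
 * wrongly report a miss after such a move.
 */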

/* Called from syscall or from eBPF program directly, so
 * arguments have to match bpf_map_lookup_elem() exactly.
 * The return value is adjusted by BPF instructions
 * in htab_map_gen_lookup().
 */
static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	u32 hash, key_size;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	return l;
}

static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return l->key + round_up(map->key_size, 8);

	return NULL;
}

/* inline bpf_map_lookup_elem() call.
 * Instead of:
 * bpf_prog
 *   bpf_map_lookup_elem
 *     map->ops->map_lookup_elem
 *       htab_map_lookup_elem
 *         __htab_map_lookup_elem
 * do:
 * bpf_prog
 *   __htab_map_lookup_elem
 */
static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

static __always_inline void *__htab_lru_map_lookup_elem(struct bpf_map *map,
							void *key, const bool mark)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		if (mark)
			bpf_lru_node_set_ref(&l->lru_node);
		return l->key + round_up(map->key_size, 8);
	}

	return NULL;
}

static void *htab_lru_map_lookup_elem(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, true);
}

static void *htab_lru_map_lookup_elem_sys(struct bpf_map *map, void *key)
{
	return __htab_lru_map_lookup_elem(map, key, false);
}

static int htab_lru_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int ref_reg = BPF_REG_1;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
		     (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
			      offsetof(struct htab_elem, lru_node) +
			      offsetof(struct bpf_lru_node, ref));
	*insn++ = BPF_JMP_IMM(BPF_JNE, ref_reg, 0, 1);
	*insn++ = BPF_ST_MEM(BPF_B, ret,
			     offsetof(struct htab_elem, lru_node) +
			     offsetof(struct bpf_lru_node, ref),
			     1);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	return insn - insn_buf;
}

static void check_and_free_fields(struct bpf_htab *htab,
				  struct htab_elem *elem)
{
	if (htab_is_percpu(htab)) {
		void __percpu *pptr = htab_elem_get_ptr(elem, htab->map.key_size);
		int cpu;

		for_each_possible_cpu(cpu)
			bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
	} else {
		void *map_value = elem->key + round_up(htab->map.key_size, 8);

		bpf_obj_free_fields(htab->map.record, map_value);
	}
}

/* It is called from the bpf_lru_list when the LRU needs to delete
 * older elements from the htab.
 */
static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
{
	struct bpf_htab *htab = arg;
	struct htab_elem *l = NULL, *tgt_l;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags;
	struct bucket *b;
	int ret;

	tgt_l = container_of(node, struct htab_elem, lru_node);
	b = __select_bucket(htab, tgt_l->hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, tgt_l->hash, &flags);
	if (ret)
		return false;

	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		if (l == tgt_l) {
			hlist_nulls_del_rcu(&l->hash_node);
			check_and_free_fields(htab, l);
			bpf_map_dec_elem_count(&htab->map);
			break;
		}

	htab_unlock_bucket(htab, b, tgt_l->hash, flags);

	return l == tgt_l;
}

/* Called from syscall */
static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct htab_elem *l, *next_l;
	u32 hash, key_size;
	int i = 0;

	WARN_ON_ONCE(!rcu_read_lock_held());

	key_size = map->key_size;

	if (!key)
		goto find_first_elem;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	head = select_bucket(htab, hash);

	/* lookup the key */
	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);

	if (!l)
		goto find_first_elem;

	/* key was found, get next key in the same bucket */
	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
					struct htab_elem, hash_node);

	if (next_l) {
		/* if next elem in this hash list is non-zero, just return it */
		memcpy(next_key, next_l->key, key_size);
		return 0;
	}

	/* no more elements in this hash list, go to the next bucket */
	i = hash & (htab->n_buckets - 1);
	i++;

find_first_elem:
	/* iterate over buckets */
	for (; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		/* pick first element in the bucket */
		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
						struct htab_elem, hash_node);
		if (next_l) {
			/* if it's not empty, just return it */
			memcpy(next_key, next_l->key, key_size);
			return 0;
		}
	}

	/* iterated over all buckets and all elements */
	return -ENOENT;
}

static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
	check_and_free_fields(htab, l);
	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
		bpf_mem_cache_free(&htab->pcpu_ma, l->ptr_to_pptr);
	bpf_mem_cache_free(&htab->ma, l);
}

static void htab_put_fd_value(struct bpf_htab *htab, struct htab_elem *l)
{
	struct bpf_map *map = &htab->map;
	void *ptr;

	if (map->ops->map_fd_put_ptr) {
		ptr = fd_htab_map_get_ptr(map, l);
		map->ops->map_fd_put_ptr(map, ptr, true);
	}
}

static bool is_map_full(struct bpf_htab *htab)
{
	if (htab->use_percpu_counter)
		return __percpu_counter_compare(&htab->pcount, htab->map.max_entries,
						PERCPU_COUNTER_BATCH) >= 0;
	return atomic_read(&htab->count) >= htab->map.max_entries;
}
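
/* A short note on element accounting: bpf_map_{inc,dec}_elem_count()
 * maintain the per-map element counter set up by bpf_map_init_elem_count()
 * in htab_map_alloc(), while enforcement of max_entries for
 * non-preallocated maps goes through is_map_full() above, backed either by
 * the plain atomic 'count' or, for large maps, by the 'pcount'
 * percpu_counter with the custom PERCPU_COUNTER_BATCH described in
 * htab_map_alloc().
 */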

static void inc_elem_count(struct bpf_htab *htab)
{
	bpf_map_inc_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH);
	else
		atomic_inc(&htab->count);
}

static void dec_elem_count(struct bpf_htab *htab)
{
	bpf_map_dec_elem_count(&htab->map);

	if (htab->use_percpu_counter)
		percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH);
	else
		atomic_dec(&htab->count);
}


static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
	htab_put_fd_value(htab, l);

	if (htab_is_prealloc(htab)) {
		bpf_map_dec_elem_count(&htab->map);
		check_and_free_fields(htab, l);
		__pcpu_freelist_push(&htab->freelist, &l->fnode);
	} else {
		dec_elem_count(htab);
		htab_elem_free(htab, l);
	}
}

static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	if (!onallcpus) {
		/* copy true value_size bytes */
		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
	} else {
		u32 size = round_up(htab->map.value_size, 8);
		int off = 0, cpu;

		for_each_possible_cpu(cpu) {
			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
			off += size;
		}
	}
}

static void pcpu_init_value(struct bpf_htab *htab, void __percpu *pptr,
			    void *value, bool onallcpus)
{
	/* When not setting the initial value on all cpus, zero-fill element
	 * values for other cpus. Otherwise, bpf program has no way to ensure
	 * known initial values for cpus other than current one
	 * (onallcpus=false always when coming from bpf prog).
	 */
	if (!onallcpus) {
		int current_cpu = raw_smp_processor_id();
		int cpu;

		for_each_possible_cpu(cpu) {
			if (cpu == current_cpu)
				copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value);
			else /* Since elem is preallocated, we cannot touch special fields */
				zero_map_value(&htab->map, per_cpu_ptr(pptr, cpu));
		}
	} else {
		pcpu_copy_value(htab, pptr, value, onallcpus);
	}
}

static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
{
	return htab->map.map_type == BPF_MAP_TYPE_HASH_OF_MAPS &&
	       BITS_PER_LONG == 64;
}

static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
					 void *value, u32 key_size, u32 hash,
					 bool percpu, bool onallcpus,
					 struct htab_elem *old_elem)
{
	u32 size = htab->map.value_size;
	bool prealloc = htab_is_prealloc(htab);
	struct htab_elem *l_new, **pl_new;
	void __percpu *pptr;

	if (prealloc) {
		if (old_elem) {
			/* if we're updating the existing element,
			 * use per-cpu extra elems to avoid freelist_pop/push
			 */
			pl_new = this_cpu_ptr(htab->extra_elems);
			l_new = *pl_new;
			htab_put_fd_value(htab, old_elem);
			*pl_new = old_elem;
		} else {
			struct pcpu_freelist_node *l;

			l = __pcpu_freelist_pop(&htab->freelist);
			if (!l)
				return ERR_PTR(-E2BIG);
			l_new = container_of(l, struct htab_elem, fnode);
			bpf_map_inc_elem_count(&htab->map);
		}
	} else {
		if (is_map_full(htab))
			if (!old_elem)
				/* when map is full and update() is replacing
				 * old element, it's ok to allocate, since
				 * old element will be freed immediately.
				 * Otherwise return an error
				 */
				return ERR_PTR(-E2BIG);
		inc_elem_count(htab);
		l_new = bpf_mem_cache_alloc(&htab->ma);
		if (!l_new) {
			l_new = ERR_PTR(-ENOMEM);
			goto dec_count;
		}
	}

	memcpy(l_new->key, key, key_size);
	if (percpu) {
		if (prealloc) {
			pptr = htab_elem_get_ptr(l_new, key_size);
		} else {
			/* alloc_percpu zero-fills */
			pptr = bpf_mem_cache_alloc(&htab->pcpu_ma);
			if (!pptr) {
				bpf_mem_cache_free(&htab->ma, l_new);
				l_new = ERR_PTR(-ENOMEM);
				goto dec_count;
			}
			l_new->ptr_to_pptr = pptr;
			pptr = *(void **)pptr;
		}

		pcpu_init_value(htab, pptr, value, onallcpus);

		if (!prealloc)
			htab_elem_set_ptr(l_new, key_size, pptr);
	} else if (fd_htab_map_needs_adjust(htab)) {
		size = round_up(size, 8);
		memcpy(l_new->key + round_up(key_size, 8), value, size);
	} else {
		copy_map_value(&htab->map,
			       l_new->key + round_up(key_size, 8),
			       value);
	}

	l_new->hash = hash;
	return l_new;
dec_count:
	dec_elem_count(htab);
	return l_new;
}

static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
		       u64 map_flags)
{
	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
		/* elem already exists */
		return -EEXIST;

	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
		/* elem doesn't exist, cannot update it */
		return -ENOENT;

	return 0;
}
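
/* Quick reference for the update flags handled by the functions below:
 * BPF_ANY creates or replaces, BPF_NOEXIST only creates (-EEXIST if the
 * key is already present), BPF_EXIST only replaces (-ENOENT if it is not),
 * and BPF_F_LOCK may additionally be or'ed in for the plain hash map when
 * the value embeds a struct bpf_spin_lock, in which case the value is
 * copied in place under the element's spin lock instead of swapping in a
 * new element.
 *
 * A minimal user-space sketch (illustrative only):
 *
 *	bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
 *	bpf_map_update_elem(map_fd, &key, &val, BPF_EXIST | BPF_F_LOCK);
 */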

/* Called from syscall or from eBPF program */
static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	if (unlikely(map_flags & BPF_F_LOCK)) {
		if (unlikely(!btf_record_has_field(map->record, BPF_SPIN_LOCK)))
			return -EINVAL;
		/* find an element without taking the bucket lock */
		l_old = lookup_nulls_elem_raw(head, hash, key, key_size,
					      htab->n_buckets);
		ret = check_flags(htab, l_old, map_flags);
		if (ret)
			return ret;
		if (l_old) {
			/* grab the element lock and update value in place */
			copy_map_value_locked(map,
					      l_old->key + round_up(key_size, 8),
					      value, false);
			return 0;
		}
		/* fall through, grab the bucket lock and lookup again.
		 * 99.9% chance that the element won't be found,
		 * but second lookup under lock has to be done.
		 */
	}

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (unlikely(l_old && (map_flags & BPF_F_LOCK))) {
		/* first lookup without the bucket lock didn't find the element,
		 * but second lookup with the bucket lock found it.
		 * This case is highly unlikely, but has to be dealt with:
		 * grab the element lock in addition to the bucket lock
		 * and update element in place
		 */
		copy_map_value_locked(map,
				      l_old->key + round_up(key_size, 8),
				      value, false);
		ret = 0;
		goto err;
	}

	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
				l_old);
	if (IS_ERR(l_new)) {
		/* all pre-allocated elements are in use or memory exhausted */
		ret = PTR_ERR(l_new);
		goto err;
	}

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		hlist_nulls_del_rcu(&l_old->hash_node);
		if (!htab_is_prealloc(htab))
			free_htab_elem(htab, l_old);
		else
			check_and_free_fields(htab, l_old);
	}
	ret = 0;
err:
	htab_unlock_bucket(htab, b, hash, flags);
	return ret;
}

static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
	check_and_free_fields(htab, elem);
	bpf_map_dec_elem_count(&htab->map);
	bpf_lru_push_free(&htab->lru, &elem->lru_node);
}

static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
				     u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new, *l_old = NULL;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because getting free nodes from LRU may need
	 * to remove older elements from htab and this removal
	 * operation will need a bucket lock.
	 */
	l_new = prealloc_lru_pop(htab, key, hash);
	if (!l_new)
		return -ENOMEM;
	copy_map_value(&htab->map,
		       l_new->key + round_up(map->key_size, 8), value);

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	/* add new element to the head of the list, so that
	 * concurrent search will find it before old elem
	 */
	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	if (l_old) {
		bpf_lru_node_set_ref(&l_new->lru_node);
		hlist_nulls_del_rcu(&l_old->hash_node);
	}
	ret = 0;

err:
	htab_unlock_bucket(htab, b, hash, flags);

err_lock_bucket:
	if (ret)
		htab_lru_push_free(htab, l_new);
	else if (l_old)
		htab_lru_push_free(htab, l_old);

	return ret;
}

static long __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags,
					  bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		return ret;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
				value, onallcpus);
	} else {
		l_new = alloc_htab_elem(htab, key, value, key_size,
					hash, true, onallcpus, NULL);
		if (IS_ERR(l_new)) {
			ret = PTR_ERR(l_new);
			goto err;
		}
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
	}
	ret = 0;
err:
	htab_unlock_bucket(htab, b, hash, flags);
	return ret;
}
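
/* Note on 'onallcpus' in the per-cpu update helpers: when an update comes
 * from a BPF program, onallcpus is false and only the current CPU's copy
 * of the value is written (remaining CPUs are zero-filled for a new
 * element, see pcpu_init_value()). When it comes from the bpf() syscall,
 * onallcpus is true and the caller supplies one value per possible CPU,
 * laid out as consecutive round_up(value_size, 8) byte slots.
 */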

static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					      void *value, u64 map_flags,
					      bool onallcpus)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct htab_elem *l_new = NULL, *l_old;
	struct hlist_nulls_head *head;
	unsigned long flags;
	struct bucket *b;
	u32 key_size, hash;
	int ret;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);

	b = __select_bucket(htab, hash);
	head = &b->head;

	/* For LRU, we need to alloc before taking bucket's
	 * spinlock because LRU's elem alloc may need
	 * to remove older elem from htab and this removal
	 * operation will need a bucket lock.
	 */
	if (map_flags != BPF_EXIST) {
		l_new = prealloc_lru_pop(htab, key, hash);
		if (!l_new)
			return -ENOMEM;
	}

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		goto err_lock_bucket;

	l_old = lookup_elem_raw(head, hash, key, key_size);

	ret = check_flags(htab, l_old, map_flags);
	if (ret)
		goto err;

	if (l_old) {
		bpf_lru_node_set_ref(&l_old->lru_node);

		/* per-cpu hash map can update value in-place */
		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
				value, onallcpus);
	} else {
		pcpu_init_value(htab, htab_elem_get_ptr(l_new, key_size),
				value, onallcpus);
		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
		l_new = NULL;
	}
	ret = 0;
err:
	htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
	if (l_new) {
		bpf_map_dec_elem_count(&htab->map);
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
	}
	return ret;
}

static long htab_percpu_map_update_elem(struct bpf_map *map, void *key,
					void *value, u64 map_flags)
{
	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
}

static long htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
					    void *value, u64 map_flags)
{
	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
						 false);
}

/* Called from syscall or from eBPF program */
static long htab_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l) {
		hlist_nulls_del_rcu(&l->hash_node);
		free_htab_elem(htab, l);
	} else {
		ret = -ENOENT;
	}

	htab_unlock_bucket(htab, b, hash, flags);
	return ret;
}

static long htab_lru_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct bucket *b;
	struct htab_elem *l;
	unsigned long flags;
	u32 hash, key_size;
	int ret;

	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, hash, &flags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);

	if (l)
		hlist_nulls_del_rcu(&l->hash_node);
	else
		ret = -ENOENT;

	htab_unlock_bucket(htab, b, hash, flags);
	if (l)
		htab_lru_push_free(htab, l);
	return ret;
}

static void delete_all_elements(struct bpf_htab *htab)
{
	int i;

	/* It's called from a worker thread, so disable migration here,
	 * since bpf_mem_cache_free() relies on that.
	 */
	migrate_disable();
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			hlist_nulls_del_rcu(&l->hash_node);
			htab_elem_free(htab, l);
		}
		cond_resched();
	}
	migrate_enable();
}

static void htab_free_malloced_timers_or_wq(struct bpf_htab *htab, bool is_timer)
{
	int i;

	rcu_read_lock();
	for (i = 0; i < htab->n_buckets; i++) {
		struct hlist_nulls_head *head = select_bucket(htab, i);
		struct hlist_nulls_node *n;
		struct htab_elem *l;

		hlist_nulls_for_each_entry(l, n, head, hash_node) {
			/* We only free timer on uref dropping to zero */
			if (is_timer)
				bpf_obj_free_timer(htab->map.record,
						   l->key + round_up(htab->map.key_size, 8));
			else
				bpf_obj_free_workqueue(htab->map.record,
						       l->key + round_up(htab->map.key_size, 8));
		}
		cond_resched_rcu();
	}
	rcu_read_unlock();
}

static void htab_map_free_timers_and_wq(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

	/* We only free timer and workqueue on uref dropping to zero */
	if (btf_record_has_field(htab->map.record, BPF_TIMER)) {
		if (!htab_is_prealloc(htab))
			htab_free_malloced_timers_or_wq(htab, true);
		else
			htab_free_prealloced_timers(htab);
	}
	if (btf_record_has_field(htab->map.record, BPF_WORKQUEUE)) {
		if (!htab_is_prealloc(htab))
			htab_free_malloced_timers_or_wq(htab, false);
		else
			htab_free_prealloced_wq(htab);
	}
}
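
/* Teardown is split in two stages (informal summary): the function above
 * only cancels and frees bpf_timer and bpf_wq objects embedded in map
 * values, and runs when the last user reference to the map is dropped
 * (it is typically wired up as the map's map_release_uref callback),
 * because those objects can keep programs and therefore the map alive.
 * Everything else, including the elements themselves, is released in
 * htab_map_free() below once map->refcnt reaches zero.
 */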

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	int i;

	/* bpf_free_used_maps() or close(map_fd) will trigger this map_free callback.
	 * bpf_free_used_maps() is called after bpf prog is no longer executing.
	 * There is no need to synchronize_rcu() here to protect map elements.
	 */

	/* htab no longer uses call_rcu() directly. bpf_mem_alloc does it
	 * underneath and is responsible for waiting for callbacks to finish
	 * during bpf_mem_alloc_destroy().
	 */
	if (!htab_is_prealloc(htab)) {
		delete_all_elements(htab);
	} else {
		htab_free_prealloced_fields(htab);
		prealloc_destroy(htab);
	}

	bpf_map_free_elem_count(map);
	free_percpu(htab->extra_elems);
	bpf_map_area_free(htab->buckets);
	bpf_mem_alloc_destroy(&htab->pcpu_ma);
	bpf_mem_alloc_destroy(&htab->ma);
	if (htab->use_percpu_counter)
		percpu_counter_destroy(&htab->pcount);
	for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)
		free_percpu(htab->map_locked[i]);
	lockdep_unregister_key(&htab->lockdep_key);
	bpf_map_area_free(htab);
}

static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
				   struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = htab_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": ");
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					     void *value, bool is_lru_map,
					     bool is_percpu, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	unsigned long bflags;
	struct htab_elem *l;
	u32 hash, key_size;
	struct bucket *b;
	int ret;

	key_size = map->key_size;

	hash = htab_map_hash(key, key_size, htab->hashrnd);
	b = __select_bucket(htab, hash);
	head = &b->head;

	ret = htab_lock_bucket(htab, b, hash, &bflags);
	if (ret)
		return ret;

	l = lookup_elem_raw(head, hash, key, key_size);
	if (!l) {
		ret = -ENOENT;
	} else {
		if (is_percpu) {
			u32 roundup_value_size = round_up(map->value_size, 8);
			void __percpu *pptr;
			int off = 0, cpu;

			pptr = htab_elem_get_ptr(l, key_size);
			for_each_possible_cpu(cpu) {
				copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(&htab->map, value + off);
				off += roundup_value_size;
			}
		} else {
			u32 roundup_key_size = round_up(map->key_size, 8);

			if (flags & BPF_F_LOCK)
				copy_map_value_locked(map, value, l->key +
						      roundup_key_size,
						      true);
			else
				copy_map_value(map, value, l->key +
					       roundup_key_size);
			/* Zeroing special fields in the temp buffer */
			check_and_init_map_value(map, value);
		}

		hlist_nulls_del_rcu(&l->hash_node);
		if (!is_lru_map)
			free_htab_elem(htab, l);
	}

	htab_unlock_bucket(htab, b, hash, bflags);

	if (is_lru_map && l)
		htab_lru_push_free(htab, l);

	return ret;
}

static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					   void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
						 flags);
}

static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						  void *key, void *value,
						  u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
						 flags);
}

static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
					       void *value, u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
						 flags);
}

static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
						      void *key, void *value,
						      u64 flags)
{
	return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
						 flags);
}
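
/* The batch helpers below process the table one bucket at a time. User
 * space drives them in a loop (informal sketch): the first call passes a
 * NULL/zero in_batch to start from bucket 0, each successful call copies
 * up to 'count' keys/values and writes the position to resume from into
 * out_batch, and the loop ends when the kernel returns -ENOENT after the
 * last bucket. -ENOSPC on the first bucket of a call means the supplied
 * buffers cannot hold even one full bucket; callers typically retry with
 * a larger 'count'.
 */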

static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
				   const union bpf_attr *attr,
				   union bpf_attr __user *uattr,
				   bool do_delete, bool is_lru_map,
				   bool is_percpu)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
	void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
	u32 batch, max_count, size, bucket_size, map_id;
	struct htab_elem *node_to_free = NULL;
	u64 elem_map_flags, map_flags;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	unsigned long flags = 0;
	bool locked = false;
	struct htab_elem *l;
	struct bucket *b;
	int ret = 0;

	elem_map_flags = attr->batch.elem_flags;
	if ((elem_map_flags & ~BPF_F_LOCK) ||
	    ((elem_map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
		return -EINVAL;

	map_flags = attr->batch.flags;
	if (map_flags)
		return -EINVAL;

	max_count = attr->batch.count;
	if (!max_count)
		return 0;

	if (put_user(0, &uattr->batch.count))
		return -EFAULT;

	batch = 0;
	if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
		return -EFAULT;

	if (batch >= htab->n_buckets)
		return -ENOENT;

	key_size = htab->map.key_size;
	roundup_key_size = round_up(htab->map.key_size, 8);
	value_size = htab->map.value_size;
	size = round_up(value_size, 8);
	if (is_percpu)
		value_size = size * num_possible_cpus();
	total = 0;
	/* while experimenting with hash tables with sizes ranging from 10 to
	 * 1000, it was observed that a bucket can have up to 5 entries.
	 */
	bucket_size = 5;

alloc:
	/* We cannot do copy_from_user or copy_to_user inside
	 * the rcu_read_lock. Allocate enough space here.
	 */
	keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN);
	values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN);
	if (!keys || !values) {
		ret = -ENOMEM;
		goto after_loop;
	}

again:
	bpf_disable_instrumentation();
	rcu_read_lock();
again_nocopy:
	dst_key = keys;
	dst_val = values;
	b = &htab->buckets[batch];
	head = &b->head;
	/* do not grab the lock unless we need it (bucket_cnt > 0). */
	if (locked) {
		ret = htab_lock_bucket(htab, b, batch, &flags);
		if (ret) {
			rcu_read_unlock();
			bpf_enable_instrumentation();
			goto after_loop;
		}
	}

	bucket_cnt = 0;
	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
		bucket_cnt++;

	if (bucket_cnt && !locked) {
		locked = true;
		goto again_nocopy;
	}

	if (bucket_cnt > (max_count - total)) {
		if (total == 0)
			ret = -ENOSPC;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
		htab_unlock_bucket(htab, b, batch, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		goto after_loop;
	}

	if (bucket_cnt > bucket_size) {
		bucket_size = bucket_cnt;
		/* Note that since bucket_cnt > 0 here, it is implicit
		 * that the lock was grabbed, so release it.
		 */
		htab_unlock_bucket(htab, b, batch, flags);
		rcu_read_unlock();
		bpf_enable_instrumentation();
		kvfree(keys);
		kvfree(values);
		goto alloc;
	}

	/* Next block is only safe to run if you have grabbed the lock */
	if (!locked)
		goto next_batch;

	hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
		memcpy(dst_key, l->key, key_size);

		if (is_percpu) {
			int off = 0, cpu;
			void __percpu *pptr;

			pptr = htab_elem_get_ptr(l, map->key_size);
			for_each_possible_cpu(cpu) {
				copy_map_value_long(&htab->map, dst_val + off, per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(&htab->map, dst_val + off);
				off += size;
			}
		} else {
			value = l->key + roundup_key_size;
			if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
				struct bpf_map **inner_map = value;

				/* Actual value is the id of the inner map */
				map_id = map->ops->map_fd_sys_lookup_elem(*inner_map);
				value = &map_id;
			}

			if (elem_map_flags & BPF_F_LOCK)
				copy_map_value_locked(map, dst_val, value,
						      true);
			else
				copy_map_value(map, dst_val, value);
			/* Zeroing special fields in the temp buffer */
			check_and_init_map_value(map, dst_val);
		}
		if (do_delete) {
			hlist_nulls_del_rcu(&l->hash_node);

			/* bpf_lru_push_free() will acquire lru_lock, which
			 * may cause deadlock. See comments in function
			 * prealloc_lru_pop(). Let us do bpf_lru_push_free()
			 * after releasing the bucket lock.
			 */
			if (is_lru_map) {
				l->batch_flink = node_to_free;
				node_to_free = l;
			} else {
				free_htab_elem(htab, l);
			}
		}
		dst_key += key_size;
		dst_val += value_size;
	}

	htab_unlock_bucket(htab, b, batch, flags);
	locked = false;

	while (node_to_free) {
		l = node_to_free;
		node_to_free = node_to_free->batch_flink;
		htab_lru_push_free(htab, l);
	}

next_batch:
	/* If we are not copying data, we can go to the next bucket without
	 * unlocking the rcu.
	 */
	if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
		batch++;
		goto again_nocopy;
	}

	rcu_read_unlock();
	bpf_enable_instrumentation();
	if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
	    key_size * bucket_cnt) ||
	    copy_to_user(uvalues + total * value_size, values,
	    value_size * bucket_cnt))) {
		ret = -EFAULT;
		goto after_loop;
	}

	total += bucket_cnt;
	batch++;
	if (batch >= htab->n_buckets) {
		ret = -ENOENT;
		goto after_loop;
	}
	goto again;

after_loop:
	if (ret == -EFAULT)
		goto out;

	/* copy # of entries and next batch */
	ubatch = u64_to_user_ptr(attr->batch.out_batch);
	if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
	    put_user(total, &uattr->batch.count))
		ret = -EFAULT;

out:
	kvfree(keys);
	kvfree(values);
	return ret;
}

static int
htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, true);
}

static int
htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					const union bpf_attr *attr,
					union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, true);
}

static int
htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
		      union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  false, false);
}

static int
htab_map_lookup_and_delete_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  false, false);
}

static int
htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
				 const union bpf_attr *attr,
				 union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, true);
}

static int
htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
					    const union bpf_attr *attr,
					    union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, true);
}

static int
htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
						  true, false);
}

static int
htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
				     const union bpf_attr *attr,
				     union bpf_attr __user *uattr)
{
	return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
						  true, false);
}

struct bpf_iter_seq_hash_map_info {
	struct bpf_map *map;
	struct bpf_htab *htab;
	void *percpu_value_buf; // non-zero means percpu hash
	u32 bucket_id;
	u32 skip_elems;
};
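
/* The seq_file based map iterator below keeps only (bucket_id, skip_elems)
 * as its cursor: bucket_id is the bucket currently being walked and
 * skip_elems counts how many elements of that bucket have already been
 * emitted. When the previous element pointer is still available the walk
 * continues directly from it; otherwise the bucket list is walked again
 * under rcu_read_lock() and the first skip_elems entries are skipped.
 * Concurrent updates may therefore cause individual elements to be missed
 * or seen twice, but the traversal itself stays RCU-safe.
 */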
static struct htab_elem *
bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info,
			   struct htab_elem *prev_elem)
{
	const struct bpf_htab *htab = info->htab;
	u32 skip_elems = info->skip_elems;
	u32 bucket_id = info->bucket_id;
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	struct htab_elem *elem;
	struct bucket *b;
	u32 i, count;

	if (bucket_id >= htab->n_buckets)
		return NULL;

	/* try to find next elem in the same bucket */
	if (prev_elem) {
		/* no update/deletion on this bucket, prev_elem should still be
		 * valid and we won't skip elements.
		 */
		n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node));
		elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node);
		if (elem)
			return elem;

		/* not found, unlock and go to the next bucket */
		b = &htab->buckets[bucket_id++];
		rcu_read_unlock();
		skip_elems = 0;
	}

	for (i = bucket_id; i < htab->n_buckets; i++) {
		b = &htab->buckets[i];
		rcu_read_lock();

		count = 0;
		head = &b->head;
		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
			if (count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return elem;
			}
			count++;
		}

		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;
	struct htab_elem *elem;

	elem = bpf_hash_map_seq_find_next(info, NULL);
	if (!elem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return elem;
}

static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_hash_map_seq_find_next(info, v);
}

static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem)
{
	struct bpf_iter_seq_hash_map_info *info = seq->private;
	u32 roundup_key_size, roundup_value_size;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_iter_meta meta;
	int ret = 0, off = 0, cpu;
	struct bpf_prog *prog;
	void __percpu *pptr;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, elem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (elem) {
			roundup_key_size = round_up(map->key_size, 8);
			ctx.key = elem->key;
			if (!info->percpu_value_buf) {
				ctx.value = elem->key + roundup_key_size;
			} else {
				roundup_value_size = round_up(map->value_size, 8);
				pptr = htab_elem_get_ptr(elem, map->key_size);
				for_each_possible_cpu(cpu) {
					copy_map_value_long(map, info->percpu_value_buf + off,
							    per_cpu_ptr(pptr, cpu));
					check_and_init_map_value(map, info->percpu_value_buf + off);
					off += roundup_value_size;
				}
				ctx.value = info->percpu_value_buf;
			}
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_hash_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_hash_map_seq_show(seq, v);
}

static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_hash_map_seq_show(seq, NULL);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_hash_map(void *priv_data,
				  struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
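		/* One value slot per possible CPU, each padded to 8 bytes, so
		 * the iterator program sees the same layout as the syscall
		 * lookup path.
		 */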
		buf_size = round_up(map->value_size, 8) * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	seq_info->htab = container_of(map, struct bpf_htab, map);
	return 0;
}

static void bpf_iter_fini_hash_map(void *priv_data)
{
	struct bpf_iter_seq_hash_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_hash_map_seq_ops = {
	.start = bpf_hash_map_seq_start,
	.next = bpf_hash_map_seq_next,
	.stop = bpf_hash_map_seq_stop,
	.show = bpf_hash_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_hash_map_seq_ops,
	.init_seq_private = bpf_iter_init_hash_map,
	.fini_seq_private = bpf_iter_fini_hash_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info),
};

static long bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				   void *callback_ctx, u64 flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_head *head;
	struct hlist_nulls_node *n;
	struct htab_elem *elem;
	u32 roundup_key_size;
	int i, num_elems = 0;
	void __percpu *pptr;
	struct bucket *b;
	void *key, *val;
	bool is_percpu;
	u64 ret = 0;

	if (flags != 0)
		return -EINVAL;

	is_percpu = htab_is_percpu(htab);

	roundup_key_size = round_up(map->key_size, 8);
	/* Disable migration so the percpu value prepared here is the same one
	 * the bpf program would see via bpf_map_lookup_elem().
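	 * Migration stays disabled for the whole walk, so the callback runs on
	 * the same CPU whose per-cpu slot is passed to it.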
	 */
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < htab->n_buckets; i++) {
		b = &htab->buckets[i];
		rcu_read_lock();
		head = &b->head;
		hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) {
			key = elem->key;
			if (is_percpu) {
				/* current cpu value for percpu map */
				pptr = htab_elem_get_ptr(elem, map->key_size);
				val = this_cpu_ptr(pptr);
			} else {
				val = elem->key + roundup_key_size;
			}
			num_elems++;
			ret = callback_fn((u64)(long)map, (u64)(long)key,
					  (u64)(long)val, (u64)(long)callback_ctx, 0);
			/* return value: 0 - continue, 1 - stop and return */
			if (ret) {
				rcu_read_unlock();
				goto out;
			}
		}
		rcu_read_unlock();
	}
out:
	if (is_percpu)
		migrate_enable();
	return num_elems;
}

static u64 htab_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	u32 value_size = round_up(htab->map.value_size, 8);
	bool prealloc = htab_is_prealloc(htab);
	bool percpu = htab_is_percpu(htab);
	bool lru = htab_is_lru(htab);
	u64 num_entries;
	u64 usage = sizeof(struct bpf_htab);

	usage += sizeof(struct bucket) * htab->n_buckets;
	usage += sizeof(int) * num_possible_cpus() * HASHTAB_MAP_LOCK_COUNT;
	if (prealloc) {
		num_entries = map->max_entries;
		if (htab_has_extra_elems(htab))
			num_entries += num_possible_cpus();

		usage += htab->elem_size * num_entries;

		if (percpu)
			usage += value_size * num_possible_cpus() * num_entries;
		else if (!lru)
			usage += sizeof(struct htab_elem *) * num_possible_cpus();
	} else {
#define LLIST_NODE_SZ sizeof(struct llist_node)

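		/* Non-preallocated maps charge only the elements currently
		 * allocated, plus bpf_mem_alloc's per-object llist_node
		 * overhead.
		 */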
		num_entries = htab->use_percpu_counter ?
			      percpu_counter_sum(&htab->pcount) :
			      atomic_read(&htab->count);
		usage += (htab->elem_size + LLIST_NODE_SZ) * num_entries;
		if (percpu) {
			usage += (LLIST_NODE_SZ + sizeof(void *)) * num_entries;
			usage += value_size * num_possible_cpus() * num_entries;
		}
	}
	return usage;
}

BTF_ID_LIST_SINGLE(htab_map_btf_ids, struct, bpf_htab)
const struct bpf_map_ops htab_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_release_uref = htab_map_free_timers_and_wq,
	.map_lookup_elem = htab_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
	.map_update_elem = htab_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_gen_lookup = htab_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops htab_lru_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_release_uref = htab_map_free_timers_and_wq,
	.map_lookup_elem = htab_lru_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
	.map_update_elem = htab_lru_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_gen_lookup = htab_lru_map_gen_lookup,
	.map_seq_show_elem = htab_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_lru),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

/* Called from eBPF program */
static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l)
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	else
		return NULL;
}

/* inline bpf_map_lookup_elem() call for per-CPU hashmap */
static int htab_percpu_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	if (!bpf_jit_supports_percpu_insn())
		return -EOPNOTSUPP;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
				  (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
				offsetof(struct htab_elem, key) + map->key_size);
	*insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
	*insn++ = BPF_MOV64_PERCPU_REG(BPF_REG_0, BPF_REG_0);

	return insn - insn_buf;
}

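/* Called from eBPF programs via the bpf_map_lookup_percpu_elem() helper;
 * unlike the plain lookup above, the caller chooses the CPU explicitly.
 */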
static void *htab_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct htab_elem *l;

	if (cpu >= nr_cpu_ids)
		return NULL;

	l = __htab_map_lookup_elem(map, key);
	if (l)
		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
	else
		return NULL;
}

static void *htab_lru_percpu_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct htab_elem *l = __htab_map_lookup_elem(map, key);

	if (l) {
		bpf_lru_node_set_ref(&l->lru_node);
		return this_cpu_ptr(htab_elem_get_ptr(l, map->key_size));
	}

	return NULL;
}

static void *htab_lru_percpu_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct htab_elem *l;

	if (cpu >= nr_cpu_ids)
		return NULL;

	l = __htab_map_lookup_elem(map, key);
	if (l) {
		bpf_lru_node_set_ref(&l->lru_node);
		return per_cpu_ptr(htab_elem_get_ptr(l, map->key_size), cpu);
	}

	return NULL;
}

int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int ret = -ENOENT;
	int cpu, off = 0;
	u32 size;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = round_up(map->value_size, 8);
	rcu_read_lock();
	l = __htab_map_lookup_elem(map, key);
	if (!l)
		goto out;
	/* We do not mark the LRU map element here in order to not mess up
	 * eviction heuristics when user space does a map walk.
	 */
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}

int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
			   u64 map_flags)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	int ret;

	rcu_read_lock();
	if (htab_is_lru(htab))
		ret = __htab_lru_percpu_map_update_elem(map, key, value,
							map_flags, true);
	else
		ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
						    true);
	rcu_read_unlock();

	return ret;
}

static void htab_percpu_map_seq_show_elem(struct bpf_map *map, void *key,
					  struct seq_file *m)
{
	struct htab_elem *l;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	l = __htab_map_lookup_elem(map, key);
	if (!l) {
		rcu_read_unlock();
		return;
	}

	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
	seq_puts(m, ": {\n");
	pptr = htab_elem_get_ptr(l, map->key_size);
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

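/* From the syscall side, a per-CPU value is read and written as one flat
 * buffer holding a round_up(value_size, 8) slot per possible CPU. A minimal
 * user-space sketch with libbpf (illustrative only, not part of this file;
 * assumes map_fd and key are already set up and value_size == sizeof(__u64)):
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *vals = calloc(ncpus, sizeof(*vals));
 *
 *	if (vals && !bpf_map_lookup_elem(map_fd, &key, vals))
 *		for (int i = 0; i < ncpus; i++)
 *			printf("cpu%d: %llu\n", i, (unsigned long long)vals[i]);
 */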
const struct bpf_map_ops htab_percpu_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_percpu_map_lookup_elem,
	.map_gen_lookup = htab_percpu_map_gen_lookup,
	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_percpu_map_update_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_lookup_percpu_elem = htab_percpu_map_lookup_percpu_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_percpu),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops htab_lru_percpu_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = htab_map_alloc_check,
	.map_alloc = htab_map_alloc,
	.map_free = htab_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
	.map_update_elem = htab_lru_percpu_map_update_elem,
	.map_delete_elem = htab_lru_map_delete_elem,
	.map_lookup_percpu_elem = htab_lru_percpu_map_lookup_percpu_elem,
	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_hash_elem,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab_lru_percpu),
	.map_btf_id = &htab_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

static int fd_htab_map_alloc_check(union bpf_attr *attr)
{
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	return htab_map_alloc_check(attr);
}

static void fd_htab_map_free(struct bpf_map *map)
{
	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
	struct hlist_nulls_node *n;
	struct hlist_nulls_head *head;
	struct htab_elem *l;
	int i;

	for (i = 0; i < htab->n_buckets; i++) {
		head = select_bucket(htab, i);

		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
			void *ptr = fd_htab_map_get_ptr(map, l);

			map->ops->map_fd_put_ptr(map, ptr, false);
		}
	}

	htab_map_free(map);
}

/* only called from syscall */
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	ptr = htab_map_lookup_elem(map, key);
	if (ptr)
		*value = map->ops->map_fd_sys_lookup_elem(READ_ONCE(*ptr));
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
				void *key, void *value, u64 map_flags)
{
	void *ptr;
	int ret;
	u32 ufd = *(u32 *)value;

	ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	/* The htab bucket lock is always held during update operations in fd
	 * htab map, and the following rcu_read_lock() is only used to avoid
	 * the WARN_ON_ONCE in htab_map_update_elem().
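	 * (htab_map_update_elem() warns unless some flavor of the RCU read
	 * lock is held.)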
	 */
	rcu_read_lock();
	ret = htab_map_update_elem(map, key, &ptr, map_flags);
	rcu_read_unlock();
	if (ret)
		map->ops->map_fd_put_ptr(map, ptr, false);

	return ret;
}

static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = htab_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = htab_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int htab_of_map_gen_lookup(struct bpf_map *map,
				  struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;

	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
				  (void *(*)(struct bpf_map *map, void *key))NULL));
	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
				offsetof(struct htab_elem, key) +
				round_up(map->key_size, 8));
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);

	return insn - insn_buf;
}

static void htab_of_map_free(struct bpf_map *map)
{
	bpf_map_meta_free(map->inner_map_meta);
	fd_htab_map_free(map);
}

const struct bpf_map_ops htab_of_maps_map_ops = {
	.map_alloc_check = fd_htab_map_alloc_check,
	.map_alloc = htab_of_map_alloc,
	.map_free = htab_of_map_free,
	.map_get_next_key = htab_map_get_next_key,
	.map_lookup_elem = htab_of_map_lookup_elem,
	.map_delete_elem = htab_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = htab_of_map_gen_lookup,
	.map_check_btf = map_check_no_btf,
	.map_mem_usage = htab_map_mem_usage,
	BATCH_OPS(htab),
	.map_btf_id = &htab_map_btf_ids[0],
};