// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016,2017 Facebook
 */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>
#include <linux/btf_ids.h>

#include "map_in_map.h"

#define ARRAY_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
	 BPF_F_PRESERVE_ELEMS | BPF_F_INNER_MAP)

static void bpf_array_free_percpu(struct bpf_array *array)
{
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		free_percpu(array->pptrs[i]);
		cond_resched();
	}
}

static int bpf_array_alloc_percpu(struct bpf_array *array)
{
	void __percpu *ptr;
	int i;

	for (i = 0; i < array->map.max_entries; i++) {
		ptr = bpf_map_alloc_percpu(&array->map, array->elem_size, 8,
					   GFP_USER | __GFP_NOWARN);
		if (!ptr) {
			bpf_array_free_percpu(array);
			return -ENOMEM;
		}
		array->pptrs[i] = ptr;
		cond_resched();
	}

	return 0;
}

/* Called from syscall */
int array_map_alloc_check(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags) ||
	    (percpu && numa_node != NUMA_NO_NODE))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
	    attr->map_flags & (BPF_F_MMAPABLE | BPF_F_INNER_MAP))
		return -EINVAL;

	if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
	    attr->map_flags & BPF_F_PRESERVE_ELEMS)
		return -EINVAL;

	/* avoid overflow on round_up(map->value_size) */
	if (attr->value_size > INT_MAX)
		return -E2BIG;

	return 0;
}

static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	int numa_node = bpf_map_attr_numa_node(attr);
	u32 elem_size, index_mask, max_entries;
	bool bypass_spec_v1 = bpf_bypass_spec_v1();
	u64 array_size, mask64;
	struct bpf_array *array;

	elem_size = round_up(attr->value_size, 8);

	max_entries = attr->max_entries;

	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
	 * the uppermost bit set in u32 space is undefined behavior due to
	 * resulting 1U << 32, so do it manually here in u64 space.
	 */
	mask64 = fls_long(max_entries - 1);
	mask64 = 1ULL << mask64;
	mask64 -= 1;

	index_mask = mask64;
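	/*
	 * Illustrative example (not part of the original source): with
	 * max_entries == 5, fls_long(4) == 3, so mask64 becomes
	 * (1ULL << 3) - 1 == 7 and index_mask == 7.  Without
	 * bypass_spec_v1 the array is then sized for index_mask + 1 == 8
	 * entries, so a speculative load with a masked index can never
	 * reach past the allocation.
	 */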
	if (!bypass_spec_v1) {
		/* round up array size to nearest power of 2,
		 * since cpu will speculate within index_mask limits
		 */
		max_entries = index_mask + 1;
		/* Check for overflows. */
		if (max_entries < attr->max_entries)
			return ERR_PTR(-E2BIG);
	}

	array_size = sizeof(*array);
	if (percpu) {
		array_size += (u64) max_entries * sizeof(void *);
	} else {
		/* rely on vmalloc() to return page-aligned memory and
		 * ensure array->value is exactly page-aligned
		 */
		if (attr->map_flags & BPF_F_MMAPABLE) {
			array_size = PAGE_ALIGN(array_size);
			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
		} else {
			array_size += (u64) max_entries * elem_size;
		}
	}

	/* allocate all map elements and zero-initialize them */
	if (attr->map_flags & BPF_F_MMAPABLE) {
		void *data;

		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
		if (!data)
			return ERR_PTR(-ENOMEM);
		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
			- offsetof(struct bpf_array, value);
	} else {
		array = bpf_map_area_alloc(array_size, numa_node);
	}
	if (!array)
		return ERR_PTR(-ENOMEM);
	array->index_mask = index_mask;
	array->map.bypass_spec_v1 = bypass_spec_v1;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&array->map, attr);
	array->elem_size = elem_size;

	if (percpu && bpf_array_alloc_percpu(array)) {
		bpf_map_area_free(array);
		return ERR_PTR(-ENOMEM);
	}

	return &array->map;
}

static void *array_map_elem_ptr(struct bpf_array* array, u32 index)
{
	return array->value + (u64)array->elem_size * index;
}

/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return array->value + (u64)array->elem_size * (index & array->index_mask);
}

static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
				       u32 off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (off >= map->value_size)
		return -EINVAL;

	*imm = (unsigned long)array->value;
	return 0;
}

static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
				       u32 *off)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u64 base = (unsigned long)array->value;
	u64 range = array->elem_size;

	if (map->max_entries != 1)
		return -ENOTSUPP;
	if (imm < base || imm >= base + range)
		return -ENOENT;

	*off = imm - base;
	return 0;
}

/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_insn *insn = insn_buf;
	u32 elem_size = array->elem_size;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	if (map->map_flags & BPF_F_INNER_MAP)
		return -EOPNOTSUPP;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
	}

	if (is_power_of_2(elem_size)) {
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	} else {
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	}
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);
	return insn - insn_buf;
}
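
/*
 * Illustrative summary of the sequence emitted above (not part of the
 * original source), roughly: r0 = *(u32 *)r2; if (r0 >= max_entries)
 * goto miss; [r0 &= index_mask when Spectre v1 mitigation is on;]
 * r0 *= elem_size; r0 += r1 (which was advanced to &array->value);
 * return r0; miss: r0 = NULL.
 */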

/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}

static void *percpu_array_map_lookup_percpu_elem(struct bpf_map *map, void *key, u32 cpu)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;

	if (cpu >= nr_cpu_ids)
		return NULL;

	if (unlikely(index >= array->map.max_entries))
		return NULL;

	return per_cpu_ptr(array->pptrs[index & array->index_mask], cpu);
}

int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(index >= array->map.max_entries))
		return -ENOENT;

	/* per_cpu areas are zero-filled and bpf programs can only
	 * access 'value_size' of them, so copying rounded areas
	 * will not leak any kernel data
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, value + off, per_cpu_ptr(pptr, cpu));
		check_and_init_map_value(map, value + off);
		off += size;
	}
	rcu_read_unlock();
	return 0;
}
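
/*
 * Hedged note (not part of the original source): the syscall-side buffer
 * that bpf_percpu_array_copy() fills is expected to hold
 * num_possible_cpus() * round_up(value_size, 8) bytes, i.e. one elem_size
 * slot per possible CPU, matching the copy loop above.
 */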

/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = key ? *(u32 *)key : U32_MAX;
	u32 *next = (u32 *)next_key;

	if (index >= array->map.max_entries) {
		*next = 0;
		return 0;
	}

	if (index == array->map.max_entries - 1)
		return -ENOENT;

	*next = index + 1;
	return 0;
}

static void check_and_free_fields(struct bpf_array *arr, void *val)
{
	if (map_value_has_timer(&arr->map))
		bpf_timer_cancel_and_free(val + arr->map.timer_off);
	if (map_value_has_kptrs(&arr->map))
		bpf_map_free_kptrs(&arr->map, val);
}

/* Called from syscall or from eBPF program */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	char *val;

	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags & BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	if (unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(map)))
		return -EINVAL;

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		val = this_cpu_ptr(array->pptrs[index & array->index_mask]);
		copy_map_value(map, val, value);
		check_and_free_fields(array, val);
	} else {
		val = array->value +
			(u64)array->elem_size * (index & array->index_mask);
		if (map_flags & BPF_F_LOCK)
			copy_map_value_locked(map, val, value, false);
		else
			copy_map_value(map, val, value);
		check_and_free_fields(array, val);
	}
	return 0;
}

int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu, off = 0;
	u32 size;

	if (unlikely(map_flags > BPF_EXIST))
		/* unknown flags */
		return -EINVAL;

	if (unlikely(index >= array->map.max_entries))
		/* all elements were pre-allocated, cannot insert a new one */
		return -E2BIG;

	if (unlikely(map_flags == BPF_NOEXIST))
		/* all elements already exist */
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks are possible
	 */
	size = array->elem_size;
	rcu_read_lock();
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		copy_map_value_long(map, per_cpu_ptr(pptr, cpu), value + off);
		check_and_free_fields(array, per_cpu_ptr(pptr, cpu));
		off += size;
	}
	rcu_read_unlock();
	return 0;
}

/* Called from syscall or from eBPF program */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

static void *array_map_vmalloc_addr(struct bpf_array *array)
{
	return (void *)round_down((unsigned long)array, PAGE_SIZE);
}
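
/*
 * Illustrative note (not part of the original source): for BPF_F_MMAPABLE
 * maps, array_map_alloc() places the bpf_array header so that
 * array->value falls on a page boundary inside the region returned by
 * bpf_map_area_mmapable_alloc(); since the header itself fits within the
 * first page, rounding the struct address down to PAGE_SIZE recovers the
 * original vmalloc address for freeing and mmap.
 */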

static void array_map_free_timers(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* We don't reset or free kptr on uref dropping to zero. */
	if (!map_value_has_timer(map))
		return;

	for (i = 0; i < array->map.max_entries; i++)
		bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off);
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	if (map_value_has_kptrs(map)) {
		if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
			for (i = 0; i < array->map.max_entries; i++) {
				void __percpu *pptr = array->pptrs[i & array->index_mask];
				int cpu;

				for_each_possible_cpu(cpu) {
					bpf_map_free_kptrs(map, per_cpu_ptr(pptr, cpu));
					cond_resched();
				}
			}
		} else {
			for (i = 0; i < array->map.max_entries; i++)
				bpf_map_free_kptrs(map, array_map_elem_ptr(array, i));
		}
		bpf_map_free_kptr_off_tab(map);
	}

	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		bpf_array_free_percpu(array);

	if (array->map.map_flags & BPF_F_MMAPABLE)
		bpf_map_area_free(array_map_vmalloc_addr(array));
	else
		bpf_map_area_free(array);
}

static void array_map_seq_show_elem(struct bpf_map *map, void *key,
				    struct seq_file *m)
{
	void *value;

	rcu_read_lock();

	value = array_map_lookup_elem(map, key);
	if (!value) {
		rcu_read_unlock();
		return;
	}

	if (map->btf_key_type_id)
		seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();
}

static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
					   struct seq_file *m)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 index = *(u32 *)key;
	void __percpu *pptr;
	int cpu;

	rcu_read_lock();

	seq_printf(m, "%u: {\n", *(u32 *)key);
	pptr = array->pptrs[index & array->index_mask];
	for_each_possible_cpu(cpu) {
		seq_printf(m, "\tcpu%d: ", cpu);
		btf_type_seq_show(map->btf, map->btf_value_type_id,
				  per_cpu_ptr(pptr, cpu), m);
		seq_puts(m, "\n");
	}
	seq_puts(m, "}\n");

	rcu_read_unlock();
}

static int array_map_check_btf(const struct bpf_map *map,
			       const struct btf *btf,
			       const struct btf_type *key_type,
			       const struct btf_type *value_type)
{
	u32 int_data;

	/* One exception for keyless BTF: .bss/.data/.rodata map */
	if (btf_type_is_void(key_type)) {
		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
		    map->max_entries != 1)
			return -EINVAL;

		if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
			return -EINVAL;

		return 0;
	}

	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
		return -EINVAL;

	int_data = *(u32 *)(key_type + 1);
	/* bpf array can only take a u32 key. This check makes sure
	 * that the btf matches the attr used during map_create.
	 */
	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
		return -EINVAL;

	return 0;
}

static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;

	if (!(map->map_flags & BPF_F_MMAPABLE))
		return -EINVAL;

	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
	    PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
		return -EINVAL;

	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
				   vma->vm_pgoff + pgoff);
}

static bool array_map_meta_equal(const struct bpf_map *meta0,
				 const struct bpf_map *meta1)
{
	if (!bpf_map_meta_equal(meta0, meta1))
		return false;
	return meta0->map_flags & BPF_F_INNER_MAP ? true :
	       meta0->max_entries == meta1->max_entries;
}

struct bpf_iter_seq_array_map_info {
	struct bpf_map *map;
	void *percpu_value_buf;
	u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	if (info->index >= map->max_entries)
		return NULL;

	if (*pos == 0)
		++*pos;
	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_map *map = info->map;
	struct bpf_array *array;
	u32 index;

	++*pos;
	++info->index;
	if (info->index >= map->max_entries)
		return NULL;

	array = container_of(map, struct bpf_array, map);
	index = info->index & array->index_mask;
	if (info->percpu_value_buf)
		return array->pptrs[index];
	return array_map_elem_ptr(array, index);
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_array_map_info *info = seq->private;
	struct bpf_iter__bpf_map_elem ctx = {};
	struct bpf_map *map = info->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int off = 0, cpu = 0;
	void __percpu **pptr;
	u32 size;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, v == NULL);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.map = info->map;
	if (v) {
		ctx.key = &info->index;

		if (!info->percpu_value_buf) {
			ctx.value = v;
		} else {
			pptr = v;
			size = array->elem_size;
			for_each_possible_cpu(cpu) {
				copy_map_value_long(map, info->percpu_value_buf + off,
						    per_cpu_ptr(pptr, cpu));
				check_and_init_map_value(map, info->percpu_value_buf + off);
				off += size;
			}
			ctx.value = info->percpu_value_buf;
		}
	}

	return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
				   struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;
	struct bpf_map *map = aux->map;
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *value_buf;
	u32 buf_size;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		buf_size = array->elem_size * num_possible_cpus();
		value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
		if (!value_buf)
			return -ENOMEM;

		seq_info->percpu_value_buf = value_buf;
	}

	/* bpf_iter_attach_map() acquires a map uref, and the uref may be
	 * released before or in the middle of iterating map elements, so
	 * acquire an extra map uref for iterator.
	 */
	bpf_map_inc_with_uref(map);
	seq_info->map = map;
	return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
	struct bpf_iter_seq_array_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
	kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
	.start = bpf_array_map_seq_start,
	.next = bpf_array_map_seq_next,
	.stop = bpf_array_map_seq_stop,
	.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_array_map_seq_ops,
	.init_seq_private = bpf_iter_init_array_map,
	.fini_seq_private = bpf_iter_fini_array_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
				   void *callback_ctx, u64 flags)
{
	u32 i, key, num_elems = 0;
	struct bpf_array *array;
	bool is_percpu;
	u64 ret = 0;
	void *val;

	if (flags != 0)
		return -EINVAL;

	is_percpu = map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
	array = container_of(map, struct bpf_array, map);
	if (is_percpu)
		migrate_disable();
	for (i = 0; i < map->max_entries; i++) {
		if (is_percpu)
			val = this_cpu_ptr(array->pptrs[i]);
		else
			val = array_map_elem_ptr(array, i);
		num_elems++;
		key = i;
		ret = callback_fn((u64)(long)map, (u64)(long)&key,
				  (u64)(long)val, (u64)(long)callback_ctx, 0);
		/* return value: 0 - continue, 1 - stop and return */
		if (ret)
			break;
	}

	if (is_percpu)
		migrate_enable();
	return num_elems;
}
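
/*
 * Hedged usage note (not part of the original source): when a BPF program
 * calls the bpf_for_each_map_elem() helper on an array map, the callback
 * invoked by bpf_for_each_array_elem() above receives the map pointer, a
 * pointer to the u32 key, a pointer to the element value and the caller's
 * context, and returns 0 to continue or 1 to stop the iteration.
 */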

BTF_ID_LIST_SINGLE(array_map_btf_ids, struct, bpf_array)
const struct bpf_map_ops array_map_ops = {
	.map_meta_equal = array_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_release_uref = array_map_free_timers,
	.map_lookup_elem = array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_gen_lookup = array_map_gen_lookup,
	.map_direct_value_addr = array_map_direct_value_addr,
	.map_direct_value_meta = array_map_direct_value_meta,
	.map_mmap = array_map_mmap,
	.map_seq_show_elem = array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

const struct bpf_map_ops percpu_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = percpu_array_map_lookup_elem,
	.map_update_elem = array_map_update_elem,
	.map_delete_elem = array_map_delete_elem,
	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
	.map_seq_show_elem = percpu_array_map_seq_show_elem,
	.map_check_btf = array_map_check_btf,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_set_for_each_callback_args = map_set_for_each_callback_args,
	.map_for_each_callback = bpf_for_each_array_elem,
	.map_btf_id = &array_map_btf_ids[0],
	.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size != sizeof(u32))
		return -EINVAL;
	/* Program read-only/write-only not supported for special maps yet. */
	if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
		return -EINVAL;
	return array_map_alloc_check(attr);
}

static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	/* make sure it's empty */
	for (i = 0; i < array->map.max_entries; i++)
		BUG_ON(array->ptrs[i] != NULL);

	bpf_map_area_free(array);
}

static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* only called from syscall */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **elem, *ptr;
	int ret = 0;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	elem = array_map_lookup_elem(map, key);
	if (elem && (ptr = READ_ONCE(*elem)))
		*value = map->ops->map_fd_sys_lookup_elem(ptr);
	else
		ret = -ENOENT;
	rcu_read_unlock();

	return ret;
}

/* only called from syscall */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *new_ptr, *old_ptr;
	u32 index = *(u32 *)key, ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (index >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(new_ptr))
		return PTR_ERR(new_ptr);

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, new_ptr);
		map->ops->map_poke_run(map, index, old_ptr, new_ptr);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, new_ptr);
	}

	if (old_ptr)
		map->ops->map_fd_put_ptr(old_ptr);
	return 0;
}

static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	void *old_ptr;
	u32 index = *(u32 *)key;

	if (index >= array->map.max_entries)
		return -E2BIG;

	if (map->ops->map_poke_run) {
		mutex_lock(&array->aux->poke_mutex);
		old_ptr = xchg(array->ptrs + index, NULL);
		map->ops->map_poke_run(map, index, old_ptr, NULL);
		mutex_unlock(&array->aux->poke_mutex);
	} else {
		old_ptr = xchg(array->ptrs + index, NULL);
	}

	if (old_ptr) {
		map->ops->map_fd_put_ptr(old_ptr);
		return 0;
	} else {
		return -ENOENT;
	}
}

static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_prog *prog = bpf_prog_get(fd);

	if (IS_ERR(prog))
		return prog;

	if (!bpf_prog_map_compatible(map, prog)) {
		bpf_prog_put(prog);
		return ERR_PTR(-EINVAL);
	}

	return prog;
}

static void prog_fd_array_put_ptr(void *ptr)
{
	bpf_prog_put(ptr);
}

static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	return ((struct bpf_prog *)ptr)->aux->id;
}

/* decrement refcnt of all bpf_progs that are stored in this map */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int i;

	for (i = 0; i < array->map.max_entries; i++)
		fd_array_map_delete_elem(map, &i);
}

static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
					 struct seq_file *m)
{
	void **elem, *ptr;
	u32 prog_id;

	rcu_read_lock();

	elem = array_map_lookup_elem(map, key);
	if (elem) {
		ptr = READ_ONCE(*elem);
		if (ptr) {
			seq_printf(m, "%u: ", *(u32 *)key);
			prog_id = prog_fd_array_sys_lookup_elem(ptr);
			btf_type_seq_show(map->btf, map->btf_value_type_id,
					  &prog_id, m);
			seq_puts(m, "\n");
		}
	}

	rcu_read_unlock();
}

struct prog_poke_elem {
	struct list_head list;
	struct bpf_prog_aux *aux;
};

static int prog_array_map_poke_track(struct bpf_map *map,
				     struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;
	int ret = 0;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry(elem, &aux->poke_progs, list) {
		if (elem->aux == prog_aux)
			goto out;
	}

	elem = kmalloc(sizeof(*elem), GFP_KERNEL);
	if (!elem) {
		ret = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&elem->list);
	/* We must track the program's aux info at this point in time
	 * since the program pointer itself may not be stable yet, see
	 * also comment in prog_array_map_poke_run().
	 */
	elem->aux = prog_aux;

	list_add_tail(&elem->list, &aux->poke_progs);
out:
	mutex_unlock(&aux->poke_mutex);
	return ret;
}

static void prog_array_map_poke_untrack(struct bpf_map *map,
					struct bpf_prog_aux *prog_aux)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	mutex_lock(&aux->poke_mutex);
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		if (elem->aux == prog_aux) {
			list_del_init(&elem->list);
			kfree(elem);
			break;
		}
	}
	mutex_unlock(&aux->poke_mutex);
}

static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
				    struct bpf_prog *old,
				    struct bpf_prog *new)
{
	u8 *old_addr, *new_addr, *old_bypass_addr;
	struct prog_poke_elem *elem;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));

	list_for_each_entry(elem, &aux->poke_progs, list) {
		struct bpf_jit_poke_descriptor *poke;
		int i, ret;

		for (i = 0; i < elem->aux->size_poke_tab; i++) {
			poke = &elem->aux->poke_tab[i];

			/* A few things to be aware of:
			 *
			 * 1) We can only ever access aux in this context, but
			 *    not aux->prog since it might not be stable yet and
			 *    there could be danger of use after free otherwise.
			 * 2) Initially when we start tracking aux, the program
			 *    is not JITed yet and also does not have a kallsyms
			 *    entry. We skip these as poke->tailcall_target_stable
			 *    is not active yet. The JIT will do the final fixup
			 *    before setting it stable. The various
			 *    poke->tailcall_target_stable are successively
			 *    activated, so tail call updates can arrive from here
			 *    while JIT is still finishing its final fixup for
			 *    non-activated poke entries.
			 * 3) On program teardown, the program's kallsym entry gets
			 *    removed out of RCU callback, but we can only untrack
			 *    from sleepable context, therefore bpf_arch_text_poke()
			 *    might not see that this is in BPF text section and
			 *    bails out with -EINVAL. As these are unreachable since
			 *    RCU grace period already passed, we simply skip them.
			 * 4) Also, programs reaching a refcount of zero while
			 *    patching is in progress are okay since we're protected
			 *    under poke_mutex and untrack the programs before the
			 *    JIT buffer is freed. When we're still in the middle of
			 *    patching and suddenly kallsyms entry of the program
			 *    gets evicted, we just skip the rest which is fine due
			 *    to point 3).
			 * 5) Any other error happening below from bpf_arch_text_poke()
			 *    is an unexpected bug.
			 */
			if (!READ_ONCE(poke->tailcall_target_stable))
				continue;
			if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
				continue;
			if (poke->tail_call.map != map ||
			    poke->tail_call.key != key)
				continue;

			old_bypass_addr = old ? NULL : poke->bypass_addr;
			old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
			new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
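
			/* Summary of the transitions handled below (added
			 * note, derived from the code): installing a new
			 * target retargets the tail call jump (and, when
			 * there was no old program, also removes the bypass
			 * jump); removing a program first re-installs the
			 * bypass, waits an RCU grace period so no CPU is
			 * still executing through the old target, and only
			 * then clears the tail call jump itself.
			 */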
			if (new) {
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, new_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				if (!old) {
					ret = bpf_arch_text_poke(poke->tailcall_bypass,
								 BPF_MOD_JUMP,
								 poke->bypass_addr,
								 NULL);
					BUG_ON(ret < 0 && ret != -EINVAL);
				}
			} else {
				ret = bpf_arch_text_poke(poke->tailcall_bypass,
							 BPF_MOD_JUMP,
							 old_bypass_addr,
							 poke->bypass_addr);
				BUG_ON(ret < 0 && ret != -EINVAL);
				/* let other CPUs finish the execution of program
				 * so that it will not be possible to expose them
				 * to an invalid nop, stack unwind or nop state
				 */
				if (!ret)
					synchronize_rcu();
				ret = bpf_arch_text_poke(poke->tailcall_target,
							 BPF_MOD_JUMP,
							 old_addr, NULL);
				BUG_ON(ret < 0 && ret != -EINVAL);
			}
		}
	}
}

static void prog_array_map_clear_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_array_aux,
					   work)->map;
	bpf_fd_array_map_clear(map);
	bpf_map_put(map);
}

static void prog_array_map_clear(struct bpf_map *map)
{
	struct bpf_array_aux *aux = container_of(map, struct bpf_array,
						 map)->aux;
	bpf_map_inc(map);
	schedule_work(&aux->work);
}

static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
{
	struct bpf_array_aux *aux;
	struct bpf_map *map;

	aux = kzalloc(sizeof(*aux), GFP_KERNEL_ACCOUNT);
	if (!aux)
		return ERR_PTR(-ENOMEM);

	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
	INIT_LIST_HEAD(&aux->poke_progs);
	mutex_init(&aux->poke_mutex);

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		kfree(aux);
		return map;
	}

	container_of(map, struct bpf_array, map)->aux = aux;
	aux->map = map;

	return map;
}

static void prog_array_map_free(struct bpf_map *map)
{
	struct prog_poke_elem *elem, *tmp;
	struct bpf_array_aux *aux;

	aux = container_of(map, struct bpf_array, map)->aux;
	list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
		list_del_init(&elem->list);
		kfree(elem);
	}
	kfree(aux);
	fd_array_map_free(map);
}

/* prog_array->aux->{type,jited} is a runtime binding.
 * Doing static check alone in the verifier is not enough.
 * Thus, prog_array_map cannot be used as an inner_map
 * and map_meta_equal is not implemented.
 */
const struct bpf_map_ops prog_array_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = prog_array_map_alloc,
	.map_free = prog_array_map_free,
	.map_poke_track = prog_array_map_poke_track,
	.map_poke_untrack = prog_array_map_poke_untrack,
	.map_poke_run = prog_array_map_poke_run,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = prog_fd_array_get_ptr,
	.map_fd_put_ptr = prog_fd_array_put_ptr,
	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
	.map_release_uref = prog_array_map_clear,
	.map_seq_show_elem = prog_array_map_seq_show_elem,
	.map_btf_id = &array_map_btf_ids[0],
};

static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
						   struct file *map_file)
{
	struct bpf_event_entry *ee;

	ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
	if (ee) {
		ee->event = perf_file->private_data;
		ee->perf_file = perf_file;
		ee->map_file = map_file;
	}

	return ee;
}

static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *ee;

	ee = container_of(rcu, struct bpf_event_entry, rcu);
	fput(ee->perf_file);
	kfree(ee);
}

static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
	call_rcu(&ee->rcu, __bpf_event_entry_free);
}

static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *ee;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	ee = ERR_PTR(-EOPNOTSUPP);
	event = perf_file->private_data;
	if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
		goto err_out;

	ee = bpf_event_entry_gen(perf_file, map_file);
	if (ee)
		return ee;
	ee = ERR_PTR(-ENOMEM);
err_out:
	fput(perf_file);
	return ee;
}

static void perf_event_fd_array_put_ptr(void *ptr)
{
	bpf_event_entry_free_rcu(ptr);
}

static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *ee;
	int i;

	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		return;

	rcu_read_lock();
	for (i = 0; i < array->map.max_entries; i++) {
		ee = READ_ONCE(array->ptrs[i]);
		if (ee && ee->map_file == map_file)
			fd_array_map_delete_elem(map, &i);
	}
	rcu_read_unlock();
}

static void perf_event_fd_array_map_free(struct bpf_map *map)
{
	if (map->map_flags & BPF_F_PRESERVE_ELEMS)
		bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops perf_event_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = perf_event_fd_array_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = perf_event_fd_array_get_ptr,
	.map_fd_put_ptr = perf_event_fd_array_put_ptr,
	.map_release = perf_event_fd_array_release,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};

#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
				     struct file *map_file /* not used */,
				     int fd)
{
	return cgroup_get_from_fd(fd);
}

static void cgroup_fd_array_put_ptr(void *ptr)
{
	/* cgroup_put() frees cgrp after an RCU grace period */
	cgroup_put(ptr);
}

static void cgroup_fd_array_free(struct bpf_map *map)
{
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

const struct bpf_map_ops cgroup_array_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_map_alloc,
	.map_free = cgroup_fd_array_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = fd_array_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = cgroup_fd_array_get_ptr,
	.map_fd_put_ptr = cgroup_fd_array_put_ptr,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};
#endif

static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *map, *inner_map_meta;

	inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(inner_map_meta))
		return inner_map_meta;

	map = array_map_alloc(attr);
	if (IS_ERR(map)) {
		bpf_map_meta_free(inner_map_meta);
		return map;
	}

	map->inner_map_meta = inner_map_meta;

	return map;
}

static void array_of_map_free(struct bpf_map *map)
{
	/* map->inner_map_meta is only accessed by syscall which
	 * is protected by fdget/fdput.
	 */
	bpf_map_meta_free(map->inner_map_meta);
	bpf_fd_array_map_clear(map);
	fd_array_map_free(map);
}

static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **inner_map = array_map_lookup_elem(map, key);

	if (!inner_map)
		return NULL;

	return READ_ONCE(*inner_map);
}

static int array_of_map_gen_lookup(struct bpf_map *map,
				   struct bpf_insn *insn_buf)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 elem_size = array->elem_size;
	struct bpf_insn *insn = insn_buf;
	const int ret = BPF_REG_0;
	const int map_ptr = BPF_REG_1;
	const int index = BPF_REG_2;

	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
	if (!map->bypass_spec_v1) {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
	} else {
		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
	}
	if (is_power_of_2(elem_size))
		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
	else
		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
	*insn++ = BPF_MOV64_IMM(ret, 0);

	return insn - insn_buf;
}
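
/*
 * Illustrative note (not part of the original source): compared with
 * array_map_gen_lookup(), the sequence above adds one more load that
 * dereferences the stored inner-map pointer (BPF_LDX_MEM(BPF_DW, ...))
 * and a BPF_JEQ check so that an empty slot yields NULL instead of a
 * pointer to the slot itself.
 */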

const struct bpf_map_ops array_of_maps_map_ops = {
	.map_alloc_check = fd_array_map_alloc_check,
	.map_alloc = array_of_map_alloc,
	.map_free = array_of_map_free,
	.map_get_next_key = array_map_get_next_key,
	.map_lookup_elem = array_of_map_lookup_elem,
	.map_delete_elem = fd_array_map_delete_elem,
	.map_fd_get_ptr = bpf_map_fd_get_ptr,
	.map_fd_put_ptr = bpf_map_fd_put_ptr,
	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
	.map_gen_lookup = array_of_map_gen_lookup,
	.map_lookup_batch = generic_map_lookup_batch,
	.map_update_batch = generic_map_update_batch,
	.map_check_btf = map_check_no_btf,
	.map_btf_id = &array_map_btf_ids[0],
};