1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 2 * Copyright (c) 2016,2017 Facebook 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of version 2 of the GNU General Public 6 * License as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 */ 13 #include <linux/bpf.h> 14 #include <linux/btf.h> 15 #include <linux/err.h> 16 #include <linux/slab.h> 17 #include <linux/mm.h> 18 #include <linux/filter.h> 19 #include <linux/perf_event.h> 20 #include <uapi/linux/btf.h> 21 22 #include "map_in_map.h" 23 24 #define ARRAY_CREATE_FLAG_MASK \ 25 (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) 26 27 static void bpf_array_free_percpu(struct bpf_array *array) 28 { 29 int i; 30 31 for (i = 0; i < array->map.max_entries; i++) { 32 free_percpu(array->pptrs[i]); 33 cond_resched(); 34 } 35 } 36 37 static int bpf_array_alloc_percpu(struct bpf_array *array) 38 { 39 void __percpu *ptr; 40 int i; 41 42 for (i = 0; i < array->map.max_entries; i++) { 43 ptr = __alloc_percpu_gfp(array->elem_size, 8, 44 GFP_USER | __GFP_NOWARN); 45 if (!ptr) { 46 bpf_array_free_percpu(array); 47 return -ENOMEM; 48 } 49 array->pptrs[i] = ptr; 50 cond_resched(); 51 } 52 53 return 0; 54 } 55 56 /* Called from syscall */ 57 int array_map_alloc_check(union bpf_attr *attr) 58 { 59 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 60 int numa_node = bpf_map_attr_numa_node(attr); 61 62 /* check sanity of attributes */ 63 if (attr->max_entries == 0 || attr->key_size != 4 || 64 attr->value_size == 0 || 65 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || 66 !bpf_map_flags_access_ok(attr->map_flags) || 67 (percpu && numa_node != NUMA_NO_NODE)) 68 return -EINVAL; 69 70 if (attr->value_size > KMALLOC_MAX_SIZE) 71 /* if value_size is bigger, the user space won't be able to 72 * access the elements. 73 */ 74 return -E2BIG; 75 76 return 0; 77 } 78 79 static struct bpf_map *array_map_alloc(union bpf_attr *attr) 80 { 81 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; 82 int ret, numa_node = bpf_map_attr_numa_node(attr); 83 u32 elem_size, index_mask, max_entries; 84 bool unpriv = !capable(CAP_SYS_ADMIN); 85 u64 cost, array_size, mask64; 86 struct bpf_map_memory mem; 87 struct bpf_array *array; 88 89 elem_size = round_up(attr->value_size, 8); 90 91 max_entries = attr->max_entries; 92 93 /* On 32 bit archs roundup_pow_of_two() with max_entries that has 94 * upper most bit set in u32 space is undefined behavior due to 95 * resulting 1U << 32, so do it manually here in u64 space. 96 */ 97 mask64 = fls_long(max_entries - 1); 98 mask64 = 1ULL << mask64; 99 mask64 -= 1; 100 101 index_mask = mask64; 102 if (unpriv) { 103 /* round up array size to nearest power of 2, 104 * since cpu will speculate within index_mask limits 105 */ 106 max_entries = index_mask + 1; 107 /* Check for overflows. */ 108 if (max_entries < attr->max_entries) 109 return ERR_PTR(-E2BIG); 110 } 111 112 array_size = sizeof(*array); 113 if (percpu) 114 array_size += (u64) max_entries * sizeof(void *); 115 else 116 array_size += (u64) max_entries * elem_size; 117 118 /* make sure there is no u32 overflow later in round_up() */ 119 cost = array_size; 120 if (percpu) 121 cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); 122 123 ret = bpf_map_charge_init(&mem, cost); 124 if (ret < 0) 125 return ERR_PTR(ret); 126 127 /* allocate all map elements and zero-initialize them */ 128 array = bpf_map_area_alloc(array_size, numa_node); 129 if (!array) { 130 bpf_map_charge_finish(&mem); 131 return ERR_PTR(-ENOMEM); 132 } 133 array->index_mask = index_mask; 134 array->map.unpriv_array = unpriv; 135 136 /* copy mandatory map attributes */ 137 bpf_map_init_from_attr(&array->map, attr); 138 bpf_map_charge_move(&array->map.memory, &mem); 139 array->elem_size = elem_size; 140 141 if (percpu && bpf_array_alloc_percpu(array)) { 142 bpf_map_charge_finish(&array->map.memory); 143 bpf_map_area_free(array); 144 return ERR_PTR(-ENOMEM); 145 } 146 147 return &array->map; 148 } 149 150 /* Called from syscall or from eBPF program */ 151 static void *array_map_lookup_elem(struct bpf_map *map, void *key) 152 { 153 struct bpf_array *array = container_of(map, struct bpf_array, map); 154 u32 index = *(u32 *)key; 155 156 if (unlikely(index >= array->map.max_entries)) 157 return NULL; 158 159 return array->value + array->elem_size * (index & array->index_mask); 160 } 161 162 static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, 163 u32 off) 164 { 165 struct bpf_array *array = container_of(map, struct bpf_array, map); 166 167 if (map->max_entries != 1) 168 return -ENOTSUPP; 169 if (off >= map->value_size) 170 return -EINVAL; 171 172 *imm = (unsigned long)array->value; 173 return 0; 174 } 175 176 static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm, 177 u32 *off) 178 { 179 struct bpf_array *array = container_of(map, struct bpf_array, map); 180 u64 base = (unsigned long)array->value; 181 u64 range = array->elem_size; 182 183 if (map->max_entries != 1) 184 return -ENOTSUPP; 185 if (imm < base || imm >= base + range) 186 return -ENOENT; 187 188 *off = imm - base; 189 return 0; 190 } 191 192 /* emit BPF instructions equivalent to C code of array_map_lookup_elem() */ 193 static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf) 194 { 195 struct bpf_array *array = container_of(map, struct bpf_array, map); 196 struct bpf_insn *insn = insn_buf; 197 u32 elem_size = round_up(map->value_size, 8); 198 const int ret = BPF_REG_0; 199 const int map_ptr = BPF_REG_1; 200 const int index = BPF_REG_2; 201 202 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 203 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 204 if (map->unpriv_array) { 205 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4); 206 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); 207 } else { 208 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3); 209 } 210 211 if (is_power_of_2(elem_size)) { 212 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 213 } else { 214 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); 215 } 216 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); 217 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 218 *insn++ = BPF_MOV64_IMM(ret, 0); 219 return insn - insn_buf; 220 } 221 222 /* Called from eBPF program */ 223 static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key) 224 { 225 struct bpf_array *array = container_of(map, struct bpf_array, map); 226 u32 index = *(u32 *)key; 227 228 if (unlikely(index >= array->map.max_entries)) 229 return NULL; 230 231 return this_cpu_ptr(array->pptrs[index & array->index_mask]); 232 } 233 234 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value) 235 { 236 struct bpf_array *array = container_of(map, struct bpf_array, map); 237 u32 index = *(u32 *)key; 238 void __percpu *pptr; 239 int cpu, off = 0; 240 u32 size; 241 242 if (unlikely(index >= array->map.max_entries)) 243 return -ENOENT; 244 245 /* per_cpu areas are zero-filled and bpf programs can only 246 * access 'value_size' of them, so copying rounded areas 247 * will not leak any kernel data 248 */ 249 size = round_up(map->value_size, 8); 250 rcu_read_lock(); 251 pptr = array->pptrs[index & array->index_mask]; 252 for_each_possible_cpu(cpu) { 253 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size); 254 off += size; 255 } 256 rcu_read_unlock(); 257 return 0; 258 } 259 260 /* Called from syscall */ 261 static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) 262 { 263 struct bpf_array *array = container_of(map, struct bpf_array, map); 264 u32 index = key ? *(u32 *)key : U32_MAX; 265 u32 *next = (u32 *)next_key; 266 267 if (index >= array->map.max_entries) { 268 *next = 0; 269 return 0; 270 } 271 272 if (index == array->map.max_entries - 1) 273 return -ENOENT; 274 275 *next = index + 1; 276 return 0; 277 } 278 279 /* Called from syscall or from eBPF program */ 280 static int array_map_update_elem(struct bpf_map *map, void *key, void *value, 281 u64 map_flags) 282 { 283 struct bpf_array *array = container_of(map, struct bpf_array, map); 284 u32 index = *(u32 *)key; 285 char *val; 286 287 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) 288 /* unknown flags */ 289 return -EINVAL; 290 291 if (unlikely(index >= array->map.max_entries)) 292 /* all elements were pre-allocated, cannot insert a new one */ 293 return -E2BIG; 294 295 if (unlikely(map_flags & BPF_NOEXIST)) 296 /* all elements already exist */ 297 return -EEXIST; 298 299 if (unlikely((map_flags & BPF_F_LOCK) && 300 !map_value_has_spin_lock(map))) 301 return -EINVAL; 302 303 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 304 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]), 305 value, map->value_size); 306 } else { 307 val = array->value + 308 array->elem_size * (index & array->index_mask); 309 if (map_flags & BPF_F_LOCK) 310 copy_map_value_locked(map, val, value, false); 311 else 312 copy_map_value(map, val, value); 313 } 314 return 0; 315 } 316 317 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value, 318 u64 map_flags) 319 { 320 struct bpf_array *array = container_of(map, struct bpf_array, map); 321 u32 index = *(u32 *)key; 322 void __percpu *pptr; 323 int cpu, off = 0; 324 u32 size; 325 326 if (unlikely(map_flags > BPF_EXIST)) 327 /* unknown flags */ 328 return -EINVAL; 329 330 if (unlikely(index >= array->map.max_entries)) 331 /* all elements were pre-allocated, cannot insert a new one */ 332 return -E2BIG; 333 334 if (unlikely(map_flags == BPF_NOEXIST)) 335 /* all elements already exist */ 336 return -EEXIST; 337 338 /* the user space will provide round_up(value_size, 8) bytes that 339 * will be copied into per-cpu area. bpf programs can only access 340 * value_size of it. During lookup the same extra bytes will be 341 * returned or zeros which were zero-filled by percpu_alloc, 342 * so no kernel data leaks possible 343 */ 344 size = round_up(map->value_size, 8); 345 rcu_read_lock(); 346 pptr = array->pptrs[index & array->index_mask]; 347 for_each_possible_cpu(cpu) { 348 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size); 349 off += size; 350 } 351 rcu_read_unlock(); 352 return 0; 353 } 354 355 /* Called from syscall or from eBPF program */ 356 static int array_map_delete_elem(struct bpf_map *map, void *key) 357 { 358 return -EINVAL; 359 } 360 361 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ 362 static void array_map_free(struct bpf_map *map) 363 { 364 struct bpf_array *array = container_of(map, struct bpf_array, map); 365 366 /* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 367 * so the programs (can be more than one that used this map) were 368 * disconnected from events. Wait for outstanding programs to complete 369 * and free the array 370 */ 371 synchronize_rcu(); 372 373 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) 374 bpf_array_free_percpu(array); 375 376 bpf_map_area_free(array); 377 } 378 379 static void array_map_seq_show_elem(struct bpf_map *map, void *key, 380 struct seq_file *m) 381 { 382 void *value; 383 384 rcu_read_lock(); 385 386 value = array_map_lookup_elem(map, key); 387 if (!value) { 388 rcu_read_unlock(); 389 return; 390 } 391 392 if (map->btf_key_type_id) 393 seq_printf(m, "%u: ", *(u32 *)key); 394 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m); 395 seq_puts(m, "\n"); 396 397 rcu_read_unlock(); 398 } 399 400 static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, 401 struct seq_file *m) 402 { 403 struct bpf_array *array = container_of(map, struct bpf_array, map); 404 u32 index = *(u32 *)key; 405 void __percpu *pptr; 406 int cpu; 407 408 rcu_read_lock(); 409 410 seq_printf(m, "%u: {\n", *(u32 *)key); 411 pptr = array->pptrs[index & array->index_mask]; 412 for_each_possible_cpu(cpu) { 413 seq_printf(m, "\tcpu%d: ", cpu); 414 btf_type_seq_show(map->btf, map->btf_value_type_id, 415 per_cpu_ptr(pptr, cpu), m); 416 seq_puts(m, "\n"); 417 } 418 seq_puts(m, "}\n"); 419 420 rcu_read_unlock(); 421 } 422 423 static int array_map_check_btf(const struct bpf_map *map, 424 const struct btf *btf, 425 const struct btf_type *key_type, 426 const struct btf_type *value_type) 427 { 428 u32 int_data; 429 430 /* One exception for keyless BTF: .bss/.data/.rodata map */ 431 if (btf_type_is_void(key_type)) { 432 if (map->map_type != BPF_MAP_TYPE_ARRAY || 433 map->max_entries != 1) 434 return -EINVAL; 435 436 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC) 437 return -EINVAL; 438 439 return 0; 440 } 441 442 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) 443 return -EINVAL; 444 445 int_data = *(u32 *)(key_type + 1); 446 /* bpf array can only take a u32 key. This check makes sure 447 * that the btf matches the attr used during map_create. 448 */ 449 if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) 450 return -EINVAL; 451 452 return 0; 453 } 454 455 const struct bpf_map_ops array_map_ops = { 456 .map_alloc_check = array_map_alloc_check, 457 .map_alloc = array_map_alloc, 458 .map_free = array_map_free, 459 .map_get_next_key = array_map_get_next_key, 460 .map_lookup_elem = array_map_lookup_elem, 461 .map_update_elem = array_map_update_elem, 462 .map_delete_elem = array_map_delete_elem, 463 .map_gen_lookup = array_map_gen_lookup, 464 .map_direct_value_addr = array_map_direct_value_addr, 465 .map_direct_value_meta = array_map_direct_value_meta, 466 .map_seq_show_elem = array_map_seq_show_elem, 467 .map_check_btf = array_map_check_btf, 468 }; 469 470 const struct bpf_map_ops percpu_array_map_ops = { 471 .map_alloc_check = array_map_alloc_check, 472 .map_alloc = array_map_alloc, 473 .map_free = array_map_free, 474 .map_get_next_key = array_map_get_next_key, 475 .map_lookup_elem = percpu_array_map_lookup_elem, 476 .map_update_elem = array_map_update_elem, 477 .map_delete_elem = array_map_delete_elem, 478 .map_seq_show_elem = percpu_array_map_seq_show_elem, 479 .map_check_btf = array_map_check_btf, 480 }; 481 482 static int fd_array_map_alloc_check(union bpf_attr *attr) 483 { 484 /* only file descriptors can be stored in this type of map */ 485 if (attr->value_size != sizeof(u32)) 486 return -EINVAL; 487 /* Program read-only/write-only not supported for special maps yet. */ 488 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) 489 return -EINVAL; 490 return array_map_alloc_check(attr); 491 } 492 493 static void fd_array_map_free(struct bpf_map *map) 494 { 495 struct bpf_array *array = container_of(map, struct bpf_array, map); 496 int i; 497 498 synchronize_rcu(); 499 500 /* make sure it's empty */ 501 for (i = 0; i < array->map.max_entries; i++) 502 BUG_ON(array->ptrs[i] != NULL); 503 504 bpf_map_area_free(array); 505 } 506 507 static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) 508 { 509 return ERR_PTR(-EOPNOTSUPP); 510 } 511 512 /* only called from syscall */ 513 int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) 514 { 515 void **elem, *ptr; 516 int ret = 0; 517 518 if (!map->ops->map_fd_sys_lookup_elem) 519 return -ENOTSUPP; 520 521 rcu_read_lock(); 522 elem = array_map_lookup_elem(map, key); 523 if (elem && (ptr = READ_ONCE(*elem))) 524 *value = map->ops->map_fd_sys_lookup_elem(ptr); 525 else 526 ret = -ENOENT; 527 rcu_read_unlock(); 528 529 return ret; 530 } 531 532 /* only called from syscall */ 533 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, 534 void *key, void *value, u64 map_flags) 535 { 536 struct bpf_array *array = container_of(map, struct bpf_array, map); 537 void *new_ptr, *old_ptr; 538 u32 index = *(u32 *)key, ufd; 539 540 if (map_flags != BPF_ANY) 541 return -EINVAL; 542 543 if (index >= array->map.max_entries) 544 return -E2BIG; 545 546 ufd = *(u32 *)value; 547 new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd); 548 if (IS_ERR(new_ptr)) 549 return PTR_ERR(new_ptr); 550 551 old_ptr = xchg(array->ptrs + index, new_ptr); 552 if (old_ptr) 553 map->ops->map_fd_put_ptr(old_ptr); 554 555 return 0; 556 } 557 558 static int fd_array_map_delete_elem(struct bpf_map *map, void *key) 559 { 560 struct bpf_array *array = container_of(map, struct bpf_array, map); 561 void *old_ptr; 562 u32 index = *(u32 *)key; 563 564 if (index >= array->map.max_entries) 565 return -E2BIG; 566 567 old_ptr = xchg(array->ptrs + index, NULL); 568 if (old_ptr) { 569 map->ops->map_fd_put_ptr(old_ptr); 570 return 0; 571 } else { 572 return -ENOENT; 573 } 574 } 575 576 static void *prog_fd_array_get_ptr(struct bpf_map *map, 577 struct file *map_file, int fd) 578 { 579 struct bpf_array *array = container_of(map, struct bpf_array, map); 580 struct bpf_prog *prog = bpf_prog_get(fd); 581 582 if (IS_ERR(prog)) 583 return prog; 584 585 if (!bpf_prog_array_compatible(array, prog)) { 586 bpf_prog_put(prog); 587 return ERR_PTR(-EINVAL); 588 } 589 590 return prog; 591 } 592 593 static void prog_fd_array_put_ptr(void *ptr) 594 { 595 bpf_prog_put(ptr); 596 } 597 598 static u32 prog_fd_array_sys_lookup_elem(void *ptr) 599 { 600 return ((struct bpf_prog *)ptr)->aux->id; 601 } 602 603 /* decrement refcnt of all bpf_progs that are stored in this map */ 604 static void bpf_fd_array_map_clear(struct bpf_map *map) 605 { 606 struct bpf_array *array = container_of(map, struct bpf_array, map); 607 int i; 608 609 for (i = 0; i < array->map.max_entries; i++) 610 fd_array_map_delete_elem(map, &i); 611 } 612 613 static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, 614 struct seq_file *m) 615 { 616 void **elem, *ptr; 617 u32 prog_id; 618 619 rcu_read_lock(); 620 621 elem = array_map_lookup_elem(map, key); 622 if (elem) { 623 ptr = READ_ONCE(*elem); 624 if (ptr) { 625 seq_printf(m, "%u: ", *(u32 *)key); 626 prog_id = prog_fd_array_sys_lookup_elem(ptr); 627 btf_type_seq_show(map->btf, map->btf_value_type_id, 628 &prog_id, m); 629 seq_puts(m, "\n"); 630 } 631 } 632 633 rcu_read_unlock(); 634 } 635 636 const struct bpf_map_ops prog_array_map_ops = { 637 .map_alloc_check = fd_array_map_alloc_check, 638 .map_alloc = array_map_alloc, 639 .map_free = fd_array_map_free, 640 .map_get_next_key = array_map_get_next_key, 641 .map_lookup_elem = fd_array_map_lookup_elem, 642 .map_delete_elem = fd_array_map_delete_elem, 643 .map_fd_get_ptr = prog_fd_array_get_ptr, 644 .map_fd_put_ptr = prog_fd_array_put_ptr, 645 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, 646 .map_release_uref = bpf_fd_array_map_clear, 647 .map_seq_show_elem = prog_array_map_seq_show_elem, 648 }; 649 650 static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, 651 struct file *map_file) 652 { 653 struct bpf_event_entry *ee; 654 655 ee = kzalloc(sizeof(*ee), GFP_ATOMIC); 656 if (ee) { 657 ee->event = perf_file->private_data; 658 ee->perf_file = perf_file; 659 ee->map_file = map_file; 660 } 661 662 return ee; 663 } 664 665 static void __bpf_event_entry_free(struct rcu_head *rcu) 666 { 667 struct bpf_event_entry *ee; 668 669 ee = container_of(rcu, struct bpf_event_entry, rcu); 670 fput(ee->perf_file); 671 kfree(ee); 672 } 673 674 static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee) 675 { 676 call_rcu(&ee->rcu, __bpf_event_entry_free); 677 } 678 679 static void *perf_event_fd_array_get_ptr(struct bpf_map *map, 680 struct file *map_file, int fd) 681 { 682 struct bpf_event_entry *ee; 683 struct perf_event *event; 684 struct file *perf_file; 685 u64 value; 686 687 perf_file = perf_event_get(fd); 688 if (IS_ERR(perf_file)) 689 return perf_file; 690 691 ee = ERR_PTR(-EOPNOTSUPP); 692 event = perf_file->private_data; 693 if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP) 694 goto err_out; 695 696 ee = bpf_event_entry_gen(perf_file, map_file); 697 if (ee) 698 return ee; 699 ee = ERR_PTR(-ENOMEM); 700 err_out: 701 fput(perf_file); 702 return ee; 703 } 704 705 static void perf_event_fd_array_put_ptr(void *ptr) 706 { 707 bpf_event_entry_free_rcu(ptr); 708 } 709 710 static void perf_event_fd_array_release(struct bpf_map *map, 711 struct file *map_file) 712 { 713 struct bpf_array *array = container_of(map, struct bpf_array, map); 714 struct bpf_event_entry *ee; 715 int i; 716 717 rcu_read_lock(); 718 for (i = 0; i < array->map.max_entries; i++) { 719 ee = READ_ONCE(array->ptrs[i]); 720 if (ee && ee->map_file == map_file) 721 fd_array_map_delete_elem(map, &i); 722 } 723 rcu_read_unlock(); 724 } 725 726 const struct bpf_map_ops perf_event_array_map_ops = { 727 .map_alloc_check = fd_array_map_alloc_check, 728 .map_alloc = array_map_alloc, 729 .map_free = fd_array_map_free, 730 .map_get_next_key = array_map_get_next_key, 731 .map_lookup_elem = fd_array_map_lookup_elem, 732 .map_delete_elem = fd_array_map_delete_elem, 733 .map_fd_get_ptr = perf_event_fd_array_get_ptr, 734 .map_fd_put_ptr = perf_event_fd_array_put_ptr, 735 .map_release = perf_event_fd_array_release, 736 .map_check_btf = map_check_no_btf, 737 }; 738 739 #ifdef CONFIG_CGROUPS 740 static void *cgroup_fd_array_get_ptr(struct bpf_map *map, 741 struct file *map_file /* not used */, 742 int fd) 743 { 744 return cgroup_get_from_fd(fd); 745 } 746 747 static void cgroup_fd_array_put_ptr(void *ptr) 748 { 749 /* cgroup_put free cgrp after a rcu grace period */ 750 cgroup_put(ptr); 751 } 752 753 static void cgroup_fd_array_free(struct bpf_map *map) 754 { 755 bpf_fd_array_map_clear(map); 756 fd_array_map_free(map); 757 } 758 759 const struct bpf_map_ops cgroup_array_map_ops = { 760 .map_alloc_check = fd_array_map_alloc_check, 761 .map_alloc = array_map_alloc, 762 .map_free = cgroup_fd_array_free, 763 .map_get_next_key = array_map_get_next_key, 764 .map_lookup_elem = fd_array_map_lookup_elem, 765 .map_delete_elem = fd_array_map_delete_elem, 766 .map_fd_get_ptr = cgroup_fd_array_get_ptr, 767 .map_fd_put_ptr = cgroup_fd_array_put_ptr, 768 .map_check_btf = map_check_no_btf, 769 }; 770 #endif 771 772 static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) 773 { 774 struct bpf_map *map, *inner_map_meta; 775 776 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd); 777 if (IS_ERR(inner_map_meta)) 778 return inner_map_meta; 779 780 map = array_map_alloc(attr); 781 if (IS_ERR(map)) { 782 bpf_map_meta_free(inner_map_meta); 783 return map; 784 } 785 786 map->inner_map_meta = inner_map_meta; 787 788 return map; 789 } 790 791 static void array_of_map_free(struct bpf_map *map) 792 { 793 /* map->inner_map_meta is only accessed by syscall which 794 * is protected by fdget/fdput. 795 */ 796 bpf_map_meta_free(map->inner_map_meta); 797 bpf_fd_array_map_clear(map); 798 fd_array_map_free(map); 799 } 800 801 static void *array_of_map_lookup_elem(struct bpf_map *map, void *key) 802 { 803 struct bpf_map **inner_map = array_map_lookup_elem(map, key); 804 805 if (!inner_map) 806 return NULL; 807 808 return READ_ONCE(*inner_map); 809 } 810 811 static u32 array_of_map_gen_lookup(struct bpf_map *map, 812 struct bpf_insn *insn_buf) 813 { 814 struct bpf_array *array = container_of(map, struct bpf_array, map); 815 u32 elem_size = round_up(map->value_size, 8); 816 struct bpf_insn *insn = insn_buf; 817 const int ret = BPF_REG_0; 818 const int map_ptr = BPF_REG_1; 819 const int index = BPF_REG_2; 820 821 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value)); 822 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0); 823 if (map->unpriv_array) { 824 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6); 825 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask); 826 } else { 827 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5); 828 } 829 if (is_power_of_2(elem_size)) 830 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size)); 831 else 832 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size); 833 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr); 834 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); 835 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1); 836 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1); 837 *insn++ = BPF_MOV64_IMM(ret, 0); 838 839 return insn - insn_buf; 840 } 841 842 const struct bpf_map_ops array_of_maps_map_ops = { 843 .map_alloc_check = fd_array_map_alloc_check, 844 .map_alloc = array_of_map_alloc, 845 .map_free = array_of_map_free, 846 .map_get_next_key = array_map_get_next_key, 847 .map_lookup_elem = array_of_map_lookup_elem, 848 .map_delete_elem = fd_array_map_delete_elem, 849 .map_fd_get_ptr = bpf_map_fd_get_ptr, 850 .map_fd_put_ptr = bpf_map_fd_put_ptr, 851 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, 852 .map_gen_lookup = array_of_map_gen_lookup, 853 .map_check_btf = map_check_no_btf, 854 }; 855