// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */

#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/numa.h>
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/btf_ids.h>

enum bpf_struct_ops_state {
	BPF_STRUCT_OPS_STATE_INIT,
	BPF_STRUCT_OPS_STATE_INUSE,
	BPF_STRUCT_OPS_STATE_TOBEFREE,
};

#define BPF_STRUCT_OPS_COMMON_VALUE			\
	refcount_t refcnt;				\
	enum bpf_struct_ops_state state

struct bpf_struct_ops_value {
	BPF_STRUCT_OPS_COMMON_VALUE;
	char data[] ____cacheline_aligned_in_smp;
};

struct bpf_struct_ops_map {
	struct bpf_map map;
	struct rcu_head rcu;
	const struct bpf_struct_ops *st_ops;
	/* protect map_update */
	struct mutex lock;
	/* links holds all the bpf_links that are populated
	 * to the func ptrs of the kernel's struct
	 * (in kvalue.data).
	 */
	struct bpf_link **links;
	/* image is a page that has all the trampolines
	 * that store the func args before calling the bpf_prog.
	 * A PAGE_SIZE "image" is enough to store all trampolines
	 * for "links[]".
	 */
	void *image;
	/* uvalue->data stores the kernel struct
	 * (e.g. tcp_congestion_ops) that is more useful
	 * to userspace than the kvalue.  For example,
	 * the bpf_prog's id is stored instead of the kernel
	 * address of a func ptr.
	 */
	struct bpf_struct_ops_value *uvalue;
	/* kvalue.data stores the actual kernel's struct
	 * (e.g. tcp_congestion_ops) that will be
	 * registered to the kernel subsystem.
	 */
	struct bpf_struct_ops_value kvalue;
};

#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)

/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
 * the map's value exposed to userspace, and its btf-type-id is
 * stored in map->btf_vmlinux_value_type_id.
 */
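/* As an illustration (assuming tcp_congestion_ops is one of the types
 * listed in bpf_struct_ops_types.h), BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
 * below expands to roughly:
 *
 *	extern struct bpf_struct_ops bpf_tcp_congestion_ops;
 *
 *	struct bpf_struct_ops_tcp_congestion_ops {
 *		refcount_t refcnt;
 *		enum bpf_struct_ops_state state;
 *		struct tcp_congestion_ops data ____cacheline_aligned_in_smp;
 *	};
 */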
#define BPF_STRUCT_OPS_TYPE(_name)				\
extern struct bpf_struct_ops bpf_##_name;			\
								\
struct bpf_struct_ops_##_name {					\
	BPF_STRUCT_OPS_COMMON_VALUE;				\
	struct _name data ____cacheline_aligned_in_smp;		\
};
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

enum {
#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
	__NR_BPF_STRUCT_OPS_TYPE,
};

static struct bpf_struct_ops * const bpf_struct_ops[] = {
#define BPF_STRUCT_OPS_TYPE(_name)				\
	[BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
};

const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};

const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_struct_ops_test_run,
#endif
};

static const struct btf_type *module_type;
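
/* Called while parsing btf_vmlinux.  For each struct_ops type listed in
 * bpf_struct_ops_types.h, resolve its BTF type and its
 * "bpf_struct_ops_<name>" value type, distill a func model for every
 * func ptr member, and let the subsystem's ->init() finish the setup.
 * A type failing any check below is skipped with a warning instead of
 * failing the whole init.
 */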
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
{
	s32 type_id, value_id, module_id;
	const struct btf_member *member;
	struct bpf_struct_ops *st_ops;
	const struct btf_type *t;
	char value_name[128];
	const char *mname;
	u32 i, j;

	/* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

	module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
	if (module_id < 0) {
		pr_warn("Cannot find struct module in btf_vmlinux\n");
		return;
	}
	module_type = btf_type_by_id(btf, module_id);

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		st_ops = bpf_struct_ops[i];

		if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
		    sizeof(value_name)) {
			pr_warn("struct_ops name %s is too long\n",
				st_ops->name);
			continue;
		}
		sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);

		value_id = btf_find_by_name_kind(btf, value_name,
						 BTF_KIND_STRUCT);
		if (value_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				value_name);
			continue;
		}

		type_id = btf_find_by_name_kind(btf, st_ops->name,
						BTF_KIND_STRUCT);
		if (type_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				st_ops->name);
			continue;
		}
		t = btf_type_by_id(btf, type_id);
		if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
			pr_warn("Cannot support #%u members in struct %s\n",
				btf_type_vlen(t), st_ops->name);
			continue;
		}

		for_each_member(j, t, member) {
			const struct btf_type *func_proto;

			mname = btf_name_by_offset(btf, member->name_off);
			if (!*mname) {
				pr_warn("anon member in struct %s is not supported\n",
					st_ops->name);
				break;
			}

			if (__btf_member_bitfield_size(t, member)) {
				pr_warn("bit field member %s in struct %s is not supported\n",
					mname, st_ops->name);
				break;
			}

			func_proto = btf_type_resolve_func_ptr(btf,
							       member->type,
							       NULL);
			if (func_proto &&
			    btf_distill_func_proto(log, btf,
						   func_proto, mname,
						   &st_ops->func_models[j])) {
				pr_warn("Error in parsing func ptr %s in struct %s\n",
					mname, st_ops->name);
				break;
			}
		}

		if (j == btf_type_vlen(t)) {
			if (st_ops->init(btf)) {
				pr_warn("Error in init bpf_struct_ops %s\n",
					st_ops->name);
			} else {
				st_ops->type_id = type_id;
				st_ops->type = t;
				st_ops->value_id = value_id;
				st_ops->value_type = btf_type_by_id(btf,
								    value_id);
			}
		}
	}
}

extern struct btf *btf_vmlinux;

static const struct bpf_struct_ops *
bpf_struct_ops_find_value(u32 value_id)
{
	unsigned int i;

	if (!value_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->value_id == value_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}

const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
	unsigned int i;

	if (!type_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->type_id == type_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}

static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
					   void *next_key)
{
	if (key && *(u32 *)key == 0)
		return -ENOENT;

	*(u32 *)next_key = 0;
	return 0;
}

int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
				       void *value)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	enum bpf_struct_ops_state state;

	if (unlikely(*(u32 *)key != 0))
		return -ENOENT;

	kvalue = &st_map->kvalue;
	/* Pair with smp_store_release() during map_update */
	state = smp_load_acquire(&kvalue->state);
	if (state == BPF_STRUCT_OPS_STATE_INIT) {
		memset(value, 0, map->value_size);
		return 0;
	}

	/* No lock is needed.  state and refcnt do not need
	 * to be updated together under atomic context.
	 */
	uvalue = value;
	memcpy(uvalue, st_map->uvalue, map->value_size);
	uvalue->state = state;
	refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));

	return 0;
}

static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EINVAL);
}

static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
	const struct btf_type *t = st_map->st_ops->type;
	u32 i;

	for (i = 0; i < btf_type_vlen(t); i++) {
		if (st_map->links[i]) {
			bpf_link_put(st_map->links[i]);
			st_map->links[i] = NULL;
		}
	}
}

static int check_zero_holes(const struct btf_type *t, void *data)
{
	const struct btf_member *member;
	u32 i, moff, msize, prev_mend = 0;
	const struct btf_type *mtype;

	for_each_member(i, t, member) {
		moff = __btf_member_bit_offset(t, member) / 8;
		if (moff > prev_mend &&
		    memchr_inv(data + prev_mend, 0, moff - prev_mend))
			return -EINVAL;

		mtype = btf_type_by_id(btf_vmlinux, member->type);
		mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
		if (IS_ERR(mtype))
			return PTR_ERR(mtype);
		prev_mend = moff + msize;
	}

	if (t->size > prev_mend &&
	    memchr_inv(data + prev_mend, 0, t->size - prev_mend))
		return -EINVAL;

	return 0;
}

static void bpf_struct_ops_link_release(struct bpf_link *link)
{
}

static void bpf_struct_ops_link_dealloc(struct bpf_link *link)
{
	struct bpf_tramp_link *tlink = container_of(link, struct bpf_tramp_link, link);

	kfree(tlink);
}

const struct bpf_link_ops bpf_struct_ops_link_lops = {
	.release = bpf_struct_ops_link_release,
	.dealloc = bpf_struct_ops_link_dealloc,
};
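
/* Prepare the trampoline for one func ptr member: expose the single
 * prog "link" to the arch code as an FENTRY link and emit a trampoline
 * into [image, image_end) that saves the func args (described by
 * "model") before calling the prog.  On success, returns the size of
 * the generated trampoline so the caller can pack the next one right
 * after it in the same page.
 */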
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
				      struct bpf_tramp_link *link,
				      const struct btf_func_model *model,
				      void *image, void *image_end)
{
	u32 flags;

	tlinks[BPF_TRAMP_FENTRY].links[0] = link;
	tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
	/* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
	 * and it must be used alone.
	 */
	flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
	return arch_prepare_bpf_trampoline(NULL, image, image_end,
					   model, flags, tlinks, NULL);
}

static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 flags)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	const struct bpf_struct_ops *st_ops = st_map->st_ops;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_member *member;
	const struct btf_type *t = st_ops->type;
	struct bpf_tramp_links *tlinks = NULL;
	void *udata, *kdata;
	int prog_fd, err = 0;
	void *image, *image_end;
	u32 i;

	if (flags)
		return -EINVAL;

	if (*(u32 *)key != 0)
		return -E2BIG;

	err = check_zero_holes(st_ops->value_type, value);
	if (err)
		return err;

	uvalue = value;
	err = check_zero_holes(t, uvalue->data);
	if (err)
		return err;

	if (uvalue->state || refcount_read(&uvalue->refcnt))
		return -EINVAL;

	tlinks = kcalloc(BPF_TRAMP_MAX, sizeof(*tlinks), GFP_KERNEL);
	if (!tlinks)
		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

	mutex_lock(&st_map->lock);

	if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
		err = -EBUSY;
		goto unlock;
	}

	memcpy(uvalue, value, map->value_size);

	udata = &uvalue->data;
	kdata = &kvalue->data;
	image = st_map->image;
	image_end = st_map->image + PAGE_SIZE;
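
	/* Walk every member of the kernel struct: the module owner ptr is
	 * filled with BPF_MODULE_OWNER, the subsystem's ->init_member()
	 * gets the first chance at each member, any remaining non-func-ptr
	 * member must be zero, and each func ptr member carrying a prog fd
	 * gets a trampoline written into the image page (kdata) while the
	 * prog id is reported back to userspace (udata).
	 */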
	for_each_member(i, t, member) {
		const struct btf_type *mtype, *ptype;
		struct bpf_prog *prog;
		struct bpf_tramp_link *link;
		u32 moff;

		moff = __btf_member_bit_offset(t, member) / 8;
		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
		if (ptype == module_type) {
			if (*(void **)(udata + moff))
				goto reset_unlock;
			*(void **)(kdata + moff) = BPF_MODULE_OWNER;
			continue;
		}

		err = st_ops->init_member(t, member, kdata, udata);
		if (err < 0)
			goto reset_unlock;

		/* The ->init_member() has handled this member */
		if (err > 0)
			continue;

		/* If st_ops->init_member does not handle it,
		 * we will only handle func ptrs and zero-ed members
		 * here.  Reject everything else.
		 */

		/* All non-func-ptr members must be 0 */
		if (!ptype || !btf_type_is_func_proto(ptype)) {
			u32 msize;

			mtype = btf_type_by_id(btf_vmlinux, member->type);
			mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
			if (IS_ERR(mtype)) {
				err = PTR_ERR(mtype);
				goto reset_unlock;
			}

			if (memchr_inv(udata + moff, 0, msize)) {
				err = -EINVAL;
				goto reset_unlock;
			}

			continue;
		}

		prog_fd = (int)(*(unsigned long *)(udata + moff));
		/* Similar check as the attr->attach_prog_fd */
		if (!prog_fd)
			continue;

		prog = bpf_prog_get(prog_fd);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto reset_unlock;
		}

		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
		    prog->aux->attach_btf_id != st_ops->type_id ||
		    prog->expected_attach_type != i) {
			bpf_prog_put(prog);
			err = -EINVAL;
			goto reset_unlock;
		}

		link = kzalloc(sizeof(*link), GFP_USER);
		if (!link) {
			bpf_prog_put(prog);
			err = -ENOMEM;
			goto reset_unlock;
		}
		bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS,
			      &bpf_struct_ops_link_lops, prog);
		st_map->links[i] = &link->link;

		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
							&st_ops->func_models[i],
							image, image_end);
		if (err < 0)
			goto reset_unlock;

		*(void **)(kdata + moff) = image;
		image += err;

		/* put prog_id to udata */
		*(unsigned long *)(udata + moff) = prog->aux->id;
	}

	refcount_set(&kvalue->refcnt, 1);
	bpf_map_inc(map);

	set_memory_rox((long)st_map->image, 1);
	err = st_ops->reg(kdata);
	if (likely(!err)) {
		/* Pair with smp_load_acquire() during lookup_elem().
		 * It ensures the above udata updates (e.g. prog->aux->id)
		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
		 */
		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
		goto unlock;
	}

	/* Error during st_ops->reg().  Can happen if this struct_ops needs to be
	 * verified as a whole, after all init_member() calls.  Can also happen if
	 * there was a race in registering the struct_ops (under the same name) to
	 * a sub-system through different struct_ops's maps.
	 */
	set_memory_nx((long)st_map->image, 1);
	set_memory_rw((long)st_map->image, 1);
	bpf_map_put(map);

reset_unlock:
	bpf_struct_ops_map_put_progs(st_map);
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
unlock:
	kfree(tlinks);
	mutex_unlock(&st_map->lock);
	return err;
}
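
/* Deleting the (only) element unregisters the struct_ops from the
 * subsystem.  The INUSE -> TOBEFREE transition is done with cmpxchg so
 * that only one deleter calls ->unreg() and drops the refcount taken at
 * registration time; the map itself goes away once every holder
 * (including subsystems using bpf_struct_ops_get/put) has let go.
 */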
static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
	enum bpf_struct_ops_state prev_state;
	struct bpf_struct_ops_map *st_map;

	st_map = (struct bpf_struct_ops_map *)map;
	prev_state = cmpxchg(&st_map->kvalue.state,
			     BPF_STRUCT_OPS_STATE_INUSE,
			     BPF_STRUCT_OPS_STATE_TOBEFREE);
	switch (prev_state) {
	case BPF_STRUCT_OPS_STATE_INUSE:
		st_map->st_ops->unreg(&st_map->kvalue.data);
		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
			bpf_map_put(map);
		return 0;
	case BPF_STRUCT_OPS_STATE_TOBEFREE:
		return -EINPROGRESS;
	case BPF_STRUCT_OPS_STATE_INIT:
		return -ENOENT;
	default:
		WARN_ON_ONCE(1);
		/* Should never happen.  Treat it as not found. */
		return -ENOENT;
	}
}

static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
					     struct seq_file *m)
{
	void *value;
	int err;

	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		return;

	err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	if (!err) {
		btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
				  value, m);
		seq_puts(m, "\n");
	}

	kfree(value);
}

static void bpf_struct_ops_map_free(struct bpf_map *map)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

	if (st_map->links)
		bpf_struct_ops_map_put_progs(st_map);
	bpf_map_area_free(st_map->links);
	bpf_jit_free_exec(st_map->image);
	bpf_map_area_free(st_map->uvalue);
	bpf_map_area_free(st_map);
}

static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
		return -EINVAL;
	return 0;
}

static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
	const struct bpf_struct_ops *st_ops;
	size_t st_map_size;
	struct bpf_struct_ops_map *st_map;
	const struct btf_type *t, *vt;
	struct bpf_map *map;

	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
	if (!st_ops)
		return ERR_PTR(-ENOTSUPP);

	vt = st_ops->value_type;
	if (attr->value_size != vt->size)
		return ERR_PTR(-EINVAL);

	t = st_ops->type;

	st_map_size = sizeof(*st_map) +
		/* kvalue stores the
		 * struct bpf_struct_ops_tcp_congestion_ops
		 */
		(vt->size - sizeof(struct bpf_struct_ops_value));

	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
	if (!st_map)
		return ERR_PTR(-ENOMEM);

	st_map->st_ops = st_ops;
	map = &st_map->map;

	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
	st_map->links =
		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_link *),
				   NUMA_NO_NODE);
	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!st_map->uvalue || !st_map->links || !st_map->image) {
		bpf_struct_ops_map_free(map);
		return ERR_PTR(-ENOMEM);
	}

	mutex_init(&st_map->lock);
	set_vm_flush_reset_perms(st_map->image);
	bpf_map_init_from_attr(map, attr);

	return map;
}
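
/* Report an approximate memory footprint to userspace: the map struct
 * with its embedded kvalue, the separately allocated uvalue, the links
 * array and the one-page trampoline image.
 */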
static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	const struct bpf_struct_ops *st_ops = st_map->st_ops;
	const struct btf_type *vt = st_ops->value_type;
	u64 usage;

	usage = sizeof(*st_map) +
		vt->size - sizeof(struct bpf_struct_ops_value);
	usage += vt->size;
	usage += btf_type_vlen(vt) * sizeof(struct bpf_link *);
	usage += PAGE_SIZE;
	return usage;
}

BTF_ID_LIST_SINGLE(bpf_struct_ops_map_btf_ids, struct, bpf_struct_ops_map)
const struct bpf_map_ops bpf_struct_ops_map_ops = {
	.map_alloc_check = bpf_struct_ops_map_alloc_check,
	.map_alloc = bpf_struct_ops_map_alloc,
	.map_free = bpf_struct_ops_map_free,
	.map_get_next_key = bpf_struct_ops_map_get_next_key,
	.map_lookup_elem = bpf_struct_ops_map_lookup_elem,
	.map_delete_elem = bpf_struct_ops_map_delete_elem,
	.map_update_elem = bpf_struct_ops_map_update_elem,
	.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
	.map_mem_usage = bpf_struct_ops_map_mem_usage,
	.map_btf_id = &bpf_struct_ops_map_btf_ids[0],
};

/* "const void *" because some subsystems pass a const
 * (e.g. const struct tcp_congestion_ops *)
 */
bool bpf_struct_ops_get(const void *kdata)
{
	struct bpf_struct_ops_value *kvalue;

	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);

	return refcount_inc_not_zero(&kvalue->refcnt);
}

static void bpf_struct_ops_put_rcu(struct rcu_head *head)
{
	struct bpf_struct_ops_map *st_map;

	st_map = container_of(head, struct bpf_struct_ops_map, rcu);
	bpf_map_put(&st_map->map);
}

void bpf_struct_ops_put(const void *kdata)
{
	struct bpf_struct_ops_value *kvalue;

	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
	if (refcount_dec_and_test(&kvalue->refcnt)) {
		struct bpf_struct_ops_map *st_map;

		st_map = container_of(kvalue, struct bpf_struct_ops_map,
				      kvalue);
		/* The struct_ops's function may switch to another struct_ops.
		 *
		 * For example, bpf_tcp_cc_x->init() may switch to
		 * another tcp_cc_y by calling
		 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
		 * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
		 * and its map->refcnt may reach 0, which would then free its
		 * trampoline image while tcp_cc_x is still running.
		 *
		 * Thus, an rcu grace period is needed here.
		 */
		call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
	}
}