1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Functions to manage eBPF programs attached to cgroups 4 * 5 * Copyright (c) 2016 Daniel Mack 6 */ 7 8 #include <linux/kernel.h> 9 #include <linux/atomic.h> 10 #include <linux/cgroup.h> 11 #include <linux/filter.h> 12 #include <linux/slab.h> 13 #include <linux/sysctl.h> 14 #include <linux/string.h> 15 #include <linux/bpf.h> 16 #include <linux/bpf-cgroup.h> 17 #include <linux/bpf_lsm.h> 18 #include <linux/bpf_verifier.h> 19 #include <net/sock.h> 20 #include <net/bpf_sk_storage.h> 21 22 #include "../cgroup/cgroup-internal.h" 23 24 DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE); 25 EXPORT_SYMBOL(cgroup_bpf_enabled_key); 26 27 /* 28 * cgroup bpf destruction makes heavy use of work items and there can be a lot 29 * of concurrent destructions. Use a separate workqueue so that cgroup bpf 30 * destruction work items don't end up filling up max_active of system_wq 31 * which may lead to deadlock. 32 */ 33 static struct workqueue_struct *cgroup_bpf_destroy_wq; 34 35 static int __init cgroup_bpf_wq_init(void) 36 { 37 cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1); 38 if (!cgroup_bpf_destroy_wq) 39 panic("Failed to alloc workqueue for cgroup bpf destroy.\n"); 40 return 0; 41 } 42 core_initcall(cgroup_bpf_wq_init); 43 44 /* __always_inline is necessary to prevent indirect call through run_prog 45 * function pointer. 46 */ 47 static __always_inline int 48 bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp, 49 enum cgroup_bpf_attach_type atype, 50 const void *ctx, bpf_prog_run_fn run_prog, 51 int retval, u32 *ret_flags) 52 { 53 const struct bpf_prog_array_item *item; 54 const struct bpf_prog *prog; 55 const struct bpf_prog_array *array; 56 struct bpf_run_ctx *old_run_ctx; 57 struct bpf_cg_run_ctx run_ctx; 58 u32 func_ret; 59 60 run_ctx.retval = retval; 61 migrate_disable(); 62 rcu_read_lock(); 63 array = rcu_dereference(cgrp->effective[atype]); 64 item = &array->items[0]; 65 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 66 while ((prog = READ_ONCE(item->prog))) { 67 run_ctx.prog_item = item; 68 func_ret = run_prog(prog, ctx); 69 if (ret_flags) { 70 *(ret_flags) |= (func_ret >> 1); 71 func_ret &= 1; 72 } 73 if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval)) 74 run_ctx.retval = -EPERM; 75 item++; 76 } 77 bpf_reset_run_ctx(old_run_ctx); 78 rcu_read_unlock(); 79 migrate_enable(); 80 return run_ctx.retval; 81 } 82 83 unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx, 84 const struct bpf_insn *insn) 85 { 86 const struct bpf_prog *shim_prog; 87 struct sock *sk; 88 struct cgroup *cgrp; 89 int ret = 0; 90 u64 *args; 91 92 args = (u64 *)ctx; 93 sk = (void *)(unsigned long)args[0]; 94 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 95 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 96 97 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 98 if (likely(cgrp)) 99 ret = bpf_prog_run_array_cg(&cgrp->bpf, 100 shim_prog->aux->cgroup_atype, 101 ctx, bpf_prog_run, 0, NULL); 102 return ret; 103 } 104 105 unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx, 106 const struct bpf_insn *insn) 107 { 108 const struct bpf_prog *shim_prog; 109 struct socket *sock; 110 struct cgroup *cgrp; 111 int ret = 0; 112 u64 *args; 113 114 args = (u64 *)ctx; 115 sock = (void *)(unsigned long)args[0]; 116 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 117 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 118 119 cgrp = 
sock_cgroup_ptr(&sock->sk->sk_cgrp_data); 120 if (likely(cgrp)) 121 ret = bpf_prog_run_array_cg(&cgrp->bpf, 122 shim_prog->aux->cgroup_atype, 123 ctx, bpf_prog_run, 0, NULL); 124 return ret; 125 } 126 127 unsigned int __cgroup_bpf_run_lsm_current(const void *ctx, 128 const struct bpf_insn *insn) 129 { 130 const struct bpf_prog *shim_prog; 131 struct cgroup *cgrp; 132 int ret = 0; 133 134 /*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/ 135 shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi)); 136 137 /* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */ 138 cgrp = task_dfl_cgroup(current); 139 if (likely(cgrp)) 140 ret = bpf_prog_run_array_cg(&cgrp->bpf, 141 shim_prog->aux->cgroup_atype, 142 ctx, bpf_prog_run, 0, NULL); 143 return ret; 144 } 145 146 #ifdef CONFIG_BPF_LSM 147 struct cgroup_lsm_atype { 148 u32 attach_btf_id; 149 int refcnt; 150 }; 151 152 static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM]; 153 154 static enum cgroup_bpf_attach_type 155 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 156 { 157 int i; 158 159 lockdep_assert_held(&cgroup_mutex); 160 161 if (attach_type != BPF_LSM_CGROUP) 162 return to_cgroup_bpf_attach_type(attach_type); 163 164 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) 165 if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id) 166 return CGROUP_LSM_START + i; 167 168 for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++) 169 if (cgroup_lsm_atype[i].attach_btf_id == 0) 170 return CGROUP_LSM_START + i; 171 172 return -E2BIG; 173 174 } 175 176 void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype) 177 { 178 int i = cgroup_atype - CGROUP_LSM_START; 179 180 lockdep_assert_held(&cgroup_mutex); 181 182 WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id && 183 cgroup_lsm_atype[i].attach_btf_id != attach_btf_id); 184 185 cgroup_lsm_atype[i].attach_btf_id = attach_btf_id; 186 cgroup_lsm_atype[i].refcnt++; 187 } 188 189 void bpf_cgroup_atype_put(int cgroup_atype) 190 { 191 int i = cgroup_atype - CGROUP_LSM_START; 192 193 cgroup_lock(); 194 if (--cgroup_lsm_atype[i].refcnt <= 0) 195 cgroup_lsm_atype[i].attach_btf_id = 0; 196 WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0); 197 cgroup_unlock(); 198 } 199 #else 200 static enum cgroup_bpf_attach_type 201 bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id) 202 { 203 if (attach_type != BPF_LSM_CGROUP) 204 return to_cgroup_bpf_attach_type(attach_type); 205 return -EOPNOTSUPP; 206 } 207 #endif /* CONFIG_BPF_LSM */ 208 209 void cgroup_bpf_offline(struct cgroup *cgrp) 210 { 211 cgroup_get(cgrp); 212 percpu_ref_kill(&cgrp->bpf.refcnt); 213 } 214 215 static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[]) 216 { 217 enum bpf_cgroup_storage_type stype; 218 219 for_each_cgroup_storage_type(stype) 220 bpf_cgroup_storage_free(storages[stype]); 221 } 222 223 static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[], 224 struct bpf_cgroup_storage *new_storages[], 225 enum bpf_attach_type type, 226 struct bpf_prog *prog, 227 struct cgroup *cgrp) 228 { 229 enum bpf_cgroup_storage_type stype; 230 struct bpf_cgroup_storage_key key; 231 struct bpf_map *map; 232 233 key.cgroup_inode_id = cgroup_id(cgrp); 234 key.attach_type = type; 235 236 for_each_cgroup_storage_type(stype) { 237 map = prog->aux->cgroup_storage[stype]; 238 if (!map) 239 continue; 240 241 storages[stype] = cgroup_storage_lookup((void *)map, &key, false); 242 if (storages[stype]) 243 continue; 244 245 
storages[stype] = bpf_cgroup_storage_alloc(prog, stype); 246 if (IS_ERR(storages[stype])) { 247 bpf_cgroup_storages_free(new_storages); 248 return -ENOMEM; 249 } 250 251 new_storages[stype] = storages[stype]; 252 } 253 254 return 0; 255 } 256 257 static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[], 258 struct bpf_cgroup_storage *src[]) 259 { 260 enum bpf_cgroup_storage_type stype; 261 262 for_each_cgroup_storage_type(stype) 263 dst[stype] = src[stype]; 264 } 265 266 static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[], 267 struct cgroup *cgrp, 268 enum bpf_attach_type attach_type) 269 { 270 enum bpf_cgroup_storage_type stype; 271 272 for_each_cgroup_storage_type(stype) 273 bpf_cgroup_storage_link(storages[stype], cgrp, attach_type); 274 } 275 276 /* Called when bpf_cgroup_link is auto-detached from dying cgroup. 277 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It 278 * doesn't free link memory, which will eventually be done by bpf_link's 279 * release() callback, when its last FD is closed. 280 */ 281 static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link) 282 { 283 cgroup_put(link->cgroup); 284 link->cgroup = NULL; 285 } 286 287 /** 288 * cgroup_bpf_release() - put references of all bpf programs and 289 * release all cgroup bpf data 290 * @work: work structure embedded into the cgroup to modify 291 */ 292 static void cgroup_bpf_release(struct work_struct *work) 293 { 294 struct cgroup *p, *cgrp = container_of(work, struct cgroup, 295 bpf.release_work); 296 struct bpf_prog_array *old_array; 297 struct list_head *storages = &cgrp->bpf.storages; 298 struct bpf_cgroup_storage *storage, *stmp; 299 300 unsigned int atype; 301 302 cgroup_lock(); 303 304 for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) { 305 struct hlist_head *progs = &cgrp->bpf.progs[atype]; 306 struct bpf_prog_list *pl; 307 struct hlist_node *pltmp; 308 309 hlist_for_each_entry_safe(pl, pltmp, progs, node) { 310 hlist_del(&pl->node); 311 if (pl->prog) { 312 if (pl->prog->expected_attach_type == BPF_LSM_CGROUP) 313 bpf_trampoline_unlink_cgroup_shim(pl->prog); 314 bpf_prog_put(pl->prog); 315 } 316 if (pl->link) { 317 if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP) 318 bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog); 319 bpf_cgroup_link_auto_detach(pl->link); 320 } 321 kfree(pl); 322 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 323 } 324 old_array = rcu_dereference_protected( 325 cgrp->bpf.effective[atype], 326 lockdep_is_held(&cgroup_mutex)); 327 bpf_prog_array_free(old_array); 328 } 329 330 list_for_each_entry_safe(storage, stmp, storages, list_cg) { 331 bpf_cgroup_storage_unlink(storage); 332 bpf_cgroup_storage_free(storage); 333 } 334 335 cgroup_unlock(); 336 337 for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) 338 cgroup_bpf_put(p); 339 340 percpu_ref_exit(&cgrp->bpf.refcnt); 341 cgroup_put(cgrp); 342 } 343 344 /** 345 * cgroup_bpf_release_fn() - callback used to schedule releasing 346 * of bpf cgroup data 347 * @ref: percpu ref counter structure 348 */ 349 static void cgroup_bpf_release_fn(struct percpu_ref *ref) 350 { 351 struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt); 352 353 INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release); 354 queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work); 355 } 356 357 /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through 358 * link or direct prog. 
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	hlist_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[atype];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[atype]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[atype]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			continue;

		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
			if (!prog_list_prog(pl))
				continue;

			item = &progs->items[cnt];
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}
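/* Editorial illustration (not part of the original file; progX/progY/progZ
 * are hypothetical names): for a hierarchy A -> B -> C, if A attaches progX
 * and B attaches progY, both with BPF_F_ALLOW_MULTI, then the walk above
 * builds C's effective array with C's own entries first, followed by B's
 * and then A's, i.e. [.. C's progs .., progY, progX].  If a parent instead
 * attached its program with BPF_F_ALLOW_OVERRIDE and C attaches progZ, only
 * progZ ends up in C's effective array: once cnt > 0 the populate loop skips
 * every ancestor that was not attached with BPF_F_ALLOW_MULTI, which is the
 * "overridable parent yields to the child" rule checked by
 * hierarchy_allows_attach().
 */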
static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
	/* has to use macro instead of const int, since compiler thinks
	 * that array below is variable length
	 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_get(p);

	for (i = 0; i < NR; i++)
		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);

	INIT_LIST_HEAD(&cgrp->bpf.storages);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);

	return -ENOMEM;
}

static int update_effective_progs(struct cgroup *cgrp,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
			if (unlikely(desc->bpf.inactive)) {
				bpf_prog_array_free(desc->bpf.inactive);
				desc->bpf.inactive = NULL;
			}
			continue;
		}

		activate_effective_progs(desc, atype, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

#define BPF_CGROUP_MAX_PROGS 64

static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	/* single-attach case */
	if (!allow_multi) {
		if (hlist_empty(progs))
			return NULL;
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	hlist_for_each_entry(pl, progs, node) {
		if (prog && pl->prog == prog && prog != replace_prog)
			/* disallow attaching the same prog twice */
			return ERR_PTR(-EINVAL);
		if (link && pl->link == link)
			/* disallow attaching the same link twice */
			return ERR_PTR(-EINVAL);
	}

	/* direct prog multi-attach w/ replacement case */
	if (replace_prog) {
		hlist_for_each_entry(pl, progs, node) {
			if (pl->prog == replace_prog)
				/* a match found */
				return pl;
		}
		/* prog to replace not found for cgroup */
		return ERR_PTR(-ENOENT);
	}

	return NULL;
}

/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of
@prog or @link can be non-null. 622 * Must be called with cgroup_mutex held. 623 */ 624 static int __cgroup_bpf_attach(struct cgroup *cgrp, 625 struct bpf_prog *prog, struct bpf_prog *replace_prog, 626 struct bpf_cgroup_link *link, 627 enum bpf_attach_type type, u32 flags) 628 { 629 u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); 630 struct bpf_prog *old_prog = NULL; 631 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 632 struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; 633 struct bpf_prog *new_prog = prog ? : link->link.prog; 634 enum cgroup_bpf_attach_type atype; 635 struct bpf_prog_list *pl; 636 struct hlist_head *progs; 637 int err; 638 639 if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || 640 ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI))) 641 /* invalid combination */ 642 return -EINVAL; 643 if (link && (prog || replace_prog)) 644 /* only either link or prog/replace_prog can be specified */ 645 return -EINVAL; 646 if (!!replace_prog != !!(flags & BPF_F_REPLACE)) 647 /* replace_prog implies BPF_F_REPLACE, and vice versa */ 648 return -EINVAL; 649 650 atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id); 651 if (atype < 0) 652 return -EINVAL; 653 654 progs = &cgrp->bpf.progs[atype]; 655 656 if (!hierarchy_allows_attach(cgrp, atype)) 657 return -EPERM; 658 659 if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags) 660 /* Disallow attaching non-overridable on top 661 * of existing overridable in this cgroup. 662 * Disallow attaching multi-prog if overridable or none 663 */ 664 return -EPERM; 665 666 if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) 667 return -E2BIG; 668 669 pl = find_attach_entry(progs, prog, link, replace_prog, 670 flags & BPF_F_ALLOW_MULTI); 671 if (IS_ERR(pl)) 672 return PTR_ERR(pl); 673 674 if (bpf_cgroup_storages_alloc(storage, new_storage, type, 675 prog ? 
: link->link.prog, cgrp)) 676 return -ENOMEM; 677 678 if (pl) { 679 old_prog = pl->prog; 680 } else { 681 struct hlist_node *last = NULL; 682 683 pl = kmalloc(sizeof(*pl), GFP_KERNEL); 684 if (!pl) { 685 bpf_cgroup_storages_free(new_storage); 686 return -ENOMEM; 687 } 688 if (hlist_empty(progs)) 689 hlist_add_head(&pl->node, progs); 690 else 691 hlist_for_each(last, progs) { 692 if (last->next) 693 continue; 694 hlist_add_behind(&pl->node, last); 695 break; 696 } 697 } 698 699 pl->prog = prog; 700 pl->link = link; 701 bpf_cgroup_storages_assign(pl->storage, storage); 702 cgrp->bpf.flags[atype] = saved_flags; 703 704 if (type == BPF_LSM_CGROUP) { 705 err = bpf_trampoline_link_cgroup_shim(new_prog, atype); 706 if (err) 707 goto cleanup; 708 } 709 710 err = update_effective_progs(cgrp, atype); 711 if (err) 712 goto cleanup_trampoline; 713 714 if (old_prog) { 715 if (type == BPF_LSM_CGROUP) 716 bpf_trampoline_unlink_cgroup_shim(old_prog); 717 bpf_prog_put(old_prog); 718 } else { 719 static_branch_inc(&cgroup_bpf_enabled_key[atype]); 720 } 721 bpf_cgroup_storages_link(new_storage, cgrp, type); 722 return 0; 723 724 cleanup_trampoline: 725 if (type == BPF_LSM_CGROUP) 726 bpf_trampoline_unlink_cgroup_shim(new_prog); 727 728 cleanup: 729 if (old_prog) { 730 pl->prog = old_prog; 731 pl->link = NULL; 732 } 733 bpf_cgroup_storages_free(new_storage); 734 if (!old_prog) { 735 hlist_del(&pl->node); 736 kfree(pl); 737 } 738 return err; 739 } 740 741 static int cgroup_bpf_attach(struct cgroup *cgrp, 742 struct bpf_prog *prog, struct bpf_prog *replace_prog, 743 struct bpf_cgroup_link *link, 744 enum bpf_attach_type type, 745 u32 flags) 746 { 747 int ret; 748 749 cgroup_lock(); 750 ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags); 751 cgroup_unlock(); 752 return ret; 753 } 754 755 /* Swap updated BPF program for given link in effective program arrays across 756 * all descendant cgroups. This function is guaranteed to succeed. 757 */ 758 static void replace_effective_prog(struct cgroup *cgrp, 759 enum cgroup_bpf_attach_type atype, 760 struct bpf_cgroup_link *link) 761 { 762 struct bpf_prog_array_item *item; 763 struct cgroup_subsys_state *css; 764 struct bpf_prog_array *progs; 765 struct bpf_prog_list *pl; 766 struct hlist_head *head; 767 struct cgroup *cg; 768 int pos; 769 770 css_for_each_descendant_pre(css, &cgrp->self) { 771 struct cgroup *desc = container_of(css, struct cgroup, self); 772 773 if (percpu_ref_is_zero(&desc->bpf.refcnt)) 774 continue; 775 776 /* find position of link in effective progs array */ 777 for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { 778 if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) 779 continue; 780 781 head = &cg->bpf.progs[atype]; 782 hlist_for_each_entry(pl, head, node) { 783 if (!prog_list_prog(pl)) 784 continue; 785 if (pl->link == link) 786 goto found; 787 pos++; 788 } 789 } 790 found: 791 BUG_ON(!cg); 792 progs = rcu_dereference_protected( 793 desc->bpf.effective[atype], 794 lockdep_is_held(&cgroup_mutex)); 795 item = &progs->items[pos]; 796 WRITE_ONCE(item->prog, link->link.prog); 797 } 798 } 799 800 /** 801 * __cgroup_bpf_replace() - Replace link's program and propagate the change 802 * to descendants 803 * @cgrp: The cgroup which descendants to traverse 804 * @link: A link for which to replace BPF program 805 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt 806 * incremented 807 * 808 * Must be called with cgroup_mutex held. 
809 */ 810 static int __cgroup_bpf_replace(struct cgroup *cgrp, 811 struct bpf_cgroup_link *link, 812 struct bpf_prog *new_prog) 813 { 814 enum cgroup_bpf_attach_type atype; 815 struct bpf_prog *old_prog; 816 struct bpf_prog_list *pl; 817 struct hlist_head *progs; 818 bool found = false; 819 820 atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id); 821 if (atype < 0) 822 return -EINVAL; 823 824 progs = &cgrp->bpf.progs[atype]; 825 826 if (link->link.prog->type != new_prog->type) 827 return -EINVAL; 828 829 hlist_for_each_entry(pl, progs, node) { 830 if (pl->link == link) { 831 found = true; 832 break; 833 } 834 } 835 if (!found) 836 return -ENOENT; 837 838 old_prog = xchg(&link->link.prog, new_prog); 839 replace_effective_prog(cgrp, atype, link); 840 bpf_prog_put(old_prog); 841 return 0; 842 } 843 844 static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog, 845 struct bpf_prog *old_prog) 846 { 847 struct bpf_cgroup_link *cg_link; 848 int ret; 849 850 cg_link = container_of(link, struct bpf_cgroup_link, link); 851 852 cgroup_lock(); 853 /* link might have been auto-released by dying cgroup, so fail */ 854 if (!cg_link->cgroup) { 855 ret = -ENOLINK; 856 goto out_unlock; 857 } 858 if (old_prog && link->prog != old_prog) { 859 ret = -EPERM; 860 goto out_unlock; 861 } 862 ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); 863 out_unlock: 864 cgroup_unlock(); 865 return ret; 866 } 867 868 static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs, 869 struct bpf_prog *prog, 870 struct bpf_cgroup_link *link, 871 bool allow_multi) 872 { 873 struct bpf_prog_list *pl; 874 875 if (!allow_multi) { 876 if (hlist_empty(progs)) 877 /* report error when trying to detach and nothing is attached */ 878 return ERR_PTR(-ENOENT); 879 880 /* to maintain backward compatibility NONE and OVERRIDE cgroups 881 * allow detaching with invalid FD (prog==NULL) in legacy mode 882 */ 883 return hlist_entry(progs->first, typeof(*pl), node); 884 } 885 886 if (!prog && !link) 887 /* to detach MULTI prog the user has to specify valid FD 888 * of the program or link to be detached 889 */ 890 return ERR_PTR(-EINVAL); 891 892 /* find the prog or link and detach it */ 893 hlist_for_each_entry(pl, progs, node) { 894 if (pl->prog == prog && pl->link == link) 895 return pl; 896 } 897 return ERR_PTR(-ENOENT); 898 } 899 900 /** 901 * purge_effective_progs() - After compute_effective_progs fails to alloc new 902 * cgrp->bpf.inactive table we can recover by 903 * recomputing the array in place. 
 *
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	/* recompute effective prog array in place */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link or prog in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->prog == prog && pl->link == link)
					goto found;
				pos++;
			}
		}

		/* no link or prog match, skip the cgroup of this layer */
		continue;
found:
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));

		/* Remove the program from the array */
		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
			  "Failed to purge a prog from array at index %d", pos);
	}
}

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
966 */ 967 static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 968 struct bpf_cgroup_link *link, enum bpf_attach_type type) 969 { 970 enum cgroup_bpf_attach_type atype; 971 struct bpf_prog *old_prog; 972 struct bpf_prog_list *pl; 973 struct hlist_head *progs; 974 u32 attach_btf_id = 0; 975 u32 flags; 976 977 if (prog) 978 attach_btf_id = prog->aux->attach_btf_id; 979 if (link) 980 attach_btf_id = link->link.prog->aux->attach_btf_id; 981 982 atype = bpf_cgroup_atype_find(type, attach_btf_id); 983 if (atype < 0) 984 return -EINVAL; 985 986 progs = &cgrp->bpf.progs[atype]; 987 flags = cgrp->bpf.flags[atype]; 988 989 if (prog && link) 990 /* only one of prog or link can be specified */ 991 return -EINVAL; 992 993 pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI); 994 if (IS_ERR(pl)) 995 return PTR_ERR(pl); 996 997 /* mark it deleted, so it's ignored while recomputing effective */ 998 old_prog = pl->prog; 999 pl->prog = NULL; 1000 pl->link = NULL; 1001 1002 if (update_effective_progs(cgrp, atype)) { 1003 /* if update effective array failed replace the prog with a dummy prog*/ 1004 pl->prog = old_prog; 1005 pl->link = link; 1006 purge_effective_progs(cgrp, old_prog, link, atype); 1007 } 1008 1009 /* now can actually delete it from this cgroup list */ 1010 hlist_del(&pl->node); 1011 1012 kfree(pl); 1013 if (hlist_empty(progs)) 1014 /* last program was detached, reset flags to zero */ 1015 cgrp->bpf.flags[atype] = 0; 1016 if (old_prog) { 1017 if (type == BPF_LSM_CGROUP) 1018 bpf_trampoline_unlink_cgroup_shim(old_prog); 1019 bpf_prog_put(old_prog); 1020 } 1021 static_branch_dec(&cgroup_bpf_enabled_key[atype]); 1022 return 0; 1023 } 1024 1025 static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, 1026 enum bpf_attach_type type) 1027 { 1028 int ret; 1029 1030 cgroup_lock(); 1031 ret = __cgroup_bpf_detach(cgrp, prog, NULL, type); 1032 cgroup_unlock(); 1033 return ret; 1034 } 1035 1036 /* Must be called with cgroup_mutex held to avoid races. 
*/ 1037 static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 1038 union bpf_attr __user *uattr) 1039 { 1040 __u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags); 1041 bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE; 1042 __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 1043 enum bpf_attach_type type = attr->query.attach_type; 1044 enum cgroup_bpf_attach_type from_atype, to_atype; 1045 enum cgroup_bpf_attach_type atype; 1046 struct bpf_prog_array *effective; 1047 int cnt, ret = 0, i; 1048 int total_cnt = 0; 1049 u32 flags; 1050 1051 if (effective_query && prog_attach_flags) 1052 return -EINVAL; 1053 1054 if (type == BPF_LSM_CGROUP) { 1055 if (!effective_query && attr->query.prog_cnt && 1056 prog_ids && !prog_attach_flags) 1057 return -EINVAL; 1058 1059 from_atype = CGROUP_LSM_START; 1060 to_atype = CGROUP_LSM_END; 1061 flags = 0; 1062 } else { 1063 from_atype = to_cgroup_bpf_attach_type(type); 1064 if (from_atype < 0) 1065 return -EINVAL; 1066 to_atype = from_atype; 1067 flags = cgrp->bpf.flags[from_atype]; 1068 } 1069 1070 for (atype = from_atype; atype <= to_atype; atype++) { 1071 if (effective_query) { 1072 effective = rcu_dereference_protected(cgrp->bpf.effective[atype], 1073 lockdep_is_held(&cgroup_mutex)); 1074 total_cnt += bpf_prog_array_length(effective); 1075 } else { 1076 total_cnt += prog_list_length(&cgrp->bpf.progs[atype]); 1077 } 1078 } 1079 1080 /* always output uattr->query.attach_flags as 0 during effective query */ 1081 flags = effective_query ? 0 : flags; 1082 if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) 1083 return -EFAULT; 1084 if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt))) 1085 return -EFAULT; 1086 if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt) 1087 /* return early if user requested only program count + flags */ 1088 return 0; 1089 1090 if (attr->query.prog_cnt < total_cnt) { 1091 total_cnt = attr->query.prog_cnt; 1092 ret = -ENOSPC; 1093 } 1094 1095 for (atype = from_atype; atype <= to_atype && total_cnt; atype++) { 1096 if (effective_query) { 1097 effective = rcu_dereference_protected(cgrp->bpf.effective[atype], 1098 lockdep_is_held(&cgroup_mutex)); 1099 cnt = min_t(int, bpf_prog_array_length(effective), total_cnt); 1100 ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt); 1101 } else { 1102 struct hlist_head *progs; 1103 struct bpf_prog_list *pl; 1104 struct bpf_prog *prog; 1105 u32 id; 1106 1107 progs = &cgrp->bpf.progs[atype]; 1108 cnt = min_t(int, prog_list_length(progs), total_cnt); 1109 i = 0; 1110 hlist_for_each_entry(pl, progs, node) { 1111 prog = prog_list_prog(pl); 1112 id = prog->aux->id; 1113 if (copy_to_user(prog_ids + i, &id, sizeof(id))) 1114 return -EFAULT; 1115 if (++i == cnt) 1116 break; 1117 } 1118 1119 if (prog_attach_flags) { 1120 flags = cgrp->bpf.flags[atype]; 1121 1122 for (i = 0; i < cnt; i++) 1123 if (copy_to_user(prog_attach_flags + i, 1124 &flags, sizeof(flags))) 1125 return -EFAULT; 1126 prog_attach_flags += cnt; 1127 } 1128 } 1129 1130 prog_ids += cnt; 1131 total_cnt -= cnt; 1132 } 1133 return ret; 1134 } 1135 1136 static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, 1137 union bpf_attr __user *uattr) 1138 { 1139 int ret; 1140 1141 cgroup_lock(); 1142 ret = __cgroup_bpf_query(cgrp, attr, uattr); 1143 cgroup_unlock(); 1144 return ret; 1145 } 1146 1147 int cgroup_bpf_prog_attach(const union bpf_attr *attr, 1148 enum bpf_prog_type ptype, struct bpf_prog 
*prog) 1149 { 1150 struct bpf_prog *replace_prog = NULL; 1151 struct cgroup *cgrp; 1152 int ret; 1153 1154 cgrp = cgroup_get_from_fd(attr->target_fd); 1155 if (IS_ERR(cgrp)) 1156 return PTR_ERR(cgrp); 1157 1158 if ((attr->attach_flags & BPF_F_ALLOW_MULTI) && 1159 (attr->attach_flags & BPF_F_REPLACE)) { 1160 replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype); 1161 if (IS_ERR(replace_prog)) { 1162 cgroup_put(cgrp); 1163 return PTR_ERR(replace_prog); 1164 } 1165 } 1166 1167 ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL, 1168 attr->attach_type, attr->attach_flags); 1169 1170 if (replace_prog) 1171 bpf_prog_put(replace_prog); 1172 cgroup_put(cgrp); 1173 return ret; 1174 } 1175 1176 int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) 1177 { 1178 struct bpf_prog *prog; 1179 struct cgroup *cgrp; 1180 int ret; 1181 1182 cgrp = cgroup_get_from_fd(attr->target_fd); 1183 if (IS_ERR(cgrp)) 1184 return PTR_ERR(cgrp); 1185 1186 prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); 1187 if (IS_ERR(prog)) 1188 prog = NULL; 1189 1190 ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type); 1191 if (prog) 1192 bpf_prog_put(prog); 1193 1194 cgroup_put(cgrp); 1195 return ret; 1196 } 1197 1198 static void bpf_cgroup_link_release(struct bpf_link *link) 1199 { 1200 struct bpf_cgroup_link *cg_link = 1201 container_of(link, struct bpf_cgroup_link, link); 1202 struct cgroup *cg; 1203 1204 /* link might have been auto-detached by dying cgroup already, 1205 * in that case our work is done here 1206 */ 1207 if (!cg_link->cgroup) 1208 return; 1209 1210 cgroup_lock(); 1211 1212 /* re-check cgroup under lock again */ 1213 if (!cg_link->cgroup) { 1214 cgroup_unlock(); 1215 return; 1216 } 1217 1218 WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link, 1219 cg_link->type)); 1220 if (cg_link->type == BPF_LSM_CGROUP) 1221 bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog); 1222 1223 cg = cg_link->cgroup; 1224 cg_link->cgroup = NULL; 1225 1226 cgroup_unlock(); 1227 1228 cgroup_put(cg); 1229 } 1230 1231 static void bpf_cgroup_link_dealloc(struct bpf_link *link) 1232 { 1233 struct bpf_cgroup_link *cg_link = 1234 container_of(link, struct bpf_cgroup_link, link); 1235 1236 kfree(cg_link); 1237 } 1238 1239 static int bpf_cgroup_link_detach(struct bpf_link *link) 1240 { 1241 bpf_cgroup_link_release(link); 1242 1243 return 0; 1244 } 1245 1246 static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link, 1247 struct seq_file *seq) 1248 { 1249 struct bpf_cgroup_link *cg_link = 1250 container_of(link, struct bpf_cgroup_link, link); 1251 u64 cg_id = 0; 1252 1253 cgroup_lock(); 1254 if (cg_link->cgroup) 1255 cg_id = cgroup_id(cg_link->cgroup); 1256 cgroup_unlock(); 1257 1258 seq_printf(seq, 1259 "cgroup_id:\t%llu\n" 1260 "attach_type:\t%d\n", 1261 cg_id, 1262 cg_link->type); 1263 } 1264 1265 static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link, 1266 struct bpf_link_info *info) 1267 { 1268 struct bpf_cgroup_link *cg_link = 1269 container_of(link, struct bpf_cgroup_link, link); 1270 u64 cg_id = 0; 1271 1272 cgroup_lock(); 1273 if (cg_link->cgroup) 1274 cg_id = cgroup_id(cg_link->cgroup); 1275 cgroup_unlock(); 1276 1277 info->cgroup.cgroup_id = cg_id; 1278 info->cgroup.attach_type = cg_link->type; 1279 return 0; 1280 } 1281 1282 static const struct bpf_link_ops bpf_cgroup_link_lops = { 1283 .release = bpf_cgroup_link_release, 1284 .dealloc = bpf_cgroup_link_dealloc, 1285 .detach = bpf_cgroup_link_detach, 1286 .update_prog = cgroup_bpf_replace, 1287 .show_fdinfo = 
bpf_cgroup_link_show_fdinfo, 1288 .fill_link_info = bpf_cgroup_link_fill_link_info, 1289 }; 1290 1291 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 1292 { 1293 struct bpf_link_primer link_primer; 1294 struct bpf_cgroup_link *link; 1295 struct cgroup *cgrp; 1296 int err; 1297 1298 if (attr->link_create.flags) 1299 return -EINVAL; 1300 1301 cgrp = cgroup_get_from_fd(attr->link_create.target_fd); 1302 if (IS_ERR(cgrp)) 1303 return PTR_ERR(cgrp); 1304 1305 link = kzalloc(sizeof(*link), GFP_USER); 1306 if (!link) { 1307 err = -ENOMEM; 1308 goto out_put_cgroup; 1309 } 1310 bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops, 1311 prog); 1312 link->cgroup = cgrp; 1313 link->type = attr->link_create.attach_type; 1314 1315 err = bpf_link_prime(&link->link, &link_primer); 1316 if (err) { 1317 kfree(link); 1318 goto out_put_cgroup; 1319 } 1320 1321 err = cgroup_bpf_attach(cgrp, NULL, NULL, link, 1322 link->type, BPF_F_ALLOW_MULTI); 1323 if (err) { 1324 bpf_link_cleanup(&link_primer); 1325 goto out_put_cgroup; 1326 } 1327 1328 return bpf_link_settle(&link_primer); 1329 1330 out_put_cgroup: 1331 cgroup_put(cgrp); 1332 return err; 1333 } 1334 1335 int cgroup_bpf_prog_query(const union bpf_attr *attr, 1336 union bpf_attr __user *uattr) 1337 { 1338 struct cgroup *cgrp; 1339 int ret; 1340 1341 cgrp = cgroup_get_from_fd(attr->query.target_fd); 1342 if (IS_ERR(cgrp)) 1343 return PTR_ERR(cgrp); 1344 1345 ret = cgroup_bpf_query(cgrp, attr, uattr); 1346 1347 cgroup_put(cgrp); 1348 return ret; 1349 } 1350 1351 /** 1352 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering 1353 * @sk: The socket sending or receiving traffic 1354 * @skb: The skb that is being sent or received 1355 * @atype: The type of program to be executed 1356 * 1357 * If no socket is passed, or the socket is not of type INET or INET6, 1358 * this function does nothing and returns 0. 1359 * 1360 * The program type passed in via @type must be suitable for network 1361 * filtering. No further check is performed to assert that. 1362 * 1363 * For egress packets, this function can return: 1364 * NET_XMIT_SUCCESS (0) - continue with packet output 1365 * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr 1366 * NET_XMIT_CN (2) - continue with packet output and notify TCP 1367 * to call cwr 1368 * -err - drop packet 1369 * 1370 * For ingress packets, this function will return -EPERM if any 1371 * attached program was found and if it returned != 1 during execution. 1372 * Otherwise 0 is returned. 
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype)
{
	unsigned int offset = -skb_network_offset(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	if (atype == CGROUP_INET_EGRESS) {
		u32 flags = 0;
		bool cn;

		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
					    __bpf_prog_run_save_cb, 0, &flags);

		/* Return values of CGROUP EGRESS BPF programs are:
		 *   0: drop packet
		 *   1: keep packet
		 *   2: drop packet and cn
		 *   3: keep packet and cn
		 *
		 * The returned value is then converted to one of the NET_XMIT
		 * or an error code that is then interpreted as drop packet
		 * (and no cn):
		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
		 *   1: NET_XMIT_DROP     skb should be dropped and cn
		 *   2: NET_XMIT_CN       skb should be transmitted and cn
		 *   3: -err              skb should be dropped
		 */

		cn = flags & BPF_RET_SET_CN;
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
		if (!ret)
			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
		else
			ret = (cn ? NET_XMIT_DROP : ret);
	} else {
		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
					    skb, __bpf_prog_run_save_cb, 0,
					    NULL);
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
	}
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
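/* Editorial illustration (not part of the original file): an egress program
 * that wants the packet transmitted but congestion-notified returns 3, i.e.
 * "keep packet and cn".  bpf_prog_run_array_cg() keeps bit 0 as the verdict
 * and ORs bit 1 into *ret_flags (BPF_RET_SET_CN), so the conversion above
 * yields NET_XMIT_CN.  A minimal BPF-side sketch, assuming the usual libbpf
 * section naming (egress_cn is a hypothetical program name):
 *
 *	SEC("cgroup_skb/egress")
 *	int egress_cn(struct __sk_buff *skb)
 *	{
 *		return 3;	// keep packet and signal congestion (CN)
 *	}
 */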
/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @atype: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
				     NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       provided by user sockaddr
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
 *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
 *            uaddr.
 * @atype: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 * @flags: Pointer to u32 which contains higher bits of BPF program
 *         return value (OR'ed together).
 *
 * The socket is expected to be of type INET, INET6 or UNIX.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      int *uaddrlen,
				      enum cgroup_bpf_attach_type atype,
				      void *t_ctx,
				      u32 *flags)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
		.t_ctx = t_ctx,
	};
	struct sockaddr_storage unspec;
	struct cgroup *cgrp;
	int ret;

	/* Check socket family since not all sockets represent network
	 * endpoint (e.g. AF_UNIX).
	 */
	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
	    sk->sk_family != AF_UNIX)
		return 0;

	if (!ctx.uaddr) {
		memset(&unspec, 0, sizeof(unspec));
		ctx.uaddr = (struct sockaddr *)&unspec;
		ctx.uaddrlen = 0;
	} else {
		ctx.uaddrlen = *uaddrlen;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
				    0, flags);

	if (!ret && uaddr)
		*uaddrlen = ctx.uaddrlen;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);

/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 *            sk with connection information (IP addresses, etc.) May not contain
 *            cgroup info if it is a req sock.
 * @atype: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
				     0, NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int ret;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
				    NULL);
	rcu_read_unlock();

	return ret;
}

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
1574 */ 1575 enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); 1576 struct bpf_cgroup_storage *storage; 1577 struct bpf_cg_run_ctx *ctx; 1578 void *ptr; 1579 1580 /* get current cgroup storage from BPF run context */ 1581 ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1582 storage = ctx->prog_item->cgroup_storage[stype]; 1583 1584 if (stype == BPF_CGROUP_STORAGE_SHARED) 1585 ptr = &READ_ONCE(storage->buf)->data[0]; 1586 else 1587 ptr = this_cpu_ptr(storage->percpu_buf); 1588 1589 return (unsigned long)ptr; 1590 } 1591 1592 const struct bpf_func_proto bpf_get_local_storage_proto = { 1593 .func = bpf_get_local_storage, 1594 .gpl_only = false, 1595 .ret_type = RET_PTR_TO_MAP_VALUE, 1596 .arg1_type = ARG_CONST_MAP_PTR, 1597 .arg2_type = ARG_ANYTHING, 1598 }; 1599 1600 BPF_CALL_0(bpf_get_retval) 1601 { 1602 struct bpf_cg_run_ctx *ctx = 1603 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1604 1605 return ctx->retval; 1606 } 1607 1608 const struct bpf_func_proto bpf_get_retval_proto = { 1609 .func = bpf_get_retval, 1610 .gpl_only = false, 1611 .ret_type = RET_INTEGER, 1612 }; 1613 1614 BPF_CALL_1(bpf_set_retval, int, retval) 1615 { 1616 struct bpf_cg_run_ctx *ctx = 1617 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1618 1619 ctx->retval = retval; 1620 return 0; 1621 } 1622 1623 const struct bpf_func_proto bpf_set_retval_proto = { 1624 .func = bpf_set_retval, 1625 .gpl_only = false, 1626 .ret_type = RET_INTEGER, 1627 .arg1_type = ARG_ANYTHING, 1628 }; 1629 1630 static const struct bpf_func_proto * 1631 cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 1632 { 1633 const struct bpf_func_proto *func_proto; 1634 1635 func_proto = cgroup_common_func_proto(func_id, prog); 1636 if (func_proto) 1637 return func_proto; 1638 1639 func_proto = cgroup_current_func_proto(func_id, prog); 1640 if (func_proto) 1641 return func_proto; 1642 1643 switch (func_id) { 1644 case BPF_FUNC_perf_event_output: 1645 return &bpf_event_output_data_proto; 1646 default: 1647 return bpf_base_func_proto(func_id, prog); 1648 } 1649 } 1650 1651 static bool cgroup_dev_is_valid_access(int off, int size, 1652 enum bpf_access_type type, 1653 const struct bpf_prog *prog, 1654 struct bpf_insn_access_aux *info) 1655 { 1656 const int size_default = sizeof(__u32); 1657 1658 if (type == BPF_WRITE) 1659 return false; 1660 1661 if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) 1662 return false; 1663 /* The verifier guarantees that size > 0. 
*/ 1664 if (off % size != 0) 1665 return false; 1666 1667 switch (off) { 1668 case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): 1669 bpf_ctx_record_field_size(info, size_default); 1670 if (!bpf_ctx_narrow_access_ok(off, size, size_default)) 1671 return false; 1672 break; 1673 default: 1674 if (size != size_default) 1675 return false; 1676 } 1677 1678 return true; 1679 } 1680 1681 const struct bpf_prog_ops cg_dev_prog_ops = { 1682 }; 1683 1684 const struct bpf_verifier_ops cg_dev_verifier_ops = { 1685 .get_func_proto = cgroup_dev_func_proto, 1686 .is_valid_access = cgroup_dev_is_valid_access, 1687 }; 1688 1689 /** 1690 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl 1691 * 1692 * @head: sysctl table header 1693 * @table: sysctl table 1694 * @write: sysctl is being read (= 0) or written (= 1) 1695 * @buf: pointer to buffer (in and out) 1696 * @pcount: value-result argument: value is size of buffer pointed to by @buf, 1697 * result is size of @new_buf if program set new value, initial value 1698 * otherwise 1699 * @ppos: value-result argument: value is position at which read from or write 1700 * to sysctl is happening, result is new position if program overrode it, 1701 * initial value otherwise 1702 * @atype: type of program to be executed 1703 * 1704 * Program is run when sysctl is being accessed, either read or written, and 1705 * can allow or deny such access. 1706 * 1707 * This function will return %-EPERM if an attached program is found and 1708 * returned value != 1 during execution. In all other cases 0 is returned. 1709 */ 1710 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, 1711 const struct ctl_table *table, int write, 1712 char **buf, size_t *pcount, loff_t *ppos, 1713 enum cgroup_bpf_attach_type atype) 1714 { 1715 struct bpf_sysctl_kern ctx = { 1716 .head = head, 1717 .table = table, 1718 .write = write, 1719 .ppos = ppos, 1720 .cur_val = NULL, 1721 .cur_len = PAGE_SIZE, 1722 .new_val = NULL, 1723 .new_len = 0, 1724 .new_updated = 0, 1725 }; 1726 struct cgroup *cgrp; 1727 loff_t pos = 0; 1728 int ret; 1729 1730 ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); 1731 if (!ctx.cur_val || 1732 table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) { 1733 /* Let BPF program decide how to proceed. */ 1734 ctx.cur_len = 0; 1735 } 1736 1737 if (write && *buf && *pcount) { 1738 /* BPF program should be able to override new value with a 1739 * buffer bigger than provided by user. 1740 */ 1741 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); 1742 ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); 1743 if (ctx.new_val) { 1744 memcpy(ctx.new_val, *buf, ctx.new_len); 1745 } else { 1746 /* Let BPF program decide how to proceed. */ 1747 ctx.new_len = 0; 1748 } 1749 } 1750 1751 rcu_read_lock(); 1752 cgrp = task_dfl_cgroup(current); 1753 ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0, 1754 NULL); 1755 rcu_read_unlock(); 1756 1757 kfree(ctx.cur_val); 1758 1759 if (ret == 1 && ctx.new_updated) { 1760 kfree(*buf); 1761 *buf = ctx.new_val; 1762 *pcount = ctx.new_len; 1763 } else { 1764 kfree(ctx.new_val); 1765 } 1766 1767 return ret; 1768 } 1769 1770 #ifdef CONFIG_NET 1771 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen, 1772 struct bpf_sockopt_buf *buf) 1773 { 1774 if (unlikely(max_optlen < 0)) 1775 return -EINVAL; 1776 1777 if (unlikely(max_optlen > PAGE_SIZE)) { 1778 /* We don't expose optvals that are greater than PAGE_SIZE 1779 * to the BPF program. 
		 */
		max_optlen = PAGE_SIZE;
	}

	if (max_optlen <= sizeof(buf->data)) {
		/* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
		 * bytes avoid the cost of kzalloc.
		 */
		ctx->optval = buf->data;
		ctx->optval_end = ctx->optval + max_optlen;
		return max_optlen;
	}

	ctx->optval = kzalloc(max_optlen, GFP_USER);
	if (!ctx->optval)
		return -ENOMEM;

	ctx->optval_end = ctx->optval + max_optlen;

	return max_optlen;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
			     struct bpf_sockopt_buf *buf)
{
	if (ctx->optval == buf->data)
		return;
	kfree(ctx->optval);
}

static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
				  struct bpf_sockopt_buf *buf)
{
	return ctx->optval != buf->data;
}

int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, sockptr_t optval,
				       int *optlen, char **kernel_optval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = *level,
		.optname = *optname,
	};
	int ret, max_optlen;

	/* Allocate a bit more than the initial user buffer for
	 * BPF program. The canonical use case is overriding
	 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
	 */
	max_optlen = max_t(int, 16, *optlen);
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
	if (max_optlen < 0)
		return max_optlen;

	ctx.optlen = *optlen;

	if (copy_from_sockptr(ctx.optval, optval,
			      min(*optlen, max_optlen))) {
		ret = -EFAULT;
		goto out;
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
				    &ctx, bpf_prog_run, 0, NULL);
	release_sock(sk);

	if (ret)
		goto out;

	if (ctx.optlen == -1) {
		/* optlen set to -1, bypass kernel */
		ret = 1;
	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
		/* optlen is out of bounds */
		if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
			pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
				     ctx.optlen, max_optlen);
			ret = 0;
			goto out;
		}
		ret = -EFAULT;
	} else {
		/* optlen within bounds, run kernel handler */
		ret = 0;

		/* export any potential modifications */
		*level = ctx.level;
		*optname = ctx.optname;

		/* optlen == 0 from BPF indicates that we should
		 * use original userspace data.
		 */
		if (ctx.optlen != 0) {
			*optlen = ctx.optlen;
			/* We've used bpf_sockopt_kern->buf as an intermediary
			 * storage, but the BPF program indicates that we need
			 * to pass this data to the kernel setsockopt handler.
			 * No way to export on-stack buf, have to allocate a
			 * new buffer.
			 */
			if (!sockopt_buf_allocated(&ctx, &buf)) {
				void *p = kmalloc(ctx.optlen, GFP_USER);

				if (!p) {
					ret = -ENOMEM;
					goto out;
				}
				memcpy(p, ctx.optval, ctx.optlen);
				*kernel_optval = p;
			} else {
				*kernel_optval = ctx.optval;
			}
			/* export and don't free sockopt buf */
			return 0;
		}
	}

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}
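/* Editorial illustration (not part of the original file): a cgroup/setsockopt
 * program talks to the logic above through its return value and ctx->optlen:
 * returning 0 makes the caller's setsockopt() fail with -EPERM, returning 1
 * after setting ctx->optlen = -1 bypasses the kernel handler, and returning 1
 * with a rewritten optval/optlen feeds the modified value to the kernel.
 * A minimal BPF-side sketch, assuming the usual libbpf section naming
 * (deny_ip_ttl is a hypothetical program name):
 *
 *	SEC("cgroup/setsockopt")
 *	int deny_ip_ttl(struct bpf_sockopt *ctx)
 *	{
 *		if (ctx->level == IPPROTO_IP && ctx->optname == IP_TTL)
 *			return 0;	// caller's setsockopt() sees -EPERM
 *		ctx->optlen = 0;	// keep the original user-supplied value
 *		return 1;		// let the kernel handler run
 *	}
 */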
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, sockptr_t optval,
				       sockptr_t optlen, int max_optlen,
				       int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.current_task = current,
	};
	int orig_optlen;
	int ret;

	orig_optlen = max_optlen;
	ctx.optlen = max_optlen;
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
	if (max_optlen < 0)
		return max_optlen;

	if (!retval) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
		 * into our temporary buffer. Set optlen to the
		 * one that kernel returned as well to let
		 * BPF programs inspect the value.
		 */
		if (copy_from_sockptr(&ctx.optlen, optlen,
				      sizeof(ctx.optlen))) {
			ret = -EFAULT;
			goto out;
		}

		if (ctx.optlen < 0) {
			ret = -EFAULT;
			goto out;
		}
		orig_optlen = ctx.optlen;

		if (copy_from_sockptr(ctx.optval, optval,
				      min(ctx.optlen, max_optlen))) {
			ret = -EFAULT;
			goto out;
		}
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	release_sock(sk);

	if (ret < 0)
		goto out;

	if (!sockptr_is_null(optval) &&
	    (ctx.optlen > max_optlen || ctx.optlen < 0)) {
		if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
			pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
				     ctx.optlen, max_optlen);
			ret = retval;
			goto out;
		}
		ret = -EFAULT;
		goto out;
	}

	if (ctx.optlen != 0) {
		if (!sockptr_is_null(optval) &&
		    copy_to_sockptr(optval, ctx.optval, ctx.optlen)) {
			ret = -EFAULT;
			goto out;
		}
		if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) {
			ret = -EFAULT;
			goto out;
		}
	}

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.optlen = *optlen,
		.optval = optval,
		.optval_end = optval + *optlen,
		.current_task = current,
	};
	int ret;

	/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
	 * user data back into BPF buffer when retval != 0. This is
	 * done as an optimization to avoid extra copy, assuming
	 * kernel won't populate the data in case of an error.
	 * Here we always pass the data and memset() should
	 * be called if that data shouldn't be "exported".

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.optlen = *optlen,
		.optval = optval,
		.optval_end = optval + *optlen,
		.current_task = current,
	};
	int ret;

	/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
	 * user data back into the BPF buffer when retval != 0. This is
	 * done as an optimization to avoid an extra copy, assuming
	 * the kernel won't populate the data in case of an error.
	 * Here we always pass the data and memset() should
	 * be called if that data shouldn't be "exported".
	 */

	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	if (ret < 0)
		return ret;

	if (ctx.optlen > *optlen)
		return -EFAULT;

	/* BPF programs can shrink the buffer, export the modifications. */
	if (ctx.optlen != 0)
		*optlen = ctx.optlen;

	return ret;
}
#endif

static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{
	ssize_t tmp_ret = 0, ret;

	if (dir->header.parent) {
		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
	if (ret < 0)
		return ret;
	*bufp += ret;
	*lenp -= ret;
	ret += tmp_ret;

	/* Avoid leading slash. */
	if (!ret)
		return ret;

	tmp_ret = strscpy(*bufp, "/", *lenp);
	if (tmp_ret < 0)
		return tmp_ret;
	*bufp += tmp_ret;
	*lenp -= tmp_ret;

	return ret + tmp_ret;
}

BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t tmp_ret = 0, ret;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;
		tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(buf, ctx->table->procname, buf_len);

	return ret < 0 ? ret : tmp_ret + ret;
}

static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
	.func = bpf_sysctl_get_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_MEM,
	.arg3_type = ARG_CONST_SIZE,
	.arg4_type = ARG_ANYTHING,
};

static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	memcpy(dst, src, min(dst_len, src_len));

	if (dst_len > src_len) {
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	dst[dst_len - 1] = '\0';

	return -E2BIG;
}

BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
}

static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
	.func = bpf_sysctl_get_current_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (!ctx->write) {
		if (buf && buf_len)
			memset(buf, '\0', buf_len);
		return -EINVAL;
	}
	return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
}

static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
	.func = bpf_sysctl_get_new_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
		return -EINVAL;

	if (buf_len > PAGE_SIZE - 1)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	ctx->new_updated = 1;

	return 0;
}

static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.func = bpf_sysctl_set_new_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};
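
/* Illustrative sketch (editor's addition, not built here): a minimal
 * BPF_PROG_TYPE_CGROUP_SYSCTL program driving the helpers above. Names are
 * hypothetical; helper semantics follow the implementations in this file.
 *
 *	// SPDX-License-Identifier: GPL-2.0
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("cgroup/sysctl")
 *	int sanitize_writes(struct bpf_sysctl *ctx)
 *	{
 *		char name[64] = {};
 *		char newval[16] = {};
 *
 *		// Full path relative to /proc/sys, e.g. "net/core/somaxconn";
 *		// negative (-E2BIG) if it does not fit.
 *		bpf_sysctl_get_name(ctx, name, sizeof(name), 0);
 *
 *		if (!ctx->write)
 *			return 1;	// allow all reads
 *
 *		// Value the writer is trying to set (see get_new_value above);
 *		// -E2BIG if it does not fit in the buffer.
 *		if (bpf_sysctl_get_new_value(ctx, newval, sizeof(newval)) < 0)
 *			return 0;	// the write fails with -EPERM
 *
 *		// A real filter would parse/compare name and newval here and
 *		// could rewrite the value with bpf_sysctl_set_new_value().
 *		return 1;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */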

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
	case BPF_FUNC_sysctl_get_name:
		return &bpf_sysctl_get_name_proto;
	case BPF_FUNC_sysctl_get_current_value:
		return &bpf_sysctl_get_current_value_proto;
	case BPF_FUNC_sysctl_get_new_value:
		return &bpf_sysctl_get_new_value_proto;
	case BPF_FUNC_sysctl_set_new_value:
		return &bpf_sysctl_set_new_value_proto;
	case BPF_FUNC_ktime_get_coarse_ns:
		return &bpf_ktime_get_coarse_ns_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id, prog);
	}
}

static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_sysctl, write):
		if (type != BPF_READ)
			return false;
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	case bpf_ctx_range(struct bpf_sysctl, file_pos):
		if (type == BPF_READ) {
			bpf_ctx_record_field_size(info, size_default);
			return bpf_ctx_narrow_access_ok(off, size, size_default);
		} else {
			return size == size_default;
		}
	default:
		return false;
	}
}

static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	u32 read_size;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       sizeof_field(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	case offsetof(struct bpf_sysctl, file_pos):
		/* ppos is a pointer so it should be accessed via indirect
		 * loads and stores. Also, for stores an additional temporary
		 * register is used since neither src_reg nor dst_reg can be
		 * overridden.
		 */
		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(
				BPF_DW, si->dst_reg, treg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			*insn++ = BPF_RAW_INSN(
				BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
				treg, si->src_reg,
				bpf_ctx_narrow_access_offset(
					0, sizeof(u32), sizeof(loff_t)),
				si->imm);
			*insn++ = BPF_LDX_MEM(
				BPF_DW, treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				si->dst_reg, si->src_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
			*insn++ = BPF_LDX_MEM(
				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
				bpf_ctx_narrow_access_offset(
					0, read_size, sizeof(loff_t)));
		}
		*target_size = sizeof(u32);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
	.get_func_proto = sysctl_func_proto,
	.is_valid_access = sysctl_is_valid_access,
	.convert_ctx_access = sysctl_convert_ctx_access,
};

const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
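
/* Illustrative sketch (editor's addition): how a cgroup sysctl program is
 * typically wired up from userspace with libbpf, which issues the
 * BPF_PROG_ATTACH command handled elsewhere in this file. Paths, object and
 * program names are hypothetical; error handling is omitted.
 *
 *	int cg_fd = open("/sys/fs/cgroup/app", O_RDONLY);
 *	struct bpf_object *obj = bpf_object__open_file("sysctl_prog.bpf.o", NULL);
 *	int prog_fd;
 *
 *	bpf_object__load(obj);
 *	prog_fd = bpf_program__fd(bpf_object__find_program_by_name(obj,
 *								   "sanitize_writes"));
 *	bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SYSCTL, BPF_F_ALLOW_MULTI);
 */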

#ifdef CONFIG_NET
BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
{
	const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;

	return net->net_cookie;
}

static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
	.func = bpf_get_netns_cookie_sockopt,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX_OR_NULL,
};
#endif

static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_get_netns_cookie:
		return &bpf_get_netns_cookie_sockopt_proto;
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
	case BPF_FUNC_setsockopt:
		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
			return &bpf_sk_setsockopt_proto;
		return NULL;
	case BPF_FUNC_getsockopt:
		if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
			return &bpf_sk_getsockopt_proto;
		return NULL;
#endif
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id, prog);
	}
}

static bool cg_sockopt_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sockopt))
		return false;

	if (off % size != 0)
		return false;

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sockopt, retval):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_GETSOCKOPT;
		case offsetof(struct bpf_sockopt, optname):
			fallthrough;
		case offsetof(struct bpf_sockopt, level):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_SETSOCKOPT;
		case offsetof(struct bpf_sockopt, optlen):
			return size == size_default;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct bpf_sockopt, sk):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	case offsetof(struct bpf_sockopt, optval):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET_END;
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (size != size_default)
			return false;
		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
	default:
		if (size != size_default)
			return false;
		break;
	}
	return true;
}
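
/* Illustrative sketch (editor's addition): cg_sockopt_func_proto() above only
 * exposes bpf_setsockopt()/bpf_getsockopt() to BPF_CGROUP_SETSOCKOPT
 * programs, which allows patterns like reacting to one option by setting
 * another on the same socket. Loosely modeled on the selftests; the program
 * name is hypothetical and the usual socket option constants from the uapi
 * headers are assumed. Includes and the license section are as in the
 * earlier sketches.
 *
 *	SEC("cgroup/setsockopt")
 *	int prio_to_cc(struct bpf_sockopt *ctx)
 *	{
 *		char cc[] = "reno";
 *
 *		if (ctx->level != SOL_SOCKET || ctx->optname != SO_PRIORITY)
 *			return 1;
 *
 *		// ctx->sk is PTR_TO_SOCKET, accepted by bpf_setsockopt().
 *		if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION,
 *				   cc, sizeof(cc)))
 *			return 0;	// reject the original setsockopt
 *		return 1;
 *	}
 */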

#define CG_SOCKOPT_READ_FIELD(F)					\
	BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),	\
		    si->dst_reg, si->src_reg,				\
		    offsetof(struct bpf_sockopt_kern, F))

#define CG_SOCKOPT_WRITE_FIELD(F)					\
	BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) |	\
		      BPF_MEM | BPF_CLASS(si->code)),			\
		     si->dst_reg, si->src_reg,				\
		     offsetof(struct bpf_sockopt_kern, F),		\
		     si->imm)

static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sockopt, sk):
		*insn++ = CG_SOCKOPT_READ_FIELD(sk);
		break;
	case offsetof(struct bpf_sockopt, level):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(level);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(level);
		break;
	case offsetof(struct bpf_sockopt, optname):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optname);
		break;
	case offsetof(struct bpf_sockopt, optlen):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optlen);
		break;
	case offsetof(struct bpf_sockopt, retval):
		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);

		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      treg, treg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
					       BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					       treg, si->src_reg,
					       offsetof(struct bpf_cg_run_ctx, retval),
					       si->imm);
			*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      si->dst_reg, si->src_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct bpf_cg_run_ctx, retval));
		}
		break;
	case offsetof(struct bpf_sockopt, optval):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval);
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
		break;
	}

	return insn - insn_buf;
}
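
/* Editor's note: in pseudo-C, the BPF_READ case for retval above amounts to
 * the following (relying on the BUILD_BUG_ON() so that run_ctx sits at
 * offset 0 of struct bpf_cg_run_ctx):
 *
 *	struct bpf_sockopt_kern *kctx = ctx;
 *	struct bpf_cg_run_ctx *run_ctx;
 *
 *	run_ctx = container_of(kctx->current_task->bpf_ctx,
 *			       struct bpf_cg_run_ctx, run_ctx);
 *	dst_reg = run_ctx->retval;
 *
 * The BPF_WRITE case stores into the same field, using a scratch register
 * spilled to bpf_sockopt_kern::tmp_reg because neither src_reg nor dst_reg
 * may be clobbered.
 */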

static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{
	/* Nothing to do for the sockopt argument. The data is kzalloc'ed. */
	return 0;
}

const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
	.get_func_proto = cg_sockopt_func_proto,
	.is_valid_access = cg_sockopt_is_valid_access,
	.convert_ctx_access = cg_sockopt_convert_ctx_access,
	.gen_prologue = cg_sockopt_get_prologue,
};

const struct bpf_prog_ops cg_sockopt_prog_ops = {
};

/* Common helpers for cgroup hooks. */
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_get_retval_proto;
		}
	case BPF_FUNC_set_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_set_retval_proto;
		}
	default:
		return NULL;
	}
}
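
/* Illustrative sketch (editor's addition): how a cgroup program consumes the
 * two helpers above. Rejecting with a custom errno works because
 * bpf_prog_run_array_cg() keeps an already-set negative retval instead of
 * overwriting it with -EPERM. Loosely modeled on the cgroup_getset_retval
 * selftests; the program name is hypothetical.
 *
 *	SEC("cgroup/setsockopt")
 *	int reject_with_eacces(struct bpf_sockopt *ctx)
 *	{
 *		// Make the setsockopt() syscall fail with EACCES instead of
 *		// the default EPERM.
 *		bpf_set_retval(-EACCES);
 *		return 0;	// 0 == reject
 *	}
 */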

/* Common helpers for cgroup hooks with valid process context. */
const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	default:
		return NULL;
	}
}
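
/* Illustrative sketch (editor's addition): the "current"-based helpers above
 * are only offered to hooks that run in process context, such as the sysctl
 * and sockopt programs in this file. A minimal consumer (names hypothetical):
 *
 *	SEC("cgroup/sysctl")
 *	int log_writer(struct bpf_sysctl *ctx)
 *	{
 *		char comm[16];
 *		__u64 uid_gid = bpf_get_current_uid_gid();
 *
 *		bpf_get_current_comm(comm, sizeof(comm));
 *		bpf_printk("sysctl by %s uid=%u write=%d",
 *			   comm, (__u32)uid_gid, ctx->write);
 *		return 1;
 *	}
 */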