// SPDX-License-Identifier: GPL-2.0-only
/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_lsm.h>
#include <linux/bpf_verifier.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>

#include "../cgroup/cgroup-internal.h"

DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);

/*
 * cgroup bpf destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions. Use a separate workqueue so that cgroup bpf
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_bpf_destroy_wq;

static int __init cgroup_bpf_wq_init(void)
{
	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
	if (!cgroup_bpf_destroy_wq)
		panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
	return 0;
}
core_initcall(cgroup_bpf_wq_init);

/* __always_inline is necessary to prevent indirect call through run_prog
 * function pointer.
 */
static __always_inline int
bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
		      enum cgroup_bpf_attach_type atype,
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	u32 func_ret;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(cgrp->effective[atype]);
	item = &array->items[0];
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
		if (ret_flags) {
			*(ret_flags) |= (func_ret >> 1);
			func_ret &= 1;
		}
		if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return run_ctx.retval;
}

unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
				       const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct sock *sk;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sk = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
					 const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct socket *sock;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sock = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
					  const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct cgroup *cgrp;
	int ret = 0;

	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	/* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
	cgrp = task_dfl_cgroup(current);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

#ifdef CONFIG_BPF_LSM
struct cgroup_lsm_atype {
	u32 attach_btf_id;
	int refcnt;
};

static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];

static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	int i;

	lockdep_assert_held(&cgroup_mutex);

	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
			return CGROUP_LSM_START + i;

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == 0)
			return CGROUP_LSM_START + i;

	return -E2BIG;
}

void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	lockdep_assert_held(&cgroup_mutex);

	WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
		     cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);

	cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
	cgroup_lsm_atype[i].refcnt++;
}

void bpf_cgroup_atype_put(int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	cgroup_lock();
	if (--cgroup_lsm_atype[i].refcnt <= 0)
		cgroup_lsm_atype[i].attach_btf_id = 0;
	WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
	cgroup_unlock();
}
#else
static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_LSM */

void cgroup_bpf_offline(struct cgroup *cgrp)
{
	cgroup_get(cgrp);
	percpu_ref_kill(&cgrp->bpf.refcnt);
}

static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storages[stype]);
}

static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
				     struct bpf_cgroup_storage *new_storages[],
				     enum bpf_attach_type type,
				     struct bpf_prog *prog,
				     struct cgroup *cgrp)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_cgroup_storage_key key;
	struct bpf_map *map;

	key.cgroup_inode_id = cgroup_id(cgrp);
	key.attach_type = type;

	for_each_cgroup_storage_type(stype) {
		map = prog->aux->cgroup_storage[stype];
		if (!map)
			continue;

		storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
		if (storages[stype])
			continue;

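		/* No storage yet for this (cgroup, attach_type) pair; allocate
		 * one and remember it in new_storages so it can be freed if a
		 * later allocation in this loop fails.
		 */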
		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storages[stype])) {
			bpf_cgroup_storages_free(new_storages);
			return -ENOMEM;
		}

		new_storages[stype] = storages[stype];
	}

	return 0;
}

static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
				       struct bpf_cgroup_storage *src[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		dst[stype] = src[stype];
}

static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
				     struct cgroup *cgrp,
				     enum bpf_attach_type attach_type)
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}

/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{
	cgroup_put(link->cgroup);
	link->cgroup = NULL;
}

/**
 * cgroup_bpf_release() - put references of all bpf programs and
 * release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
static void cgroup_bpf_release(struct work_struct *work)
{
	struct cgroup *p, *cgrp = container_of(work, struct cgroup,
					       bpf.release_work);
	struct bpf_prog_array *old_array;
	struct list_head *storages = &cgrp->bpf.storages;
	struct bpf_cgroup_storage *storage, *stmp;

	unsigned int atype;

	cgroup_lock();

	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
		struct hlist_head *progs = &cgrp->bpf.progs[atype];
		struct bpf_prog_list *pl;
		struct hlist_node *pltmp;

		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
			hlist_del(&pl->node);
			if (pl->prog) {
				if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->prog);
				bpf_prog_put(pl->prog);
			}
			if (pl->link) {
				if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
				bpf_cgroup_link_auto_detach(pl->link);
			}
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
		}
		old_array = rcu_dereference_protected(
				cgrp->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		bpf_prog_array_free(old_array);
	}

	list_for_each_entry_safe(storage, stmp, storages, list_cg) {
		bpf_cgroup_storage_unlink(storage);
		bpf_cgroup_storage_free(storage);
	}

	cgroup_unlock();

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);
	cgroup_put(cgrp);
}

/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 * of bpf cgroup data
 * @ref: percpu ref counter structure
 */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
}

/* Get underlying bpf_prog of bpf_prog_list entry, regardless of whether it's
 * attached through a link or directly.
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head, int *preorder_cnt)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	hlist_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		if (preorder_cnt && (pl->flags & BPF_F_PREORDER))
			(*preorder_cnt)++;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[atype];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[atype], NULL);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int i, j, cnt = 0, preorder_cnt = 0, fstart, bstart, init_bstart;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[atype], &preorder_cnt);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	fstart = preorder_cnt;
	bstart = preorder_cnt - 1;
	do {
		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			continue;

		init_bstart = bstart;
		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
			if (!prog_list_prog(pl))
				continue;

			if (pl->flags & BPF_F_PREORDER) {
				item = &progs->items[bstart];
				bstart--;
			} else {
				item = &progs->items[fstart];
				fstart++;
			}
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}

		/* reverse pre-ordering progs at this cgroup level */
		for (i = bstart + 1, j = init_bstart; i < j; i++, j--)
			swap(progs->items[i], progs->items[j]);

	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}

static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_get(p);

	for (i = 0; i < NR; i++)
		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);

	INIT_LIST_HEAD(&cgrp->bpf.storages);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);

	return -ENOMEM;
}

static int update_effective_progs(struct cgroup *cgrp,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
			if (unlikely(desc->bpf.inactive)) {
				bpf_prog_array_free(desc->bpf.inactive);
				desc->bpf.inactive = NULL;
			}
			continue;
		}

		activate_effective_progs(desc, atype, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

#define BPF_CGROUP_MAX_PROGS 64

static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	/* single-attach case */
	if (!allow_multi) {
		if (hlist_empty(progs))
			return NULL;
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	hlist_for_each_entry(pl, progs, node) {
		if (prog && pl->prog == prog && prog != replace_prog)
			/* disallow attaching the same prog twice */
			return ERR_PTR(-EINVAL);
		if (link && pl->link == link)
			/* disallow attaching the same link twice */
			return ERR_PTR(-EINVAL);
	}

	/* direct prog multi-attach w/ replacement case */
	if (replace_prog) {
		hlist_for_each_entry(pl, progs, node) {
			if (pl->prog == replace_prog)
				/* a match found */
				return pl;
		}
		/* prog to replace not found for cgroup */
		return ERR_PTR(-ENOENT);
	}

	return NULL;
}

/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_attach(struct cgroup *cgrp,
			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
			       struct bpf_cgroup_link *link,
			       enum bpf_attach_type type, u32 flags)
{
	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_prog *new_prog = prog ? : link->link.prog;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	int err;

	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;
	if (link && (prog || replace_prog))
		/* only either link or prog/replace_prog can be specified */
		return -EINVAL;
	if (!!replace_prog != !!(flags & BPF_F_REPLACE))
		/* replace_prog implies BPF_F_REPLACE, and vice versa */
		return -EINVAL;

	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (!hierarchy_allows_attach(cgrp, atype))
		return -EPERM;

	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs, NULL) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	pl = find_attach_entry(progs, prog, link, replace_prog,
			       flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	if (bpf_cgroup_storages_alloc(storage, new_storage, type,
				      prog ? : link->link.prog, cgrp))
		return -ENOMEM;

	if (pl) {
		old_prog = pl->prog;
	} else {
		struct hlist_node *last = NULL;

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			bpf_cgroup_storages_free(new_storage);
			return -ENOMEM;
		}
		if (hlist_empty(progs))
			hlist_add_head(&pl->node, progs);
		else
			hlist_for_each(last, progs) {
				if (last->next)
					continue;
				hlist_add_behind(&pl->node, last);
				break;
			}
	}

	pl->prog = prog;
	pl->link = link;
	pl->flags = flags;
	bpf_cgroup_storages_assign(pl->storage, storage);
	cgrp->bpf.flags[atype] = saved_flags;

	if (type == BPF_LSM_CGROUP) {
		err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
		if (err)
			goto cleanup;
	}

	err = update_effective_progs(cgrp, atype);
	if (err)
		goto cleanup_trampoline;

	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	} else {
		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
	}
	bpf_cgroup_storages_link(new_storage, cgrp, type);
	return 0;

cleanup_trampoline:
	if (type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(new_prog);

cleanup:
	if (old_prog) {
		pl->prog = old_prog;
		pl->link = NULL;
	}
	bpf_cgroup_storages_free(new_storage);
	if (!old_prog) {
		hlist_del(&pl->node);
		kfree(pl);
	}
	return err;
}

static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
	cgroup_unlock();
	return ret;
}

/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
static void replace_effective_prog(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_cgroup_link *link)
{
	struct bpf_prog_array_item *item;
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->link == link)
					goto found;
				pos++;
			}
		}
found:
		BUG_ON(!cg);
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		item = &progs->items[pos];
		WRITE_ONCE(item->prog, link->link.prog);
	}
}

/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 *                          to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
 *            incremented
 *
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_replace(struct cgroup *cgrp,
				struct bpf_cgroup_link *link,
				struct bpf_prog *new_prog)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	bool found = false;

	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (link->link.prog->type != new_prog->type)
		return -EINVAL;

	hlist_for_each_entry(pl, progs, node) {
		if (pl->link == link) {
			found = true;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	old_prog = xchg(&link->link.prog, new_prog);
	replace_effective_prog(cgrp, atype, link);
	bpf_prog_put(old_prog);
	return 0;
}

static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
			      struct bpf_prog *old_prog)
{
	struct bpf_cgroup_link *cg_link;
	int ret;

	cg_link = container_of(link, struct bpf_cgroup_link, link);

	cgroup_lock();
	/* link might have been auto-released by dying cgroup, so fail */
	if (!cg_link->cgroup) {
		ret = -ENOLINK;
		goto out_unlock;
	}
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}
	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
out_unlock:
	cgroup_unlock();
	return ret;
}

static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	if (!allow_multi) {
		if (hlist_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return ERR_PTR(-ENOENT);

		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL) in legacy mode
		 */
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	if (!prog && !link)
		/* to detach MULTI prog the user has to specify valid FD
		 * of the program or link to be detached
		 */
		return ERR_PTR(-EINVAL);

	/* find the prog or link and detach it */
	hlist_for_each_entry(pl, progs, node) {
		if (pl->prog == prog && pl->link == link)
			return pl;
	}
	return ERR_PTR(-ENOENT);
}

/**
 * purge_effective_progs() - After compute_effective_progs fails to alloc new
 *                           cgrp->bpf.inactive table we can recover by
 *                           recomputing the array in place.
 *
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	/* recompute effective prog array in place */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link or prog in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->prog == prog && pl->link == link)
					goto found;
				pos++;
			}
		}

		/* no link or prog match, skip the cgroup of this layer */
		continue;
found:
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));

		/* Remove the program from the array */
		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
			  "Failed to purge a prog from array at index %d", pos);
	}
}

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	u32 attach_btf_id = 0;
	u32 flags;

	if (prog)
		attach_btf_id = prog->aux->attach_btf_id;
	if (link)
		attach_btf_id = link->link.prog->aux->attach_btf_id;

	atype = bpf_cgroup_atype_find(type, attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];
	flags = cgrp->bpf.flags[atype];

	if (prog && link)
		/* only one of prog or link can be specified */
		return -EINVAL;

	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	/* mark it deleted, so it's ignored while recomputing effective */
	old_prog = pl->prog;
	pl->prog = NULL;
	pl->link = NULL;

	if (update_effective_progs(cgrp, atype)) {
		/* if updating the effective array failed, replace the prog
		 * with a dummy prog
		 */
		pl->prog = old_prog;
		pl->link = link;
		purge_effective_progs(cgrp, old_prog, link, atype);
	}

	/* now can actually delete it from this cgroup list */
	hlist_del(&pl->node);

	kfree(pl);
	if (hlist_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[atype] = 0;
	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	}
	static_branch_dec(&cgroup_bpf_enabled_key[atype]);
	return 0;
}

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			     enum bpf_attach_type type)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
	cgroup_unlock();
	return ret;
}

/* Must be called with cgroup_mutex held to avoid races. */
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			      union bpf_attr __user *uattr)
{
	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	enum cgroup_bpf_attach_type from_atype, to_atype;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_array *effective;
	int cnt, ret = 0, i;
	int total_cnt = 0;
	u32 flags;

	if (effective_query && prog_attach_flags)
		return -EINVAL;

	if (type == BPF_LSM_CGROUP) {
		if (!effective_query && attr->query.prog_cnt &&
		    prog_ids && !prog_attach_flags)
			return -EINVAL;

		from_atype = CGROUP_LSM_START;
		to_atype = CGROUP_LSM_END;
		flags = 0;
	} else {
		from_atype = to_cgroup_bpf_attach_type(type);
		if (from_atype < 0)
			return -EINVAL;
		to_atype = from_atype;
		flags = cgrp->bpf.flags[from_atype];
	}

	for (atype = from_atype; atype <= to_atype; atype++) {
		if (effective_query) {
			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
							      lockdep_is_held(&cgroup_mutex));
			total_cnt += bpf_prog_array_length(effective);
		} else {
			total_cnt += prog_list_length(&cgrp->bpf.progs[atype], NULL);
		}
	}

	/* always output uattr->query.attach_flags as 0 during effective query */
	flags = effective_query ? 0 : flags;
	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
		/* return early if user requested only program count + flags */
		return 0;

	if (attr->query.prog_cnt < total_cnt) {
		total_cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
		if (effective_query) {
			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
							      lockdep_is_held(&cgroup_mutex));
			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
			ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
		} else {
			struct hlist_head *progs;
			struct bpf_prog_list *pl;
			struct bpf_prog *prog;
			u32 id;

			progs = &cgrp->bpf.progs[atype];
			cnt = min_t(int, prog_list_length(progs, NULL), total_cnt);
			i = 0;
			hlist_for_each_entry(pl, progs, node) {
				prog = prog_list_prog(pl);
				id = prog->aux->id;
				if (copy_to_user(prog_ids + i, &id, sizeof(id)))
					return -EFAULT;
				if (++i == cnt)
					break;
			}

			if (prog_attach_flags) {
				flags = cgrp->bpf.flags[atype];

				for (i = 0; i < cnt; i++)
					if (copy_to_user(prog_attach_flags + i,
							 &flags, sizeof(flags)))
						return -EFAULT;
				prog_attach_flags += cnt;
			}
		}

		prog_ids += cnt;
		total_cnt -= cnt;
	}
	return ret;
}

static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			    union bpf_attr __user *uattr)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_unlock();
	return ret;
}

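/* Entry point for the BPF_PROG_ATTACH command of the bpf(2) syscall when the
 * target is a cgroup. Userspace reaches this path roughly as follows
 * (illustrative sketch only, not code from this file; cgroup_fd and prog_fd
 * are assumed to be valid file descriptors obtained elsewhere):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.target_fd     = cgroup_fd;
 *	attr.attach_bpf_fd = prog_fd;
 *	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
 *	attr.attach_flags  = BPF_F_ALLOW_MULTI;
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */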
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct bpf_prog *replace_prog = NULL;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
	    (attr->attach_flags & BPF_F_REPLACE)) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
		if (IS_ERR(replace_prog)) {
			cgroup_put(cgrp);
			return PTR_ERR(replace_prog);
		}
	}

	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
				attr->attach_type, attr->attach_flags);

	if (replace_prog)
		bpf_prog_put(replace_prog);
	cgroup_put(cgrp);
	return ret;
}

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}

static void bpf_cgroup_link_release(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	struct cgroup *cg;

	/* link might have been auto-detached by dying cgroup already,
	 * in that case our work is done here
	 */
	if (!cg_link->cgroup)
		return;

	cgroup_lock();

	/* re-check cgroup under lock again */
	if (!cg_link->cgroup) {
		cgroup_unlock();
		return;
	}

	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
				    cg_link->type));
	if (cg_link->type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);

	cg = cg_link->cgroup;
	cg_link->cgroup = NULL;

	cgroup_unlock();

	cgroup_put(cg);
}

static void bpf_cgroup_link_dealloc(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);

	kfree(cg_link);
}

static int bpf_cgroup_link_detach(struct bpf_link *link)
{
	bpf_cgroup_link_release(link);

	return 0;
}

static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
					struct seq_file *seq)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	u64 cg_id = 0;

	cgroup_lock();
	if (cg_link->cgroup)
		cg_id = cgroup_id(cg_link->cgroup);
	cgroup_unlock();

	seq_printf(seq,
		   "cgroup_id:\t%llu\n"
		   "attach_type:\t%d\n",
		   cg_id,
		   cg_link->type);
}

static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
					  struct bpf_link_info *info)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	u64 cg_id = 0;

	cgroup_lock();
	if (cg_link->cgroup)
		cg_id = cgroup_id(cg_link->cgroup);
	cgroup_unlock();

	info->cgroup.cgroup_id = cg_id;
	info->cgroup.attach_type = cg_link->type;
	return 0;
}

static const struct bpf_link_ops bpf_cgroup_link_lops = {
	.release = bpf_cgroup_link_release,
	.dealloc = bpf_cgroup_link_dealloc,
	.detach = bpf_cgroup_link_detach,
	.update_prog = cgroup_bpf_replace,
	.show_fdinfo = bpf_cgroup_link_show_fdinfo,
	.fill_link_info = bpf_cgroup_link_fill_link_info,
};

int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_cgroup_link *link;
	struct cgroup *cgrp;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_cgroup;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
		      prog);
	link->cgroup = cgrp;
	link->type = attr->link_create.attach_type;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_cgroup;
	}

	err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
				link->type, BPF_F_ALLOW_MULTI);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_cgroup;
	}

	return bpf_link_settle(&link_primer);

out_put_cgroup:
	cgroup_put(cgrp);
	return err;
}

int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}

/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @atype: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @atype must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * For egress packets, this function can return:
 *   NET_XMIT_SUCCESS (0)	- continue with packet output
 *   NET_XMIT_DROP    (1)	- drop packet and notify TCP to call cwr
 *   NET_XMIT_CN      (2)	- continue with packet output and notify TCP
 *				  to call cwr
 *   -err			- drop packet
 *
 * For ingress packets, this function will return %-EPERM if any
 * attached program was found and if it returned != 1 during execution.
 * Otherwise 0 is returned.
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype)
{
	unsigned int offset = -skb_network_offset(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	if (atype == CGROUP_INET_EGRESS) {
		u32 flags = 0;
		bool cn;

		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
					    __bpf_prog_run_save_cb, 0, &flags);

		/* Return values of CGROUP EGRESS BPF programs are:
		 *   0: drop packet
		 *   1: keep packet
		 *   2: drop packet and cn
		 *   3: keep packet and cn
		 *
		 * The returned value is then converted to one of the NET_XMIT
		 * or an error code that is then interpreted as drop packet
		 * (and no cn):
		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
		 *   1: NET_XMIT_DROP     skb should be dropped and cn
		 *   2: NET_XMIT_CN       skb should be transmitted and cn
		 *   3: -err              skb should be dropped
		 */

		cn = flags & BPF_RET_SET_CN;
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
		if (!ret)
			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
		else
			ret = (cn ? NET_XMIT_DROP : ret);
	} else {
		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
					    skb, __bpf_prog_run_save_cb, 0,
					    NULL);
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
	}
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);

/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @atype: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @atype must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
				     NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       provided by user sockaddr
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
 *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
 *            uaddr.
 * @atype: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 * @flags: Pointer to u32 which contains higher bits of BPF program
 *         return value (OR'ed together).
 *
 * The socket is expected to be of type INET, INET6 or UNIX.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution.
 * In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      int *uaddrlen,
				      enum cgroup_bpf_attach_type atype,
				      void *t_ctx,
				      u32 *flags)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
		.t_ctx = t_ctx,
	};
	struct sockaddr_storage unspec;
	struct cgroup *cgrp;
	int ret;

	/* Check socket family since not all sockets represent network
	 * endpoint (e.g. AF_UNIX).
	 */
	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
	    sk->sk_family != AF_UNIX)
		return 0;

	if (!ctx.uaddr) {
		memset(&unspec, 0, sizeof(unspec));
		ctx.uaddr = (struct sockaddr *)&unspec;
		ctx.uaddrlen = 0;
	} else {
		ctx.uaddrlen = *uaddrlen;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
				    0, flags);

	if (!ret && uaddr)
		*uaddrlen = ctx.uaddrlen;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);

/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 *            sk with connection information (IP addresses, etc.) May not contain
 *            cgroup info if it is a req sock.
 * @atype: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @atype must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
				     0, NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);

int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int ret;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
				    NULL);
	rcu_read_unlock();

	return ret;
}

BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
{
	/* flags argument is not used now,
	 * but provides an ability to extend the API.
	 * verifier checks that its value is correct.
	 */
	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
	struct bpf_cgroup_storage *storage;
	struct bpf_cg_run_ctx *ctx;
	void *ptr;

	/* get current cgroup storage from BPF run context */
	ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
	storage = ctx->prog_item->cgroup_storage[stype];

	if (stype == BPF_CGROUP_STORAGE_SHARED)
		ptr = &READ_ONCE(storage->buf)->data[0];
	else
		ptr = this_cpu_ptr(storage->percpu_buf);

	return (unsigned long)ptr;
}

const struct bpf_func_proto bpf_get_local_storage_proto = {
	.func = bpf_get_local_storage,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_ANYTHING,
};

BPF_CALL_0(bpf_get_retval)
{
	struct bpf_cg_run_ctx *ctx =
		container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);

	return ctx->retval;
}

const struct bpf_func_proto bpf_get_retval_proto = {
	.func = bpf_get_retval,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
};

BPF_CALL_1(bpf_set_retval, int, retval)
{
	struct bpf_cg_run_ctx *ctx =
		container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);

	ctx->retval = retval;
	return 0;
}

const struct bpf_func_proto bpf_set_retval_proto = {
	.func = bpf_set_retval,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_ANYTHING,
};

static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id, prog);
	}
}

static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
		bpf_ctx_record_field_size(info, size_default);
		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
			return false;
		break;
	default:
		if (size != size_default)
			return false;
	}

	return true;
}

const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto = cgroup_dev_func_proto,
	.is_valid_access = cgroup_dev_is_valid_access,
};

/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @buf: pointer to buffer (in and out)
 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
 *	result is size of @new_buf if program set new value, initial value
 *	otherwise
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
 * @atype: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   const struct ctl_table *table, int write,
				   char **buf, size_t *pcount, loff_t *ppos,
				   enum cgroup_bpf_attach_type atype)
{
	struct bpf_sysctl_kern ctx = {
		.head = head,
		.table = table,
		.write = write,
		.ppos = ppos,
		.cur_val = NULL,
		.cur_len = PAGE_SIZE,
		.new_val = NULL,
		.new_len = 0,
		.new_updated = 0,
	};
	struct cgroup *cgrp;
	loff_t pos = 0;
	int ret;

	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
	if (!ctx.cur_val ||
	    table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
		/* Let BPF program decide how to proceed. */
		ctx.cur_len = 0;
	}

	if (write && *buf && *pcount) {
		/* BPF program should be able to override new value with a
		 * buffer bigger than provided by user.
		 */
		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
		if (ctx.new_val) {
			memcpy(ctx.new_val, *buf, ctx.new_len);
		} else {
			/* Let BPF program decide how to proceed. */
			ctx.new_len = 0;
		}
	}

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
				    NULL);
	rcu_read_unlock();

	kfree(ctx.cur_val);

	if (ret == 1 && ctx.new_updated) {
		kfree(*buf);
		*buf = ctx.new_val;
		*pcount = ctx.new_len;
	} else {
		kfree(ctx.new_val);
	}

	return ret;
}

#ifdef CONFIG_NET
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
			     struct bpf_sockopt_buf *buf)
{
	if (unlikely(max_optlen < 0))
		return -EINVAL;

	if (unlikely(max_optlen > PAGE_SIZE)) {
		/* We don't expose optvals that are greater than PAGE_SIZE
		 * to the BPF program.
		 */
		max_optlen = PAGE_SIZE;
	}

	if (max_optlen <= sizeof(buf->data)) {
		/* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
		 * bytes avoid the cost of kzalloc.
		 */
		ctx->optval = buf->data;
		ctx->optval_end = ctx->optval + max_optlen;
		return max_optlen;
	}

	ctx->optval = kzalloc(max_optlen, GFP_USER);
	if (!ctx->optval)
		return -ENOMEM;

	ctx->optval_end = ctx->optval + max_optlen;

	return max_optlen;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
			     struct bpf_sockopt_buf *buf)
{
	if (ctx->optval == buf->data)
		return;
	kfree(ctx->optval);
}

static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
				  struct bpf_sockopt_buf *buf)
{
	return ctx->optval != buf->data;
}

int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, sockptr_t optval,
				       int *optlen, char **kernel_optval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = *level,
		.optname = *optname,
	};
	int ret, max_optlen;

	/* Allocate a bit more than the initial user buffer for
	 * BPF program. The canonical use case is overriding
	 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
	 */
	max_optlen = max_t(int, 16, *optlen);
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
	if (max_optlen < 0)
		return max_optlen;

	ctx.optlen = *optlen;

	if (copy_from_sockptr(ctx.optval, optval,
			      min(*optlen, max_optlen))) {
		ret = -EFAULT;
		goto out;
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
				    &ctx, bpf_prog_run, 0, NULL);
	release_sock(sk);

	if (ret)
		goto out;

	if (ctx.optlen == -1) {
		/* optlen set to -1, bypass kernel */
		ret = 1;
	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
		/* optlen is out of bounds */
		if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
			pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
				     ctx.optlen, max_optlen);
			ret = 0;
			goto out;
		}
		ret = -EFAULT;
	} else {
		/* optlen within bounds, run kernel handler */
		ret = 0;

		/* export any potential modifications */
		*level = ctx.level;
		*optname = ctx.optname;

		/* optlen == 0 from BPF indicates that we should
		 * use original userspace data.
		 */
		if (ctx.optlen != 0) {
			*optlen = ctx.optlen;
			/* We've used bpf_sockopt_kern->buf as an intermediary
			 * storage, but the BPF program indicates that we need
			 * to pass this data to the kernel setsockopt handler.
			 * No way to export on-stack buf, have to allocate a
			 * new buffer.
			 */
			if (!sockopt_buf_allocated(&ctx, &buf)) {
				void *p = kmalloc(ctx.optlen, GFP_USER);

				if (!p) {
					ret = -ENOMEM;
					goto out;
				}
				memcpy(p, ctx.optval, ctx.optlen);
				*kernel_optval = p;
			} else {
				*kernel_optval = ctx.optval;
			}
			/* export and don't free sockopt buf */
			return 0;
		}
	}

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}

int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, sockptr_t optval,
				       sockptr_t optlen, int max_optlen,
				       int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_buf buf = {};
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.current_task = current,
	};
	int orig_optlen;
	int ret;

	orig_optlen = max_optlen;
	ctx.optlen = max_optlen;
	max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
	if (max_optlen < 0)
		return max_optlen;

	if (!retval) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
		 * into our temporary buffer. Set optlen to the
		 * one that kernel returned as well to let
		 * BPF programs inspect the value.
		 */
		if (copy_from_sockptr(&ctx.optlen, optlen,
				      sizeof(ctx.optlen))) {
			ret = -EFAULT;
			goto out;
		}

		if (ctx.optlen < 0) {
			ret = -EFAULT;
			goto out;
		}
		orig_optlen = ctx.optlen;

		if (copy_from_sockptr(ctx.optval, optval,
				      min(ctx.optlen, max_optlen))) {
			ret = -EFAULT;
			goto out;
		}
	}

	lock_sock(sk);
	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	release_sock(sk);

	if (ret < 0)
		goto out;

	if (!sockptr_is_null(optval) &&
	    (ctx.optlen > max_optlen || ctx.optlen < 0)) {
		if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
			pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
				     ctx.optlen, max_optlen);
			ret = retval;
			goto out;
		}
		ret = -EFAULT;
		goto out;
	}

	if (ctx.optlen != 0) {
		if (!sockptr_is_null(optval) &&
		    copy_to_sockptr(optval, ctx.optval, ctx.optlen)) {
			ret = -EFAULT;
			goto out;
		}
		if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) {
			ret = -EFAULT;
			goto out;
		}
	}

out:
	sockopt_free_buf(&ctx, &buf);
	return ret;
}

int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
					    int optname, void *optval,
					    int *optlen, int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.optlen = *optlen,
		.optval = optval,
		.optval_end = optval + *optlen,
		.current_task = current,
	};
	int ret;

	/* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
	 * user data back into BPF buffer when retval != 0. This is
	 * done as an optimization to avoid extra copy, assuming
	 * kernel won't populate the data in case of an error.
	 * Here we always pass the data and memset() should
	 * be called if that data shouldn't be "exported".
	 */

	ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
				    &ctx, bpf_prog_run, retval, NULL);
	if (ret < 0)
		return ret;

	if (ctx.optlen > *optlen)
		return -EFAULT;

	/* BPF programs can shrink the buffer, export the modifications.
	 */
	if (ctx.optlen != 0)
		*optlen = ctx.optlen;

	return ret;
}
#endif

static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{
	ssize_t tmp_ret = 0, ret;

	if (dir->header.parent) {
		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
	if (ret < 0)
		return ret;
	*bufp += ret;
	*lenp -= ret;
	ret += tmp_ret;

	/* Avoid leading slash. */
	if (!ret)
		return ret;

	tmp_ret = strscpy(*bufp, "/", *lenp);
	if (tmp_ret < 0)
		return tmp_ret;
	*bufp += tmp_ret;
	*lenp -= tmp_ret;

	return ret + tmp_ret;
}

BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t tmp_ret = 0, ret;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;
		tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(buf, ctx->table->procname, buf_len);

	return ret < 0 ? ret : tmp_ret + ret;
}

static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
	.func = bpf_sysctl_get_name,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_MEM,
	.arg3_type = ARG_CONST_SIZE,
	.arg4_type = ARG_ANYTHING,
};

static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	memcpy(dst, src, min(dst_len, src_len));

	if (dst_len > src_len) {
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	dst[dst_len - 1] = '\0';

	return -E2BIG;
}

BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
}

static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
	.func = bpf_sysctl_get_current_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (!ctx->write) {
		if (buf && buf_len)
			memset(buf, '\0', buf_len);
		return -EINVAL;
	}
	return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
}

static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
	.func = bpf_sysctl_get_new_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_UNINIT_MEM,
	.arg3_type = ARG_CONST_SIZE,
};

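/* Illustrative sketch only: a minimal BPF_PROG_TYPE_CGROUP_SYSCTL program
 * (libbpf SEC() conventions assumed; the program name is made up) that uses
 * bpf_sysctl_get_name() and bpf_sysctl_get_new_value() from above; see also
 * bpf_sysctl_set_new_value() below. Returning 1 allows the sysctl access,
 * returning 0 rejects it with -EPERM.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_guard(struct bpf_sysctl *ctx)
 *	{
 *		char name[32], newval[32];
 *
 *		if (bpf_sysctl_get_name(ctx, name, sizeof(name),
 *					BPF_F_SYSCTL_BASE_NAME) < 0)
 *			return 1;
 *
 *		if (!ctx->write)
 *			return 1;
 *
 *		if (bpf_sysctl_get_new_value(ctx, newval, sizeof(newval)) < 0)
 *			return 0;
 *
 *		return 1;
 *	}
 */
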
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
		return -EINVAL;

	if (buf_len > PAGE_SIZE - 1)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	ctx->new_updated = 1;

	return 0;
}

static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.func = bpf_sysctl_set_new_value,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_PTR_TO_CTX,
	.arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type = ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	const struct bpf_func_proto *func_proto;

	func_proto = cgroup_common_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	func_proto = cgroup_current_func_proto(func_id, prog);
	if (func_proto)
		return func_proto;

	switch (func_id) {
	case BPF_FUNC_sysctl_get_name:
		return &bpf_sysctl_get_name_proto;
	case BPF_FUNC_sysctl_get_current_value:
		return &bpf_sysctl_get_current_value_proto;
	case BPF_FUNC_sysctl_get_new_value:
		return &bpf_sysctl_get_new_value_proto;
	case BPF_FUNC_sysctl_set_new_value:
		return &bpf_sysctl_set_new_value_proto;
	case BPF_FUNC_ktime_get_coarse_ns:
		return &bpf_ktime_get_coarse_ns_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_event_output_data_proto;
	default:
		return bpf_base_func_proto(func_id, prog);
	}
}

static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_sysctl, write):
		if (type != BPF_READ)
			return false;
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	case bpf_ctx_range(struct bpf_sysctl, file_pos):
		if (type == BPF_READ) {
			bpf_ctx_record_field_size(info, size_default);
			return bpf_ctx_narrow_access_ok(off, size, size_default);
		} else {
			return size == size_default;
		}
	default:
		return false;
	}
}

static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	u32 read_size;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       sizeof_field(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	case offsetof(struct bpf_sysctl, file_pos):
		/* ppos is a pointer so it should be accessed via indirect
		 * loads and stores. Also for stores additional temporary
		 * register is used since neither src_reg nor dst_reg can be
		 * overridden.
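		 *
		 * For reads this expands to roughly:
		 *	dst_reg = ((struct bpf_sysctl_kern *)src_reg)->ppos;
		 *	dst_reg = *(u32 *)dst_reg;
		 * For writes the value is stored through the same ppos
		 * pointer via a scratch register whose original contents
		 * are preserved in tmp_reg.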
2278 */ 2279 if (type == BPF_WRITE) { 2280 int treg = BPF_REG_9; 2281 2282 if (si->src_reg == treg || si->dst_reg == treg) 2283 --treg; 2284 if (si->src_reg == treg || si->dst_reg == treg) 2285 --treg; 2286 *insn++ = BPF_STX_MEM( 2287 BPF_DW, si->dst_reg, treg, 2288 offsetof(struct bpf_sysctl_kern, tmp_reg)); 2289 *insn++ = BPF_LDX_MEM( 2290 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), 2291 treg, si->dst_reg, 2292 offsetof(struct bpf_sysctl_kern, ppos)); 2293 *insn++ = BPF_RAW_INSN( 2294 BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32), 2295 treg, si->src_reg, 2296 bpf_ctx_narrow_access_offset( 2297 0, sizeof(u32), sizeof(loff_t)), 2298 si->imm); 2299 *insn++ = BPF_LDX_MEM( 2300 BPF_DW, treg, si->dst_reg, 2301 offsetof(struct bpf_sysctl_kern, tmp_reg)); 2302 } else { 2303 *insn++ = BPF_LDX_MEM( 2304 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), 2305 si->dst_reg, si->src_reg, 2306 offsetof(struct bpf_sysctl_kern, ppos)); 2307 read_size = bpf_size_to_bytes(BPF_SIZE(si->code)); 2308 *insn++ = BPF_LDX_MEM( 2309 BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 2310 bpf_ctx_narrow_access_offset( 2311 0, read_size, sizeof(loff_t))); 2312 } 2313 *target_size = sizeof(u32); 2314 break; 2315 } 2316 2317 return insn - insn_buf; 2318 } 2319 2320 const struct bpf_verifier_ops cg_sysctl_verifier_ops = { 2321 .get_func_proto = sysctl_func_proto, 2322 .is_valid_access = sysctl_is_valid_access, 2323 .convert_ctx_access = sysctl_convert_ctx_access, 2324 }; 2325 2326 const struct bpf_prog_ops cg_sysctl_prog_ops = { 2327 }; 2328 2329 #ifdef CONFIG_NET 2330 BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx) 2331 { 2332 const struct net *net = ctx ? sock_net(ctx->sk) : &init_net; 2333 2334 return net->net_cookie; 2335 } 2336 2337 static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = { 2338 .func = bpf_get_netns_cookie_sockopt, 2339 .gpl_only = false, 2340 .ret_type = RET_INTEGER, 2341 .arg1_type = ARG_PTR_TO_CTX_OR_NULL, 2342 }; 2343 #endif 2344 2345 static const struct bpf_func_proto * 2346 cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) 2347 { 2348 const struct bpf_func_proto *func_proto; 2349 2350 func_proto = cgroup_common_func_proto(func_id, prog); 2351 if (func_proto) 2352 return func_proto; 2353 2354 func_proto = cgroup_current_func_proto(func_id, prog); 2355 if (func_proto) 2356 return func_proto; 2357 2358 switch (func_id) { 2359 #ifdef CONFIG_NET 2360 case BPF_FUNC_get_netns_cookie: 2361 return &bpf_get_netns_cookie_sockopt_proto; 2362 case BPF_FUNC_sk_storage_get: 2363 return &bpf_sk_storage_get_proto; 2364 case BPF_FUNC_sk_storage_delete: 2365 return &bpf_sk_storage_delete_proto; 2366 case BPF_FUNC_setsockopt: 2367 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT) 2368 return &bpf_sk_setsockopt_proto; 2369 return NULL; 2370 case BPF_FUNC_getsockopt: 2371 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT) 2372 return &bpf_sk_getsockopt_proto; 2373 return NULL; 2374 #endif 2375 #ifdef CONFIG_INET 2376 case BPF_FUNC_tcp_sock: 2377 return &bpf_tcp_sock_proto; 2378 #endif 2379 case BPF_FUNC_perf_event_output: 2380 return &bpf_event_output_data_proto; 2381 default: 2382 return bpf_base_func_proto(func_id, prog); 2383 } 2384 } 2385 2386 static bool cg_sockopt_is_valid_access(int off, int size, 2387 enum bpf_access_type type, 2388 const struct bpf_prog *prog, 2389 struct bpf_insn_access_aux *info) 2390 { 2391 const int size_default = sizeof(__u32); 2392 2393 if (off < 0 || off >= sizeof(struct bpf_sockopt)) 2394 return false; 
	if (off % size != 0)
		return false;

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sockopt, retval):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_GETSOCKOPT;
		case offsetof(struct bpf_sockopt, optname):
			fallthrough;
		case offsetof(struct bpf_sockopt, level):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_SETSOCKOPT;
		case offsetof(struct bpf_sockopt, optlen):
			return size == size_default;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct bpf_sockopt, sk):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	case offsetof(struct bpf_sockopt, optval):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET_END;
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (size != size_default)
			return false;
		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
	default:
		if (size != size_default)
			return false;
		break;
	}
	return true;
}

#define CG_SOCKOPT_READ_FIELD(F)					\
	BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),	\
		    si->dst_reg, si->src_reg,				\
		    offsetof(struct bpf_sockopt_kern, F))

#define CG_SOCKOPT_WRITE_FIELD(F)					\
	BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) |	\
		      BPF_MEM | BPF_CLASS(si->code)),			\
		     si->dst_reg, si->src_reg,				\
		     offsetof(struct bpf_sockopt_kern, F),		\
		     si->imm)

static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sockopt, sk):
		*insn++ = CG_SOCKOPT_READ_FIELD(sk);
		break;
	case offsetof(struct bpf_sockopt, level):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(level);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(level);
		break;
	case offsetof(struct bpf_sockopt, optname):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optname);
		break;
	case offsetof(struct bpf_sockopt, optlen):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
		else
			*insn++ = CG_SOCKOPT_READ_FIELD(optlen);
		break;
	case offsetof(struct bpf_sockopt, retval):
		BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);

		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      treg, treg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
					       BPF_FIELD_SIZEOF(struct
								bpf_cg_run_ctx, retval),
					       treg, si->src_reg,
					       offsetof(struct bpf_cg_run_ctx, retval),
					       si->imm);
			*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
					      offsetof(struct bpf_sockopt_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
					      si->dst_reg, si->src_reg,
					      offsetof(struct bpf_sockopt_kern, current_task));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct task_struct, bpf_ctx));
			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
					      si->dst_reg, si->dst_reg,
					      offsetof(struct bpf_cg_run_ctx, retval));
		}
		break;
	case offsetof(struct bpf_sockopt, optval):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval);
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		*insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
		break;
	}

	return insn - insn_buf;
}

static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{
	/* Nothing to do for sockopt argument. The data is kzalloc'ated.
	 */
	return 0;
}

const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
	.get_func_proto = cg_sockopt_func_proto,
	.is_valid_access = cg_sockopt_is_valid_access,
	.convert_ctx_access = cg_sockopt_convert_ctx_access,
	.gen_prologue = cg_sockopt_get_prologue,
};

const struct bpf_prog_ops cg_sockopt_prog_ops = {
};

/* Common helpers for cgroup hooks. */
const struct bpf_func_proto *
cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_get_retval_proto;
		}
	case BPF_FUNC_set_retval:
		switch (prog->expected_attach_type) {
		case BPF_CGROUP_INET_INGRESS:
		case BPF_CGROUP_INET_EGRESS:
		case BPF_CGROUP_SOCK_OPS:
		case BPF_CGROUP_UDP4_RECVMSG:
		case BPF_CGROUP_UDP6_RECVMSG:
		case BPF_CGROUP_UNIX_RECVMSG:
		case BPF_CGROUP_INET4_GETPEERNAME:
		case BPF_CGROUP_INET6_GETPEERNAME:
		case BPF_CGROUP_UNIX_GETPEERNAME:
		case BPF_CGROUP_INET4_GETSOCKNAME:
		case BPF_CGROUP_INET6_GETSOCKNAME:
		case BPF_CGROUP_UNIX_GETSOCKNAME:
			return NULL;
		default:
			return &bpf_set_retval_proto;
		}
	default:
		return NULL;
	}
}

/* Common helpers for cgroup hooks with valid process context.
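 * These are only offered to hook types where "current" is the task that
 * triggered the hook, so helpers such as bpf_get_current_uid_gid() and
 * bpf_get_current_comm() return meaningful data.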
 */
const struct bpf_func_proto *
cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_curr_proto;
#endif
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	default:
		return NULL;
	}
}
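
/* Illustrative sketch only: the canonical BPF_PROG_TYPE_CGROUP_SOCKOPT use
 * case mentioned in __cgroup_bpf_run_filter_setsockopt() above, overriding
 * the requested TCP congestion control. libbpf SEC() conventions are assumed
 * and the program name is made up. Returning 1 lets the (possibly modified)
 * option proceed to the kernel handler; setting ctx->optlen = -1 instead
 * would bypass the kernel handler entirely, and returning 0 rejects the
 * setsockopt() call with -EPERM.
 *
 *	SEC("cgroup/setsockopt")
 *	int force_cubic(struct bpf_sockopt *ctx)
 *	{
 *		char cubic[] = "cubic";
 *
 *		if (ctx->level != SOL_TCP || ctx->optname != TCP_CONGESTION)
 *			return 1;
 *
 *		if (ctx->optval + sizeof(cubic) > ctx->optval_end)
 *			return 0;
 *
 *		__builtin_memcpy(ctx->optval, cubic, sizeof(cubic));
 *		ctx->optlen = sizeof(cubic);
 *		return 1;
 *	}
 */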