1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Copyright (c) 2017 Facebook 3 */ 4 #include <linux/bpf.h> 5 #include <linux/btf.h> 6 #include <linux/btf_ids.h> 7 #include <linux/slab.h> 8 #include <linux/init.h> 9 #include <linux/vmalloc.h> 10 #include <linux/etherdevice.h> 11 #include <linux/filter.h> 12 #include <linux/rcupdate_trace.h> 13 #include <linux/sched/signal.h> 14 #include <net/bpf_sk_storage.h> 15 #include <net/hotdata.h> 16 #include <net/sock.h> 17 #include <net/tcp.h> 18 #include <net/net_namespace.h> 19 #include <net/page_pool/helpers.h> 20 #include <linux/error-injection.h> 21 #include <linux/smp.h> 22 #include <linux/sock_diag.h> 23 #include <linux/netfilter.h> 24 #include <net/netdev_rx_queue.h> 25 #include <net/xdp.h> 26 #include <net/netfilter/nf_bpf_link.h> 27 28 #define CREATE_TRACE_POINTS 29 #include <trace/events/bpf_test_run.h> 30 31 struct bpf_test_timer { 32 u32 i; 33 u64 time_start, time_spent; 34 }; 35 36 static void bpf_test_timer_enter(struct bpf_test_timer *t) 37 __acquires(rcu) 38 { 39 rcu_read_lock_dont_migrate(); 40 t->time_start = ktime_get_ns(); 41 } 42 43 static void bpf_test_timer_leave(struct bpf_test_timer *t) 44 __releases(rcu) 45 { 46 t->time_start = 0; 47 rcu_read_unlock_migrate(); 48 } 49 50 static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations, 51 u32 repeat, int *err, u32 *duration) 52 __must_hold(rcu) 53 { 54 t->i += iterations; 55 if (t->i >= repeat) { 56 /* We're done. */ 57 t->time_spent += ktime_get_ns() - t->time_start; 58 do_div(t->time_spent, t->i); 59 *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent; 60 *err = 0; 61 goto reset; 62 } 63 64 if (signal_pending(current)) { 65 /* During iteration: we've been cancelled, abort. */ 66 *err = -EINTR; 67 goto reset; 68 } 69 70 if (need_resched()) { 71 /* During iteration: we need to reschedule between runs. */ 72 t->time_spent += ktime_get_ns() - t->time_start; 73 bpf_test_timer_leave(t); 74 cond_resched(); 75 bpf_test_timer_enter(t); 76 } 77 78 /* Do another round. */ 79 return true; 80 81 reset: 82 t->i = 0; 83 return false; 84 } 85 86 /* We put this struct at the head of each page with a context and frame 87 * initialised when the page is allocated, so we don't have to do this on each 88 * repetition of the test run. 89 */ 90 struct xdp_page_head { 91 struct xdp_buff orig_ctx; 92 struct xdp_buff ctx; 93 union { 94 /* ::data_hard_start starts here */ 95 DECLARE_FLEX_ARRAY(struct xdp_frame, frame); 96 DECLARE_FLEX_ARRAY(u8, data); 97 }; 98 }; 99 100 struct xdp_test_data { 101 struct xdp_buff *orig_ctx; 102 struct xdp_rxq_info rxq; 103 struct net_device *dev; 104 struct page_pool *pp; 105 struct xdp_frame **frames; 106 struct sk_buff **skbs; 107 struct xdp_mem_info mem; 108 u32 batch_size; 109 u32 frame_cnt; 110 }; 111 112 /* tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c:%MAX_PKT_SIZE 113 * must be updated accordingly this gets changed, otherwise BPF selftests 114 * will fail. 115 */ 116 #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head)) 117 #define TEST_XDP_MAX_BATCH 256 118 119 static void xdp_test_run_init_page(netmem_ref netmem, void *arg) 120 { 121 struct xdp_page_head *head = 122 phys_to_virt(page_to_phys(netmem_to_page(netmem))); 123 struct xdp_buff *new_ctx, *orig_ctx; 124 u32 headroom = XDP_PACKET_HEADROOM; 125 struct xdp_test_data *xdp = arg; 126 size_t frm_len, meta_len; 127 struct xdp_frame *frm; 128 void *data; 129 130 orig_ctx = xdp->orig_ctx; 131 frm_len = orig_ctx->data_end - orig_ctx->data_meta; 132 meta_len = orig_ctx->data - orig_ctx->data_meta; 133 headroom -= meta_len; 134 135 new_ctx = &head->ctx; 136 frm = head->frame; 137 data = head->data; 138 memcpy(data + headroom, orig_ctx->data_meta, frm_len); 139 140 xdp_init_buff(new_ctx, TEST_XDP_FRAME_SIZE, &xdp->rxq); 141 xdp_prepare_buff(new_ctx, data, headroom, frm_len, true); 142 new_ctx->data = new_ctx->data_meta + meta_len; 143 144 xdp_update_frame_from_buff(new_ctx, frm); 145 frm->mem_type = new_ctx->rxq->mem.type; 146 147 memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx)); 148 } 149 150 static int xdp_test_run_setup(struct xdp_test_data *xdp, struct xdp_buff *orig_ctx) 151 { 152 struct page_pool *pp; 153 int err = -ENOMEM; 154 struct page_pool_params pp_params = { 155 .order = 0, 156 .flags = 0, 157 .pool_size = xdp->batch_size, 158 .nid = NUMA_NO_NODE, 159 .init_callback = xdp_test_run_init_page, 160 .init_arg = xdp, 161 }; 162 163 xdp->frames = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); 164 if (!xdp->frames) 165 return -ENOMEM; 166 167 xdp->skbs = kvmalloc_array(xdp->batch_size, sizeof(void *), GFP_KERNEL); 168 if (!xdp->skbs) 169 goto err_skbs; 170 171 pp = page_pool_create(&pp_params); 172 if (IS_ERR(pp)) { 173 err = PTR_ERR(pp); 174 goto err_pp; 175 } 176 177 /* will copy 'mem.id' into pp->xdp_mem_id */ 178 err = xdp_reg_mem_model(&xdp->mem, MEM_TYPE_PAGE_POOL, pp); 179 if (err) 180 goto err_mmodel; 181 182 xdp->pp = pp; 183 184 /* We create a 'fake' RXQ referencing the original dev, but with an 185 * xdp_mem_info pointing to our page_pool 186 */ 187 xdp_rxq_info_reg(&xdp->rxq, orig_ctx->rxq->dev, 0, 0); 188 xdp->rxq.mem.type = MEM_TYPE_PAGE_POOL; 189 xdp->rxq.mem.id = pp->xdp_mem_id; 190 xdp->dev = orig_ctx->rxq->dev; 191 xdp->orig_ctx = orig_ctx; 192 193 return 0; 194 195 err_mmodel: 196 page_pool_destroy(pp); 197 err_pp: 198 kvfree(xdp->skbs); 199 err_skbs: 200 kvfree(xdp->frames); 201 return err; 202 } 203 204 static void xdp_test_run_teardown(struct xdp_test_data *xdp) 205 { 206 xdp_unreg_mem_model(&xdp->mem); 207 page_pool_destroy(xdp->pp); 208 kfree(xdp->frames); 209 kfree(xdp->skbs); 210 } 211 212 static bool frame_was_changed(const struct xdp_page_head *head) 213 { 214 /* xdp_scrub_frame() zeroes the data pointer, flags is the last field, 215 * i.e. has the highest chances to be overwritten. If those two are 216 * untouched, it's most likely safe to skip the context reset. 217 */ 218 return head->frame->data != head->orig_ctx.data || 219 head->frame->flags != head->orig_ctx.flags; 220 } 221 222 static bool ctx_was_changed(struct xdp_page_head *head) 223 { 224 return head->orig_ctx.data != head->ctx.data || 225 head->orig_ctx.data_meta != head->ctx.data_meta || 226 head->orig_ctx.data_end != head->ctx.data_end; 227 } 228 229 static void reset_ctx(struct xdp_page_head *head) 230 { 231 if (likely(!frame_was_changed(head) && !ctx_was_changed(head))) 232 return; 233 234 head->ctx.data = head->orig_ctx.data; 235 head->ctx.data_meta = head->orig_ctx.data_meta; 236 head->ctx.data_end = head->orig_ctx.data_end; 237 xdp_update_frame_from_buff(&head->ctx, head->frame); 238 head->frame->mem_type = head->orig_ctx.rxq->mem.type; 239 } 240 241 static int xdp_recv_frames(struct xdp_frame **frames, int nframes, 242 struct sk_buff **skbs, 243 struct net_device *dev) 244 { 245 gfp_t gfp = __GFP_ZERO | GFP_ATOMIC; 246 int i; 247 LIST_HEAD(list); 248 249 if (unlikely(!kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, gfp, 250 nframes, (void **)skbs))) { 251 for (i = 0; i < nframes; i++) 252 xdp_return_frame(frames[i]); 253 return -ENOMEM; 254 } 255 256 for (i = 0; i < nframes; i++) { 257 struct xdp_frame *xdpf = frames[i]; 258 struct sk_buff *skb = skbs[i]; 259 260 skb = __xdp_build_skb_from_frame(xdpf, skb, dev); 261 if (!skb) { 262 xdp_return_frame(xdpf); 263 continue; 264 } 265 266 list_add_tail(&skb->list, &list); 267 } 268 netif_receive_skb_list(&list); 269 270 return 0; 271 } 272 273 static int xdp_test_run_batch(struct xdp_test_data *xdp, struct bpf_prog *prog, 274 u32 repeat) 275 { 276 struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; 277 int err = 0, act, ret, i, nframes = 0, batch_sz; 278 struct xdp_frame **frames = xdp->frames; 279 struct bpf_redirect_info *ri; 280 struct xdp_page_head *head; 281 struct xdp_frame *frm; 282 bool redirect = false; 283 struct xdp_buff *ctx; 284 struct page *page; 285 286 batch_sz = min_t(u32, repeat, xdp->batch_size); 287 288 local_bh_disable(); 289 bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); 290 ri = bpf_net_ctx_get_ri(); 291 xdp_set_return_frame_no_direct(); 292 293 for (i = 0; i < batch_sz; i++) { 294 page = page_pool_dev_alloc_pages(xdp->pp); 295 if (!page) { 296 err = -ENOMEM; 297 goto out; 298 } 299 300 head = phys_to_virt(page_to_phys(page)); 301 reset_ctx(head); 302 ctx = &head->ctx; 303 frm = head->frame; 304 xdp->frame_cnt++; 305 306 act = bpf_prog_run_xdp(prog, ctx); 307 308 /* if program changed pkt bounds we need to update the xdp_frame */ 309 if (unlikely(ctx_was_changed(head))) { 310 ret = xdp_update_frame_from_buff(ctx, frm); 311 if (ret) { 312 xdp_return_buff(ctx); 313 continue; 314 } 315 } 316 317 switch (act) { 318 case XDP_TX: 319 /* we can't do a real XDP_TX since we're not in the 320 * driver, so turn it into a REDIRECT back to the same 321 * index 322 */ 323 ri->tgt_index = xdp->dev->ifindex; 324 ri->map_id = INT_MAX; 325 ri->map_type = BPF_MAP_TYPE_UNSPEC; 326 fallthrough; 327 case XDP_REDIRECT: 328 redirect = true; 329 ret = xdp_do_redirect_frame(xdp->dev, ctx, frm, prog); 330 if (ret) 331 xdp_return_buff(ctx); 332 break; 333 case XDP_PASS: 334 frames[nframes++] = frm; 335 break; 336 default: 337 bpf_warn_invalid_xdp_action(NULL, prog, act); 338 fallthrough; 339 case XDP_DROP: 340 xdp_return_buff(ctx); 341 break; 342 } 343 } 344 345 out: 346 if (redirect) 347 xdp_do_flush(); 348 if (nframes) { 349 ret = xdp_recv_frames(frames, nframes, xdp->skbs, xdp->dev); 350 if (ret) 351 err = ret; 352 } 353 354 xdp_clear_return_frame_no_direct(); 355 bpf_net_ctx_clear(bpf_net_ctx); 356 local_bh_enable(); 357 return err; 358 } 359 360 static int bpf_test_run_xdp_live(struct bpf_prog *prog, struct xdp_buff *ctx, 361 u32 repeat, u32 batch_size, u32 *time) 362 363 { 364 struct xdp_test_data xdp = { .batch_size = batch_size }; 365 struct bpf_test_timer t = {}; 366 int ret; 367 368 if (!repeat) 369 repeat = 1; 370 371 ret = xdp_test_run_setup(&xdp, ctx); 372 if (ret) 373 return ret; 374 375 bpf_test_timer_enter(&t); 376 do { 377 xdp.frame_cnt = 0; 378 ret = xdp_test_run_batch(&xdp, prog, repeat - t.i); 379 if (unlikely(ret < 0)) 380 break; 381 } while (bpf_test_timer_continue(&t, xdp.frame_cnt, repeat, &ret, time)); 382 bpf_test_timer_leave(&t); 383 384 xdp_test_run_teardown(&xdp); 385 return ret; 386 } 387 388 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, 389 u32 *retval, u32 *time, bool xdp) 390 { 391 struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; 392 struct bpf_prog_array_item item = {.prog = prog}; 393 struct bpf_run_ctx *old_ctx; 394 struct bpf_cg_run_ctx run_ctx; 395 struct bpf_test_timer t = {}; 396 enum bpf_cgroup_storage_type stype; 397 int ret; 398 399 for_each_cgroup_storage_type(stype) { 400 item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); 401 if (IS_ERR(item.cgroup_storage[stype])) { 402 item.cgroup_storage[stype] = NULL; 403 for_each_cgroup_storage_type(stype) 404 bpf_cgroup_storage_free(item.cgroup_storage[stype]); 405 return -ENOMEM; 406 } 407 } 408 409 if (!repeat) 410 repeat = 1; 411 412 bpf_test_timer_enter(&t); 413 old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 414 do { 415 run_ctx.prog_item = &item; 416 local_bh_disable(); 417 bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx); 418 419 if (xdp) 420 *retval = bpf_prog_run_xdp(prog, ctx); 421 else 422 *retval = bpf_prog_run(prog, ctx); 423 424 bpf_net_ctx_clear(bpf_net_ctx); 425 local_bh_enable(); 426 } while (bpf_test_timer_continue(&t, 1, repeat, &ret, time)); 427 bpf_reset_run_ctx(old_ctx); 428 bpf_test_timer_leave(&t); 429 430 for_each_cgroup_storage_type(stype) 431 bpf_cgroup_storage_free(item.cgroup_storage[stype]); 432 433 return ret; 434 } 435 436 static int bpf_test_finish(const union bpf_attr *kattr, 437 union bpf_attr __user *uattr, const void *data, 438 struct skb_shared_info *sinfo, u32 size, u32 frag_size, 439 u32 retval, u32 duration) 440 { 441 void __user *data_out = u64_to_user_ptr(kattr->test.data_out); 442 int err = -EFAULT; 443 u32 copy_size = size; 444 445 /* Clamp copy if the user has provided a size hint, but copy the full 446 * buffer if not to retain old behaviour. 447 */ 448 if (kattr->test.data_size_out && 449 copy_size > kattr->test.data_size_out) { 450 copy_size = kattr->test.data_size_out; 451 err = -ENOSPC; 452 } 453 454 if (data_out) { 455 int len = sinfo ? copy_size - frag_size : copy_size; 456 457 if (len < 0) { 458 err = -ENOSPC; 459 goto out; 460 } 461 462 if (copy_to_user(data_out, data, len)) 463 goto out; 464 465 if (sinfo) { 466 int i, offset = len; 467 u32 data_len; 468 469 for (i = 0; i < sinfo->nr_frags; i++) { 470 skb_frag_t *frag = &sinfo->frags[i]; 471 472 if (offset >= copy_size) { 473 err = -ENOSPC; 474 break; 475 } 476 477 data_len = min_t(u32, copy_size - offset, 478 skb_frag_size(frag)); 479 480 if (copy_to_user(data_out + offset, 481 skb_frag_address(frag), 482 data_len)) 483 goto out; 484 485 offset += data_len; 486 } 487 } 488 } 489 490 if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) 491 goto out; 492 if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) 493 goto out; 494 if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration))) 495 goto out; 496 if (err != -ENOSPC) 497 err = 0; 498 out: 499 trace_bpf_test_finish(&err); 500 return err; 501 } 502 503 /* Integer types of various sizes and pointer combinations cover variety of 504 * architecture dependent calling conventions. 7+ can be supported in the 505 * future. 506 */ 507 __bpf_kfunc_start_defs(); 508 509 __bpf_kfunc int bpf_fentry_test1(int a) 510 { 511 return a + 1; 512 } 513 EXPORT_SYMBOL_GPL(bpf_fentry_test1); 514 515 noinline int bpf_fentry_test2(int a, u64 b) 516 { 517 return a + b; 518 } 519 520 noinline int bpf_fentry_test3(char a, int b, u64 c) 521 { 522 return a + b + c; 523 } 524 525 noinline int bpf_fentry_test4(void *a, char b, int c, u64 d) 526 { 527 return (long)a + b + c + d; 528 } 529 530 noinline int bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) 531 { 532 return a + (long)b + c + d + e; 533 } 534 535 noinline int bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) 536 { 537 return a + (long)b + c + d + (long)e + f; 538 } 539 540 struct bpf_fentry_test_t { 541 struct bpf_fentry_test_t *a; 542 }; 543 544 noinline int bpf_fentry_test7(struct bpf_fentry_test_t *arg) 545 { 546 asm volatile ("" : "+r"(arg)); 547 return (long)arg; 548 } 549 550 noinline int bpf_fentry_test8(struct bpf_fentry_test_t *arg) 551 { 552 return (long)arg->a; 553 } 554 555 __bpf_kfunc u32 bpf_fentry_test9(u32 *a) 556 { 557 return *a; 558 } 559 560 noinline int bpf_fentry_test10(const void *a) 561 { 562 return (long)a; 563 } 564 565 noinline void bpf_fentry_test_sinfo(struct skb_shared_info *sinfo) 566 { 567 } 568 569 noinline void bpf_fentry_test_ppvoid(void **pp) 570 { 571 } 572 573 noinline void bpf_fentry_test_pppvoid(void ***ppp) 574 { 575 } 576 577 noinline void bpf_fentry_test_ppfile(struct file **ppf) 578 { 579 } 580 581 noinline struct file **bpf_fexit_test_ret_ppfile(void) 582 { 583 return (struct file **)NULL; 584 } 585 586 __bpf_kfunc int bpf_modify_return_test(int a, int *b) 587 { 588 *b += 1; 589 return a + *b; 590 } 591 592 __bpf_kfunc int bpf_modify_return_test2(int a, int *b, short c, int d, 593 void *e, char f, int g) 594 { 595 *b += 1; 596 return a + *b + c + d + (long)e + f + g; 597 } 598 599 __bpf_kfunc int bpf_modify_return_test_tp(int nonce) 600 { 601 trace_bpf_trigger_tp(nonce); 602 603 return nonce; 604 } 605 606 noinline int bpf_fentry_shadow_test(int a) 607 { 608 return a + 1; 609 } 610 611 struct prog_test_member1 { 612 int a; 613 }; 614 615 struct prog_test_member { 616 struct prog_test_member1 m; 617 int c; 618 }; 619 620 struct prog_test_ref_kfunc { 621 int a; 622 int b; 623 struct prog_test_member memb; 624 struct prog_test_ref_kfunc *next; 625 refcount_t cnt; 626 }; 627 628 __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) 629 { 630 refcount_dec(&p->cnt); 631 } 632 633 __bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p) 634 { 635 bpf_kfunc_call_test_release(p); 636 } 637 CFI_NOSEAL(bpf_kfunc_call_test_release_dtor); 638 639 __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) 640 { 641 } 642 643 __bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p) 644 { 645 } 646 CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor); 647 648 __bpf_kfunc_end_defs(); 649 650 BTF_KFUNCS_START(bpf_test_modify_return_ids) 651 BTF_ID_FLAGS(func, bpf_modify_return_test) 652 BTF_ID_FLAGS(func, bpf_modify_return_test2) 653 BTF_ID_FLAGS(func, bpf_modify_return_test_tp) 654 BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE) 655 BTF_KFUNCS_END(bpf_test_modify_return_ids) 656 657 static const struct btf_kfunc_id_set bpf_test_modify_return_set = { 658 .owner = THIS_MODULE, 659 .set = &bpf_test_modify_return_ids, 660 }; 661 662 BTF_KFUNCS_START(test_sk_check_kfunc_ids) 663 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) 664 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) 665 BTF_KFUNCS_END(test_sk_check_kfunc_ids) 666 667 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, 668 u32 size, u32 headroom, u32 tailroom) 669 { 670 void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 671 void *data; 672 673 if (user_size > PAGE_SIZE - headroom - tailroom) 674 return ERR_PTR(-EINVAL); 675 676 size = SKB_DATA_ALIGN(size); 677 data = kzalloc(size + headroom + tailroom, GFP_USER); 678 if (!data) 679 return ERR_PTR(-ENOMEM); 680 681 if (copy_from_user(data + headroom, data_in, user_size)) { 682 kfree(data); 683 return ERR_PTR(-EFAULT); 684 } 685 686 return data; 687 } 688 689 int bpf_prog_test_run_tracing(struct bpf_prog *prog, 690 const union bpf_attr *kattr, 691 union bpf_attr __user *uattr) 692 { 693 struct bpf_fentry_test_t arg = {}; 694 u16 side_effect = 0, ret = 0; 695 int b = 2, err = -EFAULT; 696 u32 retval = 0; 697 698 if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) 699 return -EINVAL; 700 701 switch (prog->expected_attach_type) { 702 case BPF_TRACE_FENTRY: 703 case BPF_TRACE_FEXIT: 704 case BPF_TRACE_FSESSION: 705 case BPF_TRACE_FENTRY_MULTI: 706 case BPF_TRACE_FEXIT_MULTI: 707 case BPF_TRACE_FSESSION_MULTI: 708 if (bpf_fentry_test1(1) != 2 || 709 bpf_fentry_test2(2, 3) != 5 || 710 bpf_fentry_test3(4, 5, 6) != 15 || 711 bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || 712 bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || 713 bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 || 714 bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 || 715 bpf_fentry_test8(&arg) != 0 || 716 bpf_fentry_test9(&retval) != 0 || 717 bpf_fentry_test10((void *)0) != 0) 718 goto out; 719 break; 720 case BPF_MODIFY_RETURN: 721 ret = bpf_modify_return_test(1, &b); 722 if (b != 2) 723 side_effect++; 724 b = 2; 725 ret += bpf_modify_return_test2(1, &b, 3, 4, (void *)5, 6, 7); 726 if (b != 2) 727 side_effect++; 728 break; 729 default: 730 goto out; 731 } 732 733 retval = ((u32)side_effect << 16) | ret; 734 if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) 735 goto out; 736 737 err = 0; 738 out: 739 trace_bpf_test_finish(&err); 740 return err; 741 } 742 743 struct bpf_raw_tp_test_run_info { 744 struct bpf_prog *prog; 745 void *ctx; 746 u32 retval; 747 }; 748 749 static void 750 __bpf_prog_test_run_raw_tp(void *data) 751 { 752 struct bpf_raw_tp_test_run_info *info = data; 753 struct srcu_ctr __percpu *scp = NULL; 754 struct bpf_trace_run_ctx run_ctx = {}; 755 struct bpf_run_ctx *old_run_ctx; 756 757 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 758 759 if (info->prog->sleepable) { 760 scp = rcu_read_lock_tasks_trace(); 761 migrate_disable(); 762 } else { 763 rcu_read_lock(); 764 } 765 766 if (unlikely(!bpf_prog_get_recursion_context(info->prog))) { 767 bpf_prog_inc_misses_counter(info->prog); 768 goto out; 769 } 770 771 info->retval = bpf_prog_run(info->prog, info->ctx); 772 773 out: 774 bpf_prog_put_recursion_context(info->prog); 775 776 if (info->prog->sleepable) { 777 migrate_enable(); 778 rcu_read_unlock_tasks_trace(scp); 779 } else { 780 rcu_read_unlock(); 781 } 782 783 bpf_reset_run_ctx(old_run_ctx); 784 } 785 786 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, 787 const union bpf_attr *kattr, 788 union bpf_attr __user *uattr) 789 { 790 void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); 791 __u32 ctx_size_in = kattr->test.ctx_size_in; 792 struct bpf_raw_tp_test_run_info info; 793 int cpu = kattr->test.cpu, err = 0; 794 int current_cpu; 795 796 /* doesn't support data_in/out, ctx_out, duration, or repeat */ 797 if (kattr->test.data_in || kattr->test.data_out || 798 kattr->test.ctx_out || kattr->test.duration || 799 kattr->test.repeat || kattr->test.batch_size) 800 return -EINVAL; 801 802 if (ctx_size_in < prog->aux->max_ctx_offset || 803 ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64)) 804 return -EINVAL; 805 806 if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) 807 return -EINVAL; 808 809 /* 810 * Sleepable programs cannot run with preemption disabled or in 811 * hardirq context (smp_call_function_single), reject the flag. 812 */ 813 if (prog->sleepable && (kattr->test.flags & BPF_F_TEST_RUN_ON_CPU)) 814 return -EINVAL; 815 816 if (ctx_size_in) { 817 info.ctx = memdup_user(ctx_in, ctx_size_in); 818 if (IS_ERR(info.ctx)) 819 return PTR_ERR(info.ctx); 820 } else { 821 info.ctx = NULL; 822 } 823 824 info.retval = 0; 825 info.prog = prog; 826 827 if (prog->sleepable) { 828 __bpf_prog_test_run_raw_tp(&info); 829 } else { 830 current_cpu = get_cpu(); 831 if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 || 832 cpu == current_cpu) { 833 __bpf_prog_test_run_raw_tp(&info); 834 } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { 835 /* 836 * smp_call_function_single() also checks cpu_online() 837 * after csd_lock(). However, since cpu is from user 838 * space, let's do an extra quick check to filter out 839 * invalid value before smp_call_function_single(). 840 */ 841 err = -ENXIO; 842 } else { 843 err = smp_call_function_single(cpu, 844 __bpf_prog_test_run_raw_tp, 845 &info, 1); 846 } 847 put_cpu(); 848 } 849 850 if (!err && 851 copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) 852 err = -EFAULT; 853 854 kfree(info.ctx); 855 return err; 856 } 857 858 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size) 859 { 860 void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in); 861 void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); 862 u32 size = kattr->test.ctx_size_in; 863 void *data; 864 int err; 865 866 if (!data_in && !data_out) 867 return NULL; 868 869 data = kzalloc(max_size, GFP_USER); 870 if (!data) 871 return ERR_PTR(-ENOMEM); 872 873 if (data_in) { 874 err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size); 875 if (err) { 876 kfree(data); 877 return ERR_PTR(err); 878 } 879 880 size = min_t(u32, max_size, size); 881 if (copy_from_user(data, data_in, size)) { 882 kfree(data); 883 return ERR_PTR(-EFAULT); 884 } 885 } 886 return data; 887 } 888 889 static int bpf_ctx_finish(const union bpf_attr *kattr, 890 union bpf_attr __user *uattr, const void *data, 891 u32 size) 892 { 893 void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out); 894 int err = -EFAULT; 895 u32 copy_size = size; 896 897 if (!data || !data_out) 898 return 0; 899 900 if (copy_size > kattr->test.ctx_size_out) { 901 copy_size = kattr->test.ctx_size_out; 902 err = -ENOSPC; 903 } 904 905 if (copy_to_user(data_out, data, copy_size)) 906 goto out; 907 if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size))) 908 goto out; 909 if (err != -ENOSPC) 910 err = 0; 911 out: 912 return err; 913 } 914 915 /** 916 * range_is_zero - test whether buffer is initialized 917 * @buf: buffer to check 918 * @from: check from this position 919 * @to: check up until (excluding) this position 920 * 921 * This function returns true if the there is a non-zero byte 922 * in the buf in the range [from,to). 923 */ 924 static inline bool range_is_zero(void *buf, size_t from, size_t to) 925 { 926 return !memchr_inv((u8 *)buf + from, 0, to - from); 927 } 928 929 static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) 930 { 931 struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; 932 933 if (!__skb) 934 return 0; 935 936 /* make sure the fields we don't use are zeroed */ 937 if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark))) 938 return -EINVAL; 939 940 /* mark is allowed */ 941 942 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark), 943 offsetof(struct __sk_buff, priority))) 944 return -EINVAL; 945 946 /* priority is allowed */ 947 /* ingress_ifindex is allowed */ 948 /* ifindex is allowed */ 949 950 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex), 951 offsetof(struct __sk_buff, cb))) 952 return -EINVAL; 953 954 /* cb is allowed */ 955 956 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), 957 offsetof(struct __sk_buff, data_end))) 958 return -EINVAL; 959 960 /* data_end is allowed, but not copied to skb */ 961 962 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end), 963 offsetof(struct __sk_buff, tstamp))) 964 return -EINVAL; 965 966 /* tstamp is allowed */ 967 /* wire_len is allowed */ 968 /* gso_segs is allowed */ 969 970 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), 971 offsetof(struct __sk_buff, gso_size))) 972 return -EINVAL; 973 974 /* gso_size is allowed */ 975 976 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), 977 offsetof(struct __sk_buff, hwtstamp))) 978 return -EINVAL; 979 980 /* hwtstamp is allowed */ 981 982 if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp), 983 sizeof(struct __sk_buff))) 984 return -EINVAL; 985 986 skb->mark = __skb->mark; 987 skb->priority = __skb->priority; 988 skb->skb_iif = __skb->ingress_ifindex; 989 skb->tstamp = __skb->tstamp; 990 memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); 991 992 if (__skb->wire_len == 0) { 993 cb->pkt_len = skb->len; 994 } else { 995 if (__skb->wire_len < skb->len || 996 __skb->wire_len > GSO_LEGACY_MAX_SIZE) 997 return -EINVAL; 998 cb->pkt_len = __skb->wire_len; 999 } 1000 1001 if (__skb->gso_segs > GSO_MAX_SEGS) 1002 return -EINVAL; 1003 1004 /* Currently GSO type is zero/unset. If this gets extended with 1005 * a small list of accepted GSO types in future, the filter for 1006 * an unset GSO type in bpf_clone_redirect() can be lifted. 1007 */ 1008 skb_shinfo(skb)->gso_segs = __skb->gso_segs; 1009 skb_shinfo(skb)->gso_size = __skb->gso_size; 1010 skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp; 1011 1012 return 0; 1013 } 1014 1015 static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) 1016 { 1017 struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb; 1018 1019 if (!__skb) 1020 return; 1021 1022 __skb->mark = skb->mark; 1023 __skb->priority = skb->priority; 1024 __skb->ingress_ifindex = skb->skb_iif; 1025 __skb->ifindex = skb->dev->ifindex; 1026 __skb->tstamp = skb->tstamp; 1027 memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); 1028 __skb->wire_len = cb->pkt_len; 1029 __skb->gso_segs = skb_shinfo(skb)->gso_segs; 1030 __skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp; 1031 } 1032 1033 static struct proto bpf_dummy_proto = { 1034 .name = "bpf_dummy", 1035 .owner = THIS_MODULE, 1036 .obj_size = sizeof(struct sock), 1037 }; 1038 1039 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, 1040 union bpf_attr __user *uattr) 1041 { 1042 bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false; 1043 u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1044 struct net *net = current->nsproxy->net_ns; 1045 struct net_device *dev = net->loopback_dev; 1046 u32 headroom = NET_SKB_PAD + NET_IP_ALIGN; 1047 u32 linear_sz = kattr->test.data_size_in; 1048 u32 repeat = kattr->test.repeat; 1049 struct __sk_buff *ctx = NULL; 1050 struct sk_buff *skb = NULL; 1051 struct sock *sk = NULL; 1052 u32 retval, duration; 1053 int hh_len = ETH_HLEN; 1054 void *data = NULL; 1055 int ret; 1056 1057 if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || 1058 kattr->test.cpu || kattr->test.batch_size) 1059 return -EINVAL; 1060 1061 if (kattr->test.data_size_in < ETH_HLEN) 1062 return -EINVAL; 1063 1064 switch (prog->type) { 1065 case BPF_PROG_TYPE_SCHED_CLS: 1066 case BPF_PROG_TYPE_SCHED_ACT: 1067 is_direct_pkt_access = true; 1068 is_l2 = true; 1069 break; 1070 case BPF_PROG_TYPE_LWT_IN: 1071 case BPF_PROG_TYPE_LWT_OUT: 1072 case BPF_PROG_TYPE_LWT_XMIT: 1073 is_lwt = true; 1074 fallthrough; 1075 case BPF_PROG_TYPE_CGROUP_SKB: 1076 is_direct_pkt_access = true; 1077 break; 1078 default: 1079 break; 1080 } 1081 1082 ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); 1083 if (IS_ERR(ctx)) 1084 return PTR_ERR(ctx); 1085 1086 if (ctx) { 1087 if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) { 1088 ret = -EINVAL; 1089 goto out; 1090 } 1091 if (ctx->data_end) { 1092 /* Non-linear LWT test_run is unsupported for now. */ 1093 if (is_lwt) { 1094 ret = -EINVAL; 1095 goto out; 1096 } 1097 linear_sz = max(ETH_HLEN, ctx->data_end); 1098 } 1099 } 1100 1101 linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom); 1102 1103 data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom); 1104 if (IS_ERR(data)) { 1105 ret = PTR_ERR(data); 1106 data = NULL; 1107 goto out; 1108 } 1109 1110 sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); 1111 if (!sk) { 1112 ret = -ENOMEM; 1113 goto out; 1114 } 1115 sock_init_data(NULL, sk); 1116 1117 skb = slab_build_skb(data); 1118 if (!skb) { 1119 ret = -ENOMEM; 1120 goto out; 1121 } 1122 skb->sk = sk; 1123 1124 data = NULL; /* data released via kfree_skb */ 1125 1126 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1127 __skb_put(skb, linear_sz); 1128 1129 if (unlikely(kattr->test.data_size_in > linear_sz)) { 1130 void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 1131 struct skb_shared_info *sinfo = skb_shinfo(skb); 1132 u32 copied = linear_sz; 1133 1134 while (copied < kattr->test.data_size_in) { 1135 struct page *page; 1136 u32 data_len; 1137 1138 if (sinfo->nr_frags == MAX_SKB_FRAGS) { 1139 ret = -ENOMEM; 1140 goto out; 1141 } 1142 1143 page = alloc_page(GFP_KERNEL); 1144 if (!page) { 1145 ret = -ENOMEM; 1146 goto out; 1147 } 1148 1149 data_len = min_t(u32, kattr->test.data_size_in - copied, 1150 PAGE_SIZE); 1151 skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len); 1152 1153 if (copy_from_user(page_address(page), data_in + copied, 1154 data_len)) { 1155 ret = -EFAULT; 1156 goto out; 1157 } 1158 skb->data_len += data_len; 1159 skb->truesize += PAGE_SIZE; 1160 skb->len += data_len; 1161 copied += data_len; 1162 } 1163 } 1164 1165 if (ctx && ctx->ifindex > 1) { 1166 dev = dev_get_by_index(net, ctx->ifindex); 1167 if (!dev) { 1168 ret = -ENODEV; 1169 goto out; 1170 } 1171 } 1172 skb->protocol = eth_type_trans(skb, dev); 1173 skb_reset_network_header(skb); 1174 1175 switch (skb->protocol) { 1176 case htons(ETH_P_IP): 1177 if (skb_headlen(skb) < sizeof(struct iphdr)) { 1178 ret = -EINVAL; 1179 goto out; 1180 } 1181 sk->sk_family = AF_INET; 1182 sk->sk_rcv_saddr = ip_hdr(skb)->saddr; 1183 sk->sk_daddr = ip_hdr(skb)->daddr; 1184 break; 1185 #if IS_ENABLED(CONFIG_IPV6) 1186 case htons(ETH_P_IPV6): 1187 if (skb_headlen(skb) < sizeof(struct ipv6hdr)) { 1188 ret = -EINVAL; 1189 goto out; 1190 } 1191 sk->sk_family = AF_INET6; 1192 sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr; 1193 sk->sk_v6_daddr = ipv6_hdr(skb)->daddr; 1194 break; 1195 #endif 1196 default: 1197 break; 1198 } 1199 1200 if (is_l2) 1201 __skb_push(skb, hh_len); 1202 if (is_direct_pkt_access) 1203 bpf_compute_data_pointers(skb); 1204 1205 ret = convert___skb_to_skb(skb, ctx); 1206 if (ret) 1207 goto out; 1208 1209 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { 1210 const int off = skb_network_offset(skb); 1211 int len = skb->len - off; 1212 1213 skb->csum = skb_checksum(skb, off, len, 0); 1214 skb->ip_summed = CHECKSUM_COMPLETE; 1215 } 1216 1217 if (prog->type == BPF_PROG_TYPE_LWT_XMIT) { 1218 if (!ipv6_mod_enabled()) { 1219 pr_warn_once("Please test this program with IPv6 enabled kernel\n"); 1220 ret = -EOPNOTSUPP; 1221 goto out; 1222 } 1223 #if IS_ENABLED(CONFIG_IPV6) 1224 dst_hold(&net->ipv6.ip6_null_entry->dst); 1225 skb_dst_set(skb, &net->ipv6.ip6_null_entry->dst); 1226 #endif 1227 } 1228 1229 ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); 1230 if (ret) 1231 goto out; 1232 if (!is_l2) { 1233 if (skb_headroom(skb) < hh_len) { 1234 int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb)); 1235 1236 if (pskb_expand_head(skb, nhead, 0, GFP_USER)) { 1237 ret = -ENOMEM; 1238 goto out; 1239 } 1240 } 1241 memset(__skb_push(skb, hh_len), 0, hh_len); 1242 } 1243 1244 if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { 1245 const int off = skb_network_offset(skb); 1246 int len = skb->len - off; 1247 __wsum csum; 1248 1249 csum = skb_checksum(skb, off, len, 0); 1250 1251 if (csum_fold(skb->csum) != csum_fold(csum)) { 1252 ret = -EBADMSG; 1253 goto out; 1254 } 1255 } 1256 1257 convert_skb_to___skb(skb, ctx); 1258 1259 if (skb_is_nonlinear(skb)) 1260 /* bpf program can never convert linear skb to non-linear */ 1261 WARN_ON_ONCE(linear_sz == kattr->test.data_size_in); 1262 ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len, 1263 skb->data_len, retval, duration); 1264 if (!ret) 1265 ret = bpf_ctx_finish(kattr, uattr, ctx, 1266 sizeof(struct __sk_buff)); 1267 out: 1268 if (dev && dev != net->loopback_dev) 1269 dev_put(dev); 1270 kfree_skb(skb); 1271 kfree(data); 1272 if (sk) 1273 sk_free(sk); 1274 kfree(ctx); 1275 return ret; 1276 } 1277 1278 static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp) 1279 { 1280 unsigned int ingress_ifindex, rx_queue_index; 1281 struct netdev_rx_queue *rxqueue; 1282 struct net_device *device; 1283 1284 if (!xdp_md) 1285 return 0; 1286 1287 if (xdp_md->egress_ifindex != 0) 1288 return -EINVAL; 1289 1290 ingress_ifindex = xdp_md->ingress_ifindex; 1291 rx_queue_index = xdp_md->rx_queue_index; 1292 1293 if (!ingress_ifindex && rx_queue_index) 1294 return -EINVAL; 1295 1296 if (ingress_ifindex) { 1297 device = dev_get_by_index(current->nsproxy->net_ns, 1298 ingress_ifindex); 1299 if (!device) 1300 return -ENODEV; 1301 1302 if (rx_queue_index >= device->real_num_rx_queues) 1303 goto free_dev; 1304 1305 rxqueue = __netif_get_rx_queue(device, rx_queue_index); 1306 1307 if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq)) 1308 goto free_dev; 1309 1310 xdp->rxq = &rxqueue->xdp_rxq; 1311 /* The device is now tracked in the xdp->rxq for later 1312 * dev_put() 1313 */ 1314 } 1315 1316 xdp->data = xdp->data_meta + xdp_md->data; 1317 return 0; 1318 1319 free_dev: 1320 dev_put(device); 1321 return -EINVAL; 1322 } 1323 1324 static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md) 1325 { 1326 if (!xdp_md) 1327 return; 1328 1329 xdp_md->data = xdp->data - xdp->data_meta; 1330 xdp_md->data_end = xdp->data_end - xdp->data_meta; 1331 1332 if (xdp_md->ingress_ifindex) 1333 dev_put(xdp->rxq->dev); 1334 } 1335 1336 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, 1337 union bpf_attr __user *uattr) 1338 { 1339 bool do_live = (kattr->test.flags & BPF_F_TEST_XDP_LIVE_FRAMES); 1340 u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 1341 u32 retval = 0, meta_sz = 0, duration, max_linear_sz, size; 1342 u32 linear_sz = kattr->test.data_size_in; 1343 u32 batch_size = kattr->test.batch_size; 1344 u32 headroom = XDP_PACKET_HEADROOM; 1345 u32 repeat = kattr->test.repeat; 1346 struct netdev_rx_queue *rxqueue; 1347 struct skb_shared_info *sinfo; 1348 struct xdp_buff xdp = {}; 1349 int i, ret = -EINVAL; 1350 struct xdp_md *ctx; 1351 void *data; 1352 1353 if (prog->expected_attach_type == BPF_XDP_DEVMAP || 1354 prog->expected_attach_type == BPF_XDP_CPUMAP) 1355 return -EINVAL; 1356 1357 if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES) 1358 return -EINVAL; 1359 1360 if (bpf_prog_is_dev_bound(prog->aux)) 1361 return -EINVAL; 1362 1363 if (do_live) { 1364 if (!batch_size) 1365 batch_size = NAPI_POLL_WEIGHT; 1366 else if (batch_size > TEST_XDP_MAX_BATCH) 1367 return -E2BIG; 1368 } else if (batch_size) { 1369 return -EINVAL; 1370 } 1371 1372 ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); 1373 if (IS_ERR(ctx)) 1374 return PTR_ERR(ctx); 1375 1376 if (ctx) { 1377 /* There can't be user provided data before the meta data */ 1378 if (ctx->data_meta || ctx->data_end > kattr->test.data_size_in || 1379 ctx->data > ctx->data_end || 1380 (do_live && (kattr->test.data_out || kattr->test.ctx_out))) 1381 goto free_ctx; 1382 1383 meta_sz = ctx->data; 1384 if (xdp_metalen_invalid(meta_sz) || meta_sz > headroom - sizeof(struct xdp_frame)) 1385 goto free_ctx; 1386 1387 /* Meta data is allocated from the headroom */ 1388 headroom -= meta_sz; 1389 linear_sz = ctx->data_end; 1390 } 1391 1392 /* The xdp_page_head structure takes up space in each page, limiting the 1393 * size of the packet data; add the extra size to headroom here to make 1394 * sure it's accounted in the length checks below, but not in the 1395 * metadata size check above. 1396 */ 1397 if (do_live) 1398 headroom += sizeof(struct xdp_page_head); 1399 1400 max_linear_sz = PAGE_SIZE - headroom - tailroom; 1401 linear_sz = min_t(u32, linear_sz, max_linear_sz); 1402 1403 /* disallow live data mode for jumbo frames */ 1404 if (do_live && kattr->test.data_size_in > linear_sz) 1405 goto free_ctx; 1406 1407 if (kattr->test.data_size_in - meta_sz < ETH_HLEN) 1408 goto free_ctx; 1409 1410 data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom); 1411 if (IS_ERR(data)) { 1412 ret = PTR_ERR(data); 1413 goto free_ctx; 1414 } 1415 1416 rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); 1417 rxqueue->xdp_rxq.frag_size = PAGE_SIZE; 1418 xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); 1419 xdp_prepare_buff(&xdp, data, headroom, linear_sz, true); 1420 sinfo = xdp_get_shared_info_from_buff(&xdp); 1421 1422 ret = xdp_convert_md_to_buff(ctx, &xdp); 1423 if (ret) 1424 goto free_data; 1425 1426 size = linear_sz; 1427 if (unlikely(kattr->test.data_size_in > size)) { 1428 void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 1429 1430 while (size < kattr->test.data_size_in) { 1431 struct page *page; 1432 skb_frag_t *frag; 1433 u32 data_len; 1434 1435 if (sinfo->nr_frags == MAX_SKB_FRAGS) { 1436 ret = -ENOMEM; 1437 goto out_put_dev; 1438 } 1439 1440 page = alloc_page(GFP_KERNEL); 1441 if (!page) { 1442 ret = -ENOMEM; 1443 goto out_put_dev; 1444 } 1445 1446 frag = &sinfo->frags[sinfo->nr_frags++]; 1447 1448 data_len = min_t(u32, kattr->test.data_size_in - size, 1449 PAGE_SIZE); 1450 skb_frag_fill_page_desc(frag, page, 0, data_len); 1451 1452 if (copy_from_user(page_address(page), data_in + size, 1453 data_len)) { 1454 ret = -EFAULT; 1455 goto out_put_dev; 1456 } 1457 sinfo->xdp_frags_size += data_len; 1458 size += data_len; 1459 } 1460 xdp_buff_set_frags_flag(&xdp); 1461 } 1462 1463 if (repeat > 1) 1464 bpf_prog_change_xdp(NULL, prog); 1465 1466 if (do_live) 1467 ret = bpf_test_run_xdp_live(prog, &xdp, repeat, batch_size, &duration); 1468 else 1469 ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); 1470 out_put_dev: 1471 /* We convert the xdp_buff back to an xdp_md before checking the return 1472 * code so the reference count of any held netdevice will be decremented 1473 * even if the test run failed. 1474 */ 1475 xdp_convert_buff_to_md(&xdp, ctx); 1476 if (ret) 1477 goto out; 1478 1479 size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; 1480 ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size, 1481 retval, duration); 1482 if (!ret) 1483 ret = bpf_ctx_finish(kattr, uattr, ctx, 1484 sizeof(struct xdp_md)); 1485 1486 out: 1487 if (repeat > 1) 1488 bpf_prog_change_xdp(prog, NULL); 1489 free_data: 1490 for (i = 0; i < sinfo->nr_frags; i++) 1491 __free_page(skb_frag_page(&sinfo->frags[i])); 1492 kfree(data); 1493 free_ctx: 1494 kfree(ctx); 1495 return ret; 1496 } 1497 1498 static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) 1499 { 1500 /* make sure the fields we don't use are zeroed */ 1501 if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) 1502 return -EINVAL; 1503 1504 /* flags is allowed */ 1505 1506 if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags), 1507 sizeof(struct bpf_flow_keys))) 1508 return -EINVAL; 1509 1510 return 0; 1511 } 1512 1513 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, 1514 const union bpf_attr *kattr, 1515 union bpf_attr __user *uattr) 1516 { 1517 struct bpf_test_timer t = {}; 1518 u32 size = kattr->test.data_size_in; 1519 struct bpf_flow_dissector ctx = {}; 1520 u32 repeat = kattr->test.repeat; 1521 struct bpf_flow_keys *user_ctx; 1522 struct bpf_flow_keys flow_keys; 1523 const struct ethhdr *eth; 1524 unsigned int flags = 0; 1525 u32 retval, duration; 1526 void *data; 1527 int ret; 1528 1529 if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) 1530 return -EINVAL; 1531 1532 if (size < ETH_HLEN) 1533 return -EINVAL; 1534 1535 data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); 1536 if (IS_ERR(data)) 1537 return PTR_ERR(data); 1538 1539 eth = (struct ethhdr *)data; 1540 1541 if (!repeat) 1542 repeat = 1; 1543 1544 user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); 1545 if (IS_ERR(user_ctx)) { 1546 kfree(data); 1547 return PTR_ERR(user_ctx); 1548 } 1549 if (user_ctx) { 1550 ret = verify_user_bpf_flow_keys(user_ctx); 1551 if (ret) 1552 goto out; 1553 flags = user_ctx->flags; 1554 } 1555 1556 ctx.flow_keys = &flow_keys; 1557 ctx.data = data; 1558 ctx.data_end = (__u8 *)data + size; 1559 1560 bpf_test_timer_enter(&t); 1561 do { 1562 retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, 1563 size, flags); 1564 } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); 1565 bpf_test_timer_leave(&t); 1566 1567 if (ret < 0) 1568 goto out; 1569 1570 ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, 1571 sizeof(flow_keys), 0, retval, duration); 1572 if (!ret) 1573 ret = bpf_ctx_finish(kattr, uattr, user_ctx, 1574 sizeof(struct bpf_flow_keys)); 1575 1576 out: 1577 kfree(user_ctx); 1578 kfree(data); 1579 return ret; 1580 } 1581 1582 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr, 1583 union bpf_attr __user *uattr) 1584 { 1585 struct bpf_test_timer t = {}; 1586 struct bpf_prog_array *progs = NULL; 1587 struct bpf_sk_lookup_kern ctx = {}; 1588 u32 repeat = kattr->test.repeat; 1589 struct bpf_sk_lookup *user_ctx; 1590 u32 retval, duration; 1591 int ret = -EINVAL; 1592 1593 if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) 1594 return -EINVAL; 1595 1596 if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out || 1597 kattr->test.data_size_out) 1598 return -EINVAL; 1599 1600 if (!repeat) 1601 repeat = 1; 1602 1603 user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx)); 1604 if (IS_ERR(user_ctx)) 1605 return PTR_ERR(user_ctx); 1606 1607 if (!user_ctx) 1608 return -EINVAL; 1609 1610 if (user_ctx->sk) 1611 goto out; 1612 1613 if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx))) 1614 goto out; 1615 1616 if (user_ctx->local_port > U16_MAX) { 1617 ret = -ERANGE; 1618 goto out; 1619 } 1620 1621 ctx.family = (u16)user_ctx->family; 1622 ctx.protocol = (u16)user_ctx->protocol; 1623 ctx.dport = (u16)user_ctx->local_port; 1624 ctx.sport = user_ctx->remote_port; 1625 1626 switch (ctx.family) { 1627 case AF_INET: 1628 ctx.v4.daddr = (__force __be32)user_ctx->local_ip4; 1629 ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4; 1630 break; 1631 1632 #if IS_ENABLED(CONFIG_IPV6) 1633 case AF_INET6: 1634 ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6; 1635 ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6; 1636 break; 1637 #endif 1638 1639 default: 1640 ret = -EAFNOSUPPORT; 1641 goto out; 1642 } 1643 1644 progs = bpf_prog_array_alloc(1, GFP_KERNEL); 1645 if (!progs) { 1646 ret = -ENOMEM; 1647 goto out; 1648 } 1649 1650 progs->items[0].prog = prog; 1651 1652 bpf_test_timer_enter(&t); 1653 do { 1654 ctx.selected_sk = NULL; 1655 retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, bpf_prog_run); 1656 } while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration)); 1657 bpf_test_timer_leave(&t); 1658 1659 if (ret < 0) 1660 goto out; 1661 1662 user_ctx->cookie = 0; 1663 if (ctx.selected_sk) { 1664 if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) { 1665 ret = -EOPNOTSUPP; 1666 goto out; 1667 } 1668 1669 user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); 1670 } 1671 1672 ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration); 1673 if (!ret) 1674 ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); 1675 1676 out: 1677 bpf_prog_array_free(progs); 1678 kfree(user_ctx); 1679 return ret; 1680 } 1681 1682 int bpf_prog_test_run_syscall(struct bpf_prog *prog, 1683 const union bpf_attr *kattr, 1684 union bpf_attr __user *uattr) 1685 { 1686 void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in); 1687 __u32 ctx_size_in = kattr->test.ctx_size_in; 1688 void *ctx = NULL; 1689 u32 retval; 1690 int err = 0; 1691 1692 /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */ 1693 if (kattr->test.data_in || kattr->test.data_out || 1694 kattr->test.ctx_out || kattr->test.duration || 1695 kattr->test.repeat || kattr->test.flags || 1696 kattr->test.batch_size) 1697 return -EINVAL; 1698 1699 if (ctx_size_in < prog->aux->max_ctx_offset || 1700 ctx_size_in > U16_MAX) 1701 return -EINVAL; 1702 1703 if (ctx_size_in) { 1704 ctx = memdup_user(ctx_in, ctx_size_in); 1705 if (IS_ERR(ctx)) 1706 return PTR_ERR(ctx); 1707 } 1708 1709 rcu_read_lock_trace(); 1710 retval = bpf_prog_run_pin_on_cpu(prog, ctx); 1711 rcu_read_unlock_trace(); 1712 1713 if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) { 1714 err = -EFAULT; 1715 goto out; 1716 } 1717 if (ctx_size_in) 1718 if (copy_to_user(ctx_in, ctx, ctx_size_in)) 1719 err = -EFAULT; 1720 out: 1721 kfree(ctx); 1722 return err; 1723 } 1724 1725 static int verify_and_copy_hook_state(struct nf_hook_state *state, 1726 const struct nf_hook_state *user, 1727 struct net_device *dev) 1728 { 1729 if (user->in || user->out) 1730 return -EINVAL; 1731 1732 if (user->net || user->sk || user->okfn) 1733 return -EINVAL; 1734 1735 switch (user->pf) { 1736 case NFPROTO_IPV4: 1737 case NFPROTO_IPV6: 1738 switch (state->hook) { 1739 case NF_INET_PRE_ROUTING: 1740 state->in = dev; 1741 break; 1742 case NF_INET_LOCAL_IN: 1743 state->in = dev; 1744 break; 1745 case NF_INET_FORWARD: 1746 state->in = dev; 1747 state->out = dev; 1748 break; 1749 case NF_INET_LOCAL_OUT: 1750 state->out = dev; 1751 break; 1752 case NF_INET_POST_ROUTING: 1753 state->out = dev; 1754 break; 1755 } 1756 1757 break; 1758 default: 1759 return -EINVAL; 1760 } 1761 1762 state->pf = user->pf; 1763 state->hook = user->hook; 1764 1765 return 0; 1766 } 1767 1768 static __be16 nfproto_eth(int nfproto) 1769 { 1770 switch (nfproto) { 1771 case NFPROTO_IPV4: 1772 return htons(ETH_P_IP); 1773 case NFPROTO_IPV6: 1774 break; 1775 } 1776 1777 return htons(ETH_P_IPV6); 1778 } 1779 1780 int bpf_prog_test_run_nf(struct bpf_prog *prog, 1781 const union bpf_attr *kattr, 1782 union bpf_attr __user *uattr) 1783 { 1784 struct net *net = current->nsproxy->net_ns; 1785 struct net_device *dev = net->loopback_dev; 1786 struct nf_hook_state *user_ctx, hook_state = { 1787 .pf = NFPROTO_IPV4, 1788 .hook = NF_INET_LOCAL_OUT, 1789 }; 1790 u32 size = kattr->test.data_size_in; 1791 u32 repeat = kattr->test.repeat; 1792 struct bpf_nf_ctx ctx = { 1793 .state = &hook_state, 1794 }; 1795 struct sk_buff *skb = NULL; 1796 u32 retval, duration; 1797 void *data; 1798 int ret; 1799 1800 if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) 1801 return -EINVAL; 1802 1803 if (size < sizeof(struct iphdr)) 1804 return -EINVAL; 1805 1806 data = bpf_test_init(kattr, kattr->test.data_size_in, size, 1807 NET_SKB_PAD + NET_IP_ALIGN, 1808 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 1809 if (IS_ERR(data)) 1810 return PTR_ERR(data); 1811 1812 if (!repeat) 1813 repeat = 1; 1814 1815 user_ctx = bpf_ctx_init(kattr, sizeof(struct nf_hook_state)); 1816 if (IS_ERR(user_ctx)) { 1817 kfree(data); 1818 return PTR_ERR(user_ctx); 1819 } 1820 1821 if (user_ctx) { 1822 ret = verify_and_copy_hook_state(&hook_state, user_ctx, dev); 1823 if (ret) 1824 goto out; 1825 } 1826 1827 skb = slab_build_skb(data); 1828 if (!skb) { 1829 ret = -ENOMEM; 1830 goto out; 1831 } 1832 1833 data = NULL; /* data released via kfree_skb */ 1834 1835 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); 1836 __skb_put(skb, size); 1837 1838 ret = -EINVAL; 1839 1840 if (hook_state.hook != NF_INET_LOCAL_OUT) { 1841 if (size < ETH_HLEN + sizeof(struct iphdr)) 1842 goto out; 1843 1844 skb->protocol = eth_type_trans(skb, dev); 1845 switch (skb->protocol) { 1846 case htons(ETH_P_IP): 1847 if (hook_state.pf == NFPROTO_IPV4) 1848 break; 1849 goto out; 1850 case htons(ETH_P_IPV6): 1851 if (size < ETH_HLEN + sizeof(struct ipv6hdr)) 1852 goto out; 1853 if (hook_state.pf == NFPROTO_IPV6) 1854 break; 1855 goto out; 1856 default: 1857 ret = -EPROTO; 1858 goto out; 1859 } 1860 1861 skb_reset_network_header(skb); 1862 } else { 1863 skb->protocol = nfproto_eth(hook_state.pf); 1864 } 1865 1866 ctx.skb = skb; 1867 1868 ret = bpf_test_run(prog, &ctx, repeat, &retval, &duration, false); 1869 if (ret) 1870 goto out; 1871 1872 ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration); 1873 1874 out: 1875 kfree(user_ctx); 1876 kfree_skb(skb); 1877 kfree(data); 1878 return ret; 1879 } 1880 1881 static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { 1882 .owner = THIS_MODULE, 1883 .set = &test_sk_check_kfunc_ids, 1884 }; 1885 1886 BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids) 1887 BTF_ID(struct, prog_test_ref_kfunc) 1888 BTF_ID(func, bpf_kfunc_call_test_release_dtor) 1889 BTF_ID(struct, prog_test_member) 1890 BTF_ID(func, bpf_kfunc_call_memb_release_dtor) 1891 1892 static int __init bpf_prog_test_run_init(void) 1893 { 1894 const struct btf_id_dtor_kfunc bpf_prog_test_dtor_kfunc[] = { 1895 { 1896 .btf_id = bpf_prog_test_dtor_kfunc_ids[0], 1897 .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[1] 1898 }, 1899 { 1900 .btf_id = bpf_prog_test_dtor_kfunc_ids[2], 1901 .kfunc_btf_id = bpf_prog_test_dtor_kfunc_ids[3], 1902 }, 1903 }; 1904 int ret; 1905 1906 ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set); 1907 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); 1908 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set); 1909 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set); 1910 return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc, 1911 ARRAY_SIZE(bpf_prog_test_dtor_kfunc), 1912 THIS_MODULE); 1913 } 1914 late_initcall(bpf_prog_test_run_init); 1915