#include <stdbool.h>
#include <linux/bpf.h>
#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>

#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include "bpf_kfuncs.h"

/* Number of metadata bytes each test packet carries from XDP to TC. */
#define META_SIZE 32

/* Convert a __u32 ctx member (data, data_end, data_meta) to a pointer. */
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem

/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
 *
 * The XDP program extracts a fixed-size payload following the Ethernet header
 * and stores it as packet metadata to test the driver's metadata support. The
 * TC program then verifies if the passed metadata is correct.
 */

/* Set by a TC program once it observes the expected metadata; read by the
 * user-space test harness to determine pass/fail.
 */
bool test_pass;

/* Source MAC address identifying test packets (see check_smac()). */
static const __u8 smac_want[ETH_ALEN] = {
	0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
};

/* Expected metadata contents; the XDP side copies this pattern from the
 * packet payload into the metadata area.
 */
static const __u8 meta_want[META_SIZE] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};

/* True if the frame carries the test source MAC, i.e. is a test packet. */
static bool check_smac(const struct ethhdr *eth)
{
	return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
}

/* Compare META_SIZE bytes at meta_have against meta_want. On mismatch, dump
 * both buffers to the error stream as two 16-byte chunks (%pI6 prints 16
 * bytes). Normally invoked through the check_metadata() macro below, which
 * supplies the call site's file and line for the failure message.
 */
static bool check_metadata(const char *file, int line, __u8 *meta_have)
{
	if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
		return true;

	bpf_stream_printk(BPF_STREAM_STDERR,
			  "FAIL:%s:%d: metadata mismatch\n"
			  " have:\n %pI6\n %pI6\n"
			  " want:\n %pI6\n %pI6\n",
			  file, line,
			  &meta_have[0x00], &meta_have[0x10],
			  &meta_want[0x00], &meta_want[0x10]);
	return false;
}

/* Shadow the function so callers automatically pass __FILE__/__LINE__. */
#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)

/* Bounds-check the skb metadata area, then verify its contents. The
 * parentheses around check_metadata suppress expansion of the function-like
 * macro defined above so the real function is called directly.
 */
static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
{
	__u8 *data_meta = ctx_ptr(skb, data_meta);
	__u8 *data = ctx_ptr(skb, data);

	return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
}

/* Shadow the function so callers automatically pass __FILE__/__LINE__. */
#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)

/* Read metadata directly through ctx->data_meta packet pointers. */
SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
	__u8 *meta_have = ctx_ptr(ctx, data_meta);
	__u8 *data = ctx_ptr(ctx, data);

	/* Verifier-mandated bounds check before touching metadata */
	if (meta_have + META_SIZE > data)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Read from metadata using bpf_dynptr_read helper */
SEC("tc")
int ing_cls_dynptr_read(struct __sk_buff *ctx)
{
	__u8 meta_have[META_SIZE];
	struct bpf_dynptr meta;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Write to metadata using bpf_dynptr_write helper */
SEC("tc")
int ing_cls_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 *src;

	/* Source: the payload right after the Ethernet header */
	bpf_dynptr_from_skb(ctx, 0, &data);
	src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
	if (!src)
		return TC_ACT_SHOT;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);

	return TC_ACT_UNSPEC; /* pass */
}

/* Read from metadata using read-only dynptr slice */
SEC("tc")
int ing_cls_dynptr_slice(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	__u8 *meta_have;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
	if (!meta_have)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Write to metadata using writeable dynptr slice */
SEC("tc")
int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 *src, *dst;

	/* Source: the payload right after the Ethernet header */
	bpf_dynptr_from_skb(ctx, 0, &data);
	src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
	if (!src)
		return TC_ACT_SHOT;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
	if (!dst)
		return TC_ACT_SHOT;

	__builtin_memcpy(dst, src, META_SIZE);

	return TC_ACT_UNSPEC; /* pass */
}

/* Read skb metadata in chunks from various offsets in different ways. */
SEC("tc")
int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
{
	const __u32 chunk_len = META_SIZE / 4;
	__u8 meta_have[META_SIZE];
	struct bpf_dynptr meta;
	__u8 *dst, *src;

	dst = meta_have;

	/* 1. Regular read */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
	dst += chunk_len;

	/* 2. Read from an offset-adjusted dynptr.
	 * Note: all offsets below are relative to the adjusted start,
	 * i.e. chunk_len into the metadata area.
	 */
	bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
	bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
	dst += chunk_len;

	/* 3. Read at an offset */
	bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
	dst += chunk_len;

	/* 4. Read from a slice starting at an offset */
	src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
	if (!src)
		goto out;
	__builtin_memcpy(dst, src, chunk_len);

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Write skb metadata in chunks at various offsets in different ways. */
SEC("tc")
int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
{
	const __u32 chunk_len = META_SIZE / 4;
	__u8 payload[META_SIZE];
	struct bpf_dynptr meta;
	__u8 *dst, *src;

	/* Source: the payload right after the Ethernet header */
	bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
	src = payload;

	/* 1. Regular write */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
	src += chunk_len;

	/* 2. Write to an offset-adjusted dynptr.
	 * Note: all offsets below are relative to the adjusted start,
	 * i.e. chunk_len into the metadata area.
	 */
	bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
	bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
	src += chunk_len;

	/* 3. Write at an offset */
	bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
	src += chunk_len;

	/* 4. Write to a slice starting at an offset */
	dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
	if (!dst)
		return TC_ACT_SHOT;
	__builtin_memcpy(dst, src, chunk_len);

	return TC_ACT_UNSPEC; /* pass */
}

/* Pass an OOB offset to dynptr read, write, adjust, slice. */
SEC("tc")
int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	__u8 md, *p;
	int err;

	err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (err)
		goto fail;

	/* read offset OOB */
	err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
	if (err != -E2BIG)
		goto fail;

	/* write offset OOB */
	err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
	if (err != -E2BIG)
		goto fail;

	/* adjust end offset OOB */
	err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
	if (err != -ERANGE)
		goto fail;

	/* adjust start offset OOB */
	err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
	if (err != -ERANGE)
		goto fail;

	/* slice offset OOB */
	p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
	if (p)
		goto fail;

	/* slice rdwr offset OOB */
	p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
	if (p)
		goto fail;

	return TC_ACT_UNSPEC;
fail:
	return TC_ACT_SHOT;
}

/* Reserve and clear space for metadata but don't populate it */
SEC("xdp")
int ing_xdp_zalloc_meta(struct xdp_md *ctx)
{
	struct ethhdr *eth = ctx_ptr(ctx, data);
	__u8 *meta;
	int ret;

	/* Drop any non-test packets */
	if (eth + 1 > ctx_ptr(ctx, data_end))
		return XDP_DROP;
	if (!check_smac(eth))
		return XDP_DROP;

	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
	if (ret < 0)
		return XDP_DROP;

	/* Packet pointers are invalidated by adjust_meta; reload them */
	meta = ctx_ptr(ctx, data_meta);
	if (meta + META_SIZE > ctx_ptr(ctx, data))
		return XDP_DROP;

	__builtin_memset(meta, 0, META_SIZE);

	return XDP_PASS;
}

/* Copy the payload following the Ethernet header into the metadata area
 * reserved with bpf_xdp_adjust_meta(), for the TC programs to verify.
 */
SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
	__u8 *data, *data_meta, *data_end, *payload;
	struct ethhdr *eth;
	int ret;

	ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
	if (ret < 0)
		return XDP_DROP;

	/* Packet pointers are invalidated by adjust_meta; reload them */
	data_meta = ctx_ptr(ctx, data_meta);
	data_end = ctx_ptr(ctx, data_end);
	data = ctx_ptr(ctx, data);

	eth = (struct ethhdr *)data;
	payload = data + sizeof(struct ethhdr);

	if (payload + META_SIZE > data_end ||
	    data_meta + META_SIZE > data)
		return XDP_DROP;

	/* The Linux networking stack may send other packets on the test
	 * interface that interfere with the test. Just drop them.
	 * The test packets can be recognized by their source MAC address.
	 */
	if (!check_smac(eth))
		return XDP_DROP;

	__builtin_memcpy(data_meta, payload, META_SIZE);
	return XDP_PASS;
}

/*
 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
 * kept intact if prog writes to packet _payload_ using packet pointers.
 */
SEC("tc")
int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
{
	__u8 *meta_have = ctx_ptr(ctx, data_meta);
	struct ethhdr *eth = ctx_ptr(ctx, data);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	if (meta_have + META_SIZE > eth)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	/* Packet write to trigger unclone in prologue */
	eth->h_proto = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
 * kept intact if prog writes to packet _metadata_ using packet pointers.
 */
SEC("tc")
int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
{
	__u8 *meta_have = ctx_ptr(ctx, data_meta);
	struct ethhdr *eth = ctx_ptr(ctx, data);

	if (eth + 1 > ctx_ptr(ctx, data_end))
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	if (meta_have + META_SIZE > eth)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	/* Metadata write to trigger unclone in prologue */
	*meta_have = 42;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that, when operating on a cloned packet, metadata remains intact if
 * prog creates a r/w slice to packet _payload_.
 */
SEC("tc")
int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	struct ethhdr *eth;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that, when operating on a cloned packet, metadata remains intact if
 * prog creates an r/w slice to packet _metadata_.
 */
SEC("tc")
int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	const struct ethhdr *eth;
	__u8 *meta_have;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
	if (!meta_have)
		goto out;

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
 * before prog writes to packet _payload_ using dynptr_write helper and metadata
 * remains intact before and after the write.
 */
SEC("tc")
int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	const struct ethhdr *eth;
	int err;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	/* Expect read-write metadata before unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta))
		goto out;

	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	/* Helper write to payload will unclone the packet */
	bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);

	/* Metadata must still match after the unclone */
	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/*
 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
 * before prog writes to packet _metadata_ using dynptr_write helper and
 * metadata remains intact before and after the write.
 */
SEC("tc")
int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
{
	struct bpf_dynptr data, meta;
	__u8 meta_have[META_SIZE];
	const struct ethhdr *eth;
	int err;

	bpf_dynptr_from_skb(ctx, 0, &data);
	eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
	if (!eth)
		goto out;
	/* Ignore non-test packets */
	if (!check_smac(eth))
		goto out;

	/* Expect read-write metadata before unclone */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (bpf_dynptr_is_rdonly(&meta))
		goto out;

	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	/* Helper write to metadata will unclone the packet */
	bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);

	/* Metadata must still match after the unclone */
	err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
	if (err || !check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Verify that metadata survives VLAN tag push and pop helpers. */
SEC("tc")
int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
{
	int err;

	/* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
	 * secondary tag push triggers an actual MAC header modification.
	 */
	err = bpf_skb_vlan_push(ctx, 0, 42);
	if (err)
		goto out;
	err = bpf_skb_vlan_push(ctx, 0, 207);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	err = bpf_skb_vlan_pop(ctx);
	if (err)
		goto out;
	err = bpf_skb_vlan_pop(ctx);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Verify that metadata survives bpf_skb_adjust_room, with and without a
 * head reallocation.
 */
SEC("tc")
int helper_skb_adjust_room(struct __sk_buff *ctx)
{
	int err;

	/* Grow a 1 byte hole after the MAC header */
	err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	/* Shrink a 1 byte hole after the MAC header */
	err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	/* Grow a 256 byte hole to trigger head reallocation */
	err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Verify that metadata survives bpf_skb_change_head and
 * bpf_skb_change_tail, with and without a head reallocation.
 */
SEC("tc")
int helper_skb_change_head_tail(struct __sk_buff *ctx)
{
	int err;

	/* Reserve 1 extra in the front for packet data */
	err = bpf_skb_change_head(ctx, 1, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	/* Reserve 256 extra bytes in the front to trigger head reallocation */
	err = bpf_skb_change_head(ctx, 256, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	/* Reserve 4k extra bytes in the back to trigger head reallocation */
	err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

/* Verify that metadata survives bpf_skb_change_proto in both directions. */
SEC("tc")
int helper_skb_change_proto(struct __sk_buff *ctx)
{
	int err;

	err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
	if (err)
		goto out;

	if (!check_skb_metadata(ctx))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}

char _license[] SEC("license") = "GPL";