1 // SPDX-License-Identifier: GPL-2.0
2 #include <vmlinux.h>
3
4 #include <bpf/bpf_endian.h>
5 #include <bpf/bpf_helpers.h>
6 #include <errno.h>
7
8 #include "bpf_kfuncs.h"
9 #include "bpf_tracing_net.h"
10
11 #define META_SIZE 32
12
13 #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
14
15 /* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
16 *
17 * The XDP program extracts a fixed-size payload following the Ethernet header
18 * and stores it as packet metadata to test the driver's metadata support. The
19 * TC program then verifies if the passed metadata is correct.
20 */
21
/* Set to true by a test program on success; read back by user space. */
bool test_pass;

/* Source MAC address that identifies test packets; anything else on the
 * interface is dropped as noise.
 */
static const __u8 smac_want[ETH_ALEN] = {
	0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
};

/* Expected metadata contents: the META_SIZE payload bytes that follow the
 * Ethernet header of each test packet.
 */
static const __u8 meta_want[META_SIZE] = {
	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
	0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
	0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
};
34
check_smac(const struct ethhdr * eth)35 static bool check_smac(const struct ethhdr *eth)
36 {
37 return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
38 }
39
/* Compare META_SIZE bytes at meta_have against the expected meta_want
 * pattern. On mismatch, log both buffers to the BPF error stream (printed
 * as two 16-byte groups via the %pI6 format) together with the call site,
 * and return false. Callers use the check_metadata() macro wrapper below,
 * which fills in file/line automatically.
 */
static bool check_metadata(const char *file, int line, __u8 *meta_have)
{
	if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
		return true;

	bpf_stream_printk(BPF_STDERR,
			  "FAIL:%s:%d: metadata mismatch\n"
			  " have:\n %pI6\n %pI6\n"
			  " want:\n %pI6\n %pI6\n",
			  file, line,
			  &meta_have[0x00], &meta_have[0x10],
			  &meta_want[0x00], &meta_want[0x10]);
	return false;
}
54
55 #define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
56
check_skb_metadata(const char * file,int line,struct __sk_buff * skb)57 static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
58 {
59 __u8 *data_meta = ctx_ptr(skb, data_meta);
60 __u8 *data = ctx_ptr(skb, data);
61
62 return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
63 }
64
65 #define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
66
67 SEC("tc")
ing_cls(struct __sk_buff * ctx)68 int ing_cls(struct __sk_buff *ctx)
69 {
70 __u8 *meta_have = ctx_ptr(ctx, data_meta);
71 __u8 *data = ctx_ptr(ctx, data);
72
73 if (meta_have + META_SIZE > data)
74 goto out;
75
76 if (!check_metadata(meta_have))
77 goto out;
78
79 test_pass = true;
80 out:
81 return TC_ACT_SHOT;
82 }
83
84 /* Read from metadata using bpf_dynptr_read helper */
85 SEC("tc")
ing_cls_dynptr_read(struct __sk_buff * ctx)86 int ing_cls_dynptr_read(struct __sk_buff *ctx)
87 {
88 __u8 meta_have[META_SIZE];
89 struct bpf_dynptr meta;
90
91 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
92 bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
93
94 if (!check_metadata(meta_have))
95 goto out;
96
97 test_pass = true;
98 out:
99 return TC_ACT_SHOT;
100 }
101
102 /* Write to metadata using bpf_dynptr_write helper */
103 SEC("tc")
ing_cls_dynptr_write(struct __sk_buff * ctx)104 int ing_cls_dynptr_write(struct __sk_buff *ctx)
105 {
106 struct bpf_dynptr data, meta;
107 __u8 *src;
108
109 bpf_dynptr_from_skb(ctx, 0, &data);
110 src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
111 if (!src)
112 return TC_ACT_SHOT;
113
114 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
115 bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
116
117 return TC_ACT_UNSPEC; /* pass */
118 }
119
120 /* Read from metadata using read-only dynptr slice */
121 SEC("tc")
ing_cls_dynptr_slice(struct __sk_buff * ctx)122 int ing_cls_dynptr_slice(struct __sk_buff *ctx)
123 {
124 struct bpf_dynptr meta;
125 __u8 *meta_have;
126
127 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
128 meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
129 if (!meta_have)
130 goto out;
131
132 if (!check_metadata(meta_have))
133 goto out;
134
135 test_pass = true;
136 out:
137 return TC_ACT_SHOT;
138 }
139
140 /* Write to metadata using writeable dynptr slice */
141 SEC("tc")
ing_cls_dynptr_slice_rdwr(struct __sk_buff * ctx)142 int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
143 {
144 struct bpf_dynptr data, meta;
145 __u8 *src, *dst;
146
147 bpf_dynptr_from_skb(ctx, 0, &data);
148 src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
149 if (!src)
150 return TC_ACT_SHOT;
151
152 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
153 dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
154 if (!dst)
155 return TC_ACT_SHOT;
156
157 __builtin_memcpy(dst, src, META_SIZE);
158
159 return TC_ACT_UNSPEC; /* pass */
160 }
161
/* Read skb metadata in chunks from various offsets in different ways. */
SEC("tc")
int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
{
	const __u32 chunk_len = META_SIZE / 4;
	__u8 meta_have[META_SIZE];
	struct bpf_dynptr meta;
	__u8 *dst, *src;

	/* Assemble the metadata into meta_have one quarter at a time. */
	dst = meta_have;

	/* 1. Regular read */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
	dst += chunk_len;

	/* 2. Read from an offset-adjusted dynptr. Note that the adjust
	 * shifts the dynptr's base forward by chunk_len, so all offsets
	 * below are relative to the new base.
	 */
	bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
	bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
	dst += chunk_len;

	/* 3. Read at an offset */
	bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
	dst += chunk_len;

	/* 4. Read from a slice starting at an offset */
	src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
	if (!src)
		goto out;
	__builtin_memcpy(dst, src, chunk_len);

	if (!check_metadata(meta_have))
		goto out;

	test_pass = true;
out:
	return TC_ACT_SHOT;
}
200
/* Write skb metadata in chunks at various offsets in different ways. */
SEC("tc")
int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
{
	const __u32 chunk_len = META_SIZE / 4;
	__u8 payload[META_SIZE];
	struct bpf_dynptr meta;
	__u8 *dst, *src;

	/* Source data: the payload following the Ethernet header. */
	bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
	src = payload;

	/* 1. Regular write */
	bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
	src += chunk_len;

	/* 2. Write to an offset-adjusted dynptr. The adjust shifts the
	 * dynptr's base forward by chunk_len, so all offsets below are
	 * relative to the new base.
	 */
	bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
	bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
	src += chunk_len;

	/* 3. Write at an offset */
	bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
	src += chunk_len;

	/* 4. Write to a slice starting at an offset */
	dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
	if (!dst)
		return TC_ACT_SHOT;
	__builtin_memcpy(dst, src, chunk_len);

	return TC_ACT_UNSPEC; /* pass */
}
235
/* Pass an OOB offset to dynptr read, write, adjust, slice.
 *
 * Each access is one past the end of the META_SIZE metadata area and must
 * fail with the documented error: -E2BIG for read/write, -ERANGE for
 * adjust, and a NULL slice pointer.
 */
SEC("tc")
int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
{
	struct bpf_dynptr meta;
	__u8 md, *p;
	int err;

	err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
	if (err)
		goto fail;

	/* read offset OOB */
	err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
	if (err != -E2BIG)
		goto fail;

	/* write offset OOB */
	err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
	if (err != -E2BIG)
		goto fail;

	/* adjust end offset OOB */
	err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
	if (err != -ERANGE)
		goto fail;

	/* adjust start offset OOB */
	err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
	if (err != -ERANGE)
		goto fail;

	/* slice offset OOB */
	p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
	if (p)
		goto fail;

	/* slice rdwr offset OOB */
	p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
	if (p)
		goto fail;

	return TC_ACT_UNSPEC;
fail:
	return TC_ACT_SHOT;
}
282
283 /* Reserve and clear space for metadata but don't populate it */
284 SEC("xdp")
ing_xdp_zalloc_meta(struct xdp_md * ctx)285 int ing_xdp_zalloc_meta(struct xdp_md *ctx)
286 {
287 struct ethhdr *eth = ctx_ptr(ctx, data);
288 __u8 *meta;
289 int ret;
290
291 /* Drop any non-test packets */
292 if (eth + 1 > ctx_ptr(ctx, data_end))
293 return XDP_DROP;
294 if (!check_smac(eth))
295 return XDP_DROP;
296
297 ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
298 if (ret < 0)
299 return XDP_DROP;
300
301 meta = ctx_ptr(ctx, data_meta);
302 if (meta + META_SIZE > ctx_ptr(ctx, data))
303 return XDP_DROP;
304
305 __builtin_memset(meta, 0, META_SIZE);
306
307 return XDP_PASS;
308 }
309
310 SEC("xdp")
ing_xdp(struct xdp_md * ctx)311 int ing_xdp(struct xdp_md *ctx)
312 {
313 __u8 *data, *data_meta, *data_end, *payload;
314 struct ethhdr *eth;
315 int ret;
316
317 ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
318 if (ret < 0)
319 return XDP_DROP;
320
321 data_meta = ctx_ptr(ctx, data_meta);
322 data_end = ctx_ptr(ctx, data_end);
323 data = ctx_ptr(ctx, data);
324
325 eth = (struct ethhdr *)data;
326 payload = data + sizeof(struct ethhdr);
327
328 if (payload + META_SIZE > data_end ||
329 data_meta + META_SIZE > data)
330 return XDP_DROP;
331
332 /* The Linux networking stack may send other packets on the test
333 * interface that interfere with the test. Just drop them.
334 * The test packets can be recognized by their source MAC address.
335 */
336 if (!check_smac(eth))
337 return XDP_DROP;
338
339 __builtin_memcpy(data_meta, payload, META_SIZE);
340 return XDP_PASS;
341 }
342
343 /*
344 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
345 * kept intact if prog writes to packet _payload_ using packet pointers.
346 */
347 SEC("tc")
clone_data_meta_survives_data_write(struct __sk_buff * ctx)348 int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
349 {
350 __u8 *meta_have = ctx_ptr(ctx, data_meta);
351 struct ethhdr *eth = ctx_ptr(ctx, data);
352
353 if (eth + 1 > ctx_ptr(ctx, data_end))
354 goto out;
355 /* Ignore non-test packets */
356 if (!check_smac(eth))
357 goto out;
358
359 if (meta_have + META_SIZE > eth)
360 goto out;
361
362 if (!check_metadata(meta_have))
363 goto out;
364
365 /* Packet write to trigger unclone in prologue */
366 eth->h_proto = 42;
367
368 test_pass = true;
369 out:
370 return TC_ACT_SHOT;
371 }
372
373 /*
374 * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
375 * kept intact if prog writes to packet _metadata_ using packet pointers.
376 */
377 SEC("tc")
clone_data_meta_survives_meta_write(struct __sk_buff * ctx)378 int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
379 {
380 __u8 *meta_have = ctx_ptr(ctx, data_meta);
381 struct ethhdr *eth = ctx_ptr(ctx, data);
382
383 if (eth + 1 > ctx_ptr(ctx, data_end))
384 goto out;
385 /* Ignore non-test packets */
386 if (!check_smac(eth))
387 goto out;
388
389 if (meta_have + META_SIZE > eth)
390 goto out;
391
392 if (!check_metadata(meta_have))
393 goto out;
394
395 /* Metadata write to trigger unclone in prologue */
396 *meta_have = 42;
397
398 test_pass = true;
399 out:
400 return TC_ACT_SHOT;
401 }
402
403 /*
404 * Check that, when operating on a cloned packet, metadata remains intact if
405 * prog creates a r/w slice to packet _payload_.
406 */
407 SEC("tc")
clone_meta_dynptr_survives_data_slice_write(struct __sk_buff * ctx)408 int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
409 {
410 struct bpf_dynptr data, meta;
411 __u8 meta_have[META_SIZE];
412 struct ethhdr *eth;
413
414 bpf_dynptr_from_skb(ctx, 0, &data);
415 eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
416 if (!eth)
417 goto out;
418 /* Ignore non-test packets */
419 if (!check_smac(eth))
420 goto out;
421
422 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
423 bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
424 if (!check_metadata(meta_have))
425 goto out;
426
427 test_pass = true;
428 out:
429 return TC_ACT_SHOT;
430 }
431
432 /*
433 * Check that, when operating on a cloned packet, metadata remains intact if
434 * prog creates an r/w slice to packet _metadata_.
435 */
436 SEC("tc")
clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff * ctx)437 int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
438 {
439 struct bpf_dynptr data, meta;
440 const struct ethhdr *eth;
441 __u8 *meta_have;
442
443 bpf_dynptr_from_skb(ctx, 0, &data);
444 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
445 if (!eth)
446 goto out;
447 /* Ignore non-test packets */
448 if (!check_smac(eth))
449 goto out;
450
451 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
452 meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
453 if (!meta_have)
454 goto out;
455
456 if (!check_metadata(meta_have))
457 goto out;
458
459 test_pass = true;
460 out:
461 return TC_ACT_SHOT;
462 }
463
464 /*
465 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
466 * before prog writes to packet _payload_ using dynptr_write helper and metadata
467 * remains intact before and after the write.
468 */
469 SEC("tc")
clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff * ctx)470 int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
471 {
472 struct bpf_dynptr data, meta;
473 __u8 meta_have[META_SIZE];
474 const struct ethhdr *eth;
475 int err;
476
477 bpf_dynptr_from_skb(ctx, 0, &data);
478 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
479 if (!eth)
480 goto out;
481 /* Ignore non-test packets */
482 if (!check_smac(eth))
483 goto out;
484
485 /* Expect read-write metadata before unclone */
486 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
487 if (bpf_dynptr_is_rdonly(&meta))
488 goto out;
489
490 err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
491 if (err || !check_metadata(meta_have))
492 goto out;
493
494 /* Helper write to payload will unclone the packet */
495 bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
496
497 err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
498 if (err || !check_metadata(meta_have))
499 goto out;
500
501 test_pass = true;
502 out:
503 return TC_ACT_SHOT;
504 }
505
506 /*
507 * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
508 * before prog writes to packet _metadata_ using dynptr_write helper and
509 * metadata remains intact before and after the write.
510 */
511 SEC("tc")
clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff * ctx)512 int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
513 {
514 struct bpf_dynptr data, meta;
515 __u8 meta_have[META_SIZE];
516 const struct ethhdr *eth;
517 int err;
518
519 bpf_dynptr_from_skb(ctx, 0, &data);
520 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
521 if (!eth)
522 goto out;
523 /* Ignore non-test packets */
524 if (!check_smac(eth))
525 goto out;
526
527 /* Expect read-write metadata before unclone */
528 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
529 if (bpf_dynptr_is_rdonly(&meta))
530 goto out;
531
532 err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
533 if (err || !check_metadata(meta_have))
534 goto out;
535
536 /* Helper write to metadata will unclone the packet */
537 bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
538
539 err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
540 if (err || !check_metadata(meta_have))
541 goto out;
542
543 test_pass = true;
544 out:
545 return TC_ACT_SHOT;
546 }
547
548 SEC("tc")
helper_skb_vlan_push_pop(struct __sk_buff * ctx)549 int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
550 {
551 int err;
552
553 /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
554 * secondary tag push triggers an actual MAC header modification.
555 */
556 err = bpf_skb_vlan_push(ctx, 0, 42);
557 if (err)
558 goto out;
559 err = bpf_skb_vlan_push(ctx, 0, 207);
560 if (err)
561 goto out;
562
563 if (!check_skb_metadata(ctx))
564 goto out;
565
566 err = bpf_skb_vlan_pop(ctx);
567 if (err)
568 goto out;
569 err = bpf_skb_vlan_pop(ctx);
570 if (err)
571 goto out;
572
573 if (!check_skb_metadata(ctx))
574 goto out;
575
576 test_pass = true;
577 out:
578 return TC_ACT_SHOT;
579 }
580
581 SEC("tc")
helper_skb_adjust_room(struct __sk_buff * ctx)582 int helper_skb_adjust_room(struct __sk_buff *ctx)
583 {
584 int err;
585
586 /* Grow a 1 byte hole after the MAC header */
587 err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
588 if (err)
589 goto out;
590
591 if (!check_skb_metadata(ctx))
592 goto out;
593
594 /* Shrink a 1 byte hole after the MAC header */
595 err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
596 if (err)
597 goto out;
598
599 if (!check_skb_metadata(ctx))
600 goto out;
601
602 /* Grow a 256 byte hole to trigger head reallocation */
603 err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
604 if (err)
605 goto out;
606
607 if (!check_skb_metadata(ctx))
608 goto out;
609
610 test_pass = true;
611 out:
612 return TC_ACT_SHOT;
613 }
614
615 SEC("tc")
helper_skb_change_head_tail(struct __sk_buff * ctx)616 int helper_skb_change_head_tail(struct __sk_buff *ctx)
617 {
618 int err;
619
620 /* Reserve 1 extra in the front for packet data */
621 err = bpf_skb_change_head(ctx, 1, 0);
622 if (err)
623 goto out;
624
625 if (!check_skb_metadata(ctx))
626 goto out;
627
628 /* Reserve 256 extra bytes in the front to trigger head reallocation */
629 err = bpf_skb_change_head(ctx, 256, 0);
630 if (err)
631 goto out;
632
633 if (!check_skb_metadata(ctx))
634 goto out;
635
636 /* Reserve 4k extra bytes in the back to trigger head reallocation */
637 err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
638 if (err)
639 goto out;
640
641 if (!check_skb_metadata(ctx))
642 goto out;
643
644 test_pass = true;
645 out:
646 return TC_ACT_SHOT;
647 }
648
649 SEC("tc")
helper_skb_change_proto(struct __sk_buff * ctx)650 int helper_skb_change_proto(struct __sk_buff *ctx)
651 {
652 int err;
653
654 err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
655 if (err)
656 goto out;
657
658 if (!check_skb_metadata(ctx))
659 goto out;
660
661 err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
662 if (err)
663 goto out;
664
665 if (!check_skb_metadata(ctx))
666 goto out;
667
668 test_pass = true;
669 out:
670 return TC_ACT_SHOT;
671 }
672
673 char _license[] SEC("license") = "GPL";
674