// SPDX-License-Identifier: GPL-2.0

/* In-place tunneling */
4
5 #include <stdbool.h>
6 #include <string.h>
7
8 #include <linux/stddef.h>
9 #include <linux/bpf.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/mpls.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/types.h>
19
20 #include <bpf/bpf_endian.h>
21 #include <bpf/bpf_helpers.h>
22 #include "bpf_compiler.h"
23
24 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
25
/* Inner TCP destination port that selects the flows to encapsulate. */
static const int cfg_port = 8000;

/* Source port written into the outer UDP header. */
static const int cfg_udp_src = 20000;

/* Scratch space reserved behind the outer L3/L4 headers; sized for the
 * largest L2 encap built here (vxlan header followed by an Ethernet header).
 */
#define L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)

/* Outer UDP destination ports; the decap side keys on these to learn how
 * many extra bytes follow the UDP header.
 */
#define UDP_PORT		5555
#define MPLS_OVER_UDP_PORT	6635
#define ETH_OVER_UDP_PORT	7777
#define VXLAN_UDP_PORT		8472

/* Bits accepted in the ext_proto argument of the encap helpers. */
#define EXTPROTO_VXLAN	0x1

/* vxlan header values: size of the VNI space, a mask over the VNI bits of
 * the vx_vni word, the flags value and the VNI used by the encap helpers.
 */
#define VXLAN_N_VID	(1u << 24)
#define VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
#define VXLAN_FLAGS	0x8
#define VXLAN_VNI	1

/* Fallback for headers that do not provide the IPv6 destination-options
 * next-header value.
 */
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST	60
#endif
47
48 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
49 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
50 MPLS_LS_S_MASK | 0xff);
51
/* vxlan header as written after the outer UDP header: two big-endian
 * 32-bit words, the first holding the flags and the second the VNI.
 */
struct vxlanhdr {
	__be32 vx_flags;	/* flags word */
	__be32 vx_vni;		/* VNI word */
} __attribute__((packed));
56
/* Base GRE header: flags word followed by the protocol type of the payload,
 * both big-endian.
 */
struct gre_hdr {
	__be16 flags;		/* written as 0 by the encap helpers */
	__be16 protocol;	/* ETH_P_* value of the encapsulated payload */
} __attribute__((packed));
61
62 union l4hdr {
63 struct udphdr udp;
64 struct gre_hdr gre;
65 };
66
67 struct v4hdr {
68 struct iphdr ip;
69 union l4hdr l4hdr;
70 __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
71 } __attribute__((packed));
72
73 struct v6hdr {
74 struct ipv6hdr ip;
75 union l4hdr l4hdr;
76 __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
77 } __attribute__((packed));
78
set_ipv4_csum(struct iphdr * iph)79 static __always_inline void set_ipv4_csum(struct iphdr *iph)
80 {
81 __u16 *iph16 = (__u16 *)iph;
82 __u32 csum;
83 int i;
84
85 iph->check = 0;
86
87 __pragma_loop_unroll_full
88 for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
89 csum += *iph16++;
90
91 iph->check = ~((csum & 0xffff) + (csum >> 16));
92 }
93
__encap_ipv4(struct __sk_buff * skb,__u8 encap_proto,__u16 l2_proto,__u16 ext_proto)94 static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
95 __u16 l2_proto, __u16 ext_proto)
96 {
97 __u16 udp_dst = UDP_PORT;
98 struct iphdr iph_inner;
99 struct v4hdr h_outer;
100 struct tcphdr tcph;
101 int olen, l2_len;
102 __u8 *l2_hdr = NULL;
103 int tcp_off;
104 __u64 flags;
105
106 /* Most tests encapsulate a packet into a tunnel with the same
107 * network protocol, and derive the outer header fields from
108 * the inner header.
109 *
110 * The 6in4 case tests different inner and outer protocols. As
111 * the inner is ipv6, but the outer expects an ipv4 header as
112 * input, manually build a struct iphdr based on the ipv6hdr.
113 */
114 if (encap_proto == IPPROTO_IPV6) {
115 const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
116 const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
117 struct ipv6hdr iph6_inner;
118
119 /* Read the IPv6 header */
120 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
121 sizeof(iph6_inner)) < 0)
122 return TC_ACT_OK;
123
124 /* Derive the IPv4 header fields from the IPv6 header */
125 memset(&iph_inner, 0, sizeof(iph_inner));
126 iph_inner.version = 4;
127 iph_inner.ihl = 5;
128 iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
129 bpf_ntohs(iph6_inner.payload_len));
130 iph_inner.ttl = iph6_inner.hop_limit - 1;
131 iph_inner.protocol = iph6_inner.nexthdr;
132 iph_inner.saddr = __bpf_constant_htonl(saddr);
133 iph_inner.daddr = __bpf_constant_htonl(daddr);
134
135 tcp_off = sizeof(iph6_inner);
136 } else {
137 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
138 sizeof(iph_inner)) < 0)
139 return TC_ACT_OK;
140
141 tcp_off = sizeof(iph_inner);
142 }
143
144 /* filter only packets we want */
145 if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
146 return TC_ACT_OK;
147
148 if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
149 &tcph, sizeof(tcph)) < 0)
150 return TC_ACT_OK;
151
152 if (tcph.dest != __bpf_constant_htons(cfg_port))
153 return TC_ACT_OK;
154
155 olen = sizeof(h_outer.ip);
156 l2_len = 0;
157
158 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
159
160 switch (l2_proto) {
161 case ETH_P_MPLS_UC:
162 l2_len = sizeof(mpls_label);
163 udp_dst = MPLS_OVER_UDP_PORT;
164 break;
165 case ETH_P_TEB:
166 l2_len = ETH_HLEN;
167 if (ext_proto & EXTPROTO_VXLAN) {
168 udp_dst = VXLAN_UDP_PORT;
169 l2_len += sizeof(struct vxlanhdr);
170 } else
171 udp_dst = ETH_OVER_UDP_PORT;
172 break;
173 }
174 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
175
176 switch (encap_proto) {
177 case IPPROTO_GRE:
178 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
179 olen += sizeof(h_outer.l4hdr.gre);
180 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
181 h_outer.l4hdr.gre.flags = 0;
182 break;
183 case IPPROTO_UDP:
184 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
185 olen += sizeof(h_outer.l4hdr.udp);
186 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
187 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
188 h_outer.l4hdr.udp.check = 0;
189 h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
190 sizeof(h_outer.l4hdr.udp) +
191 l2_len);
192 break;
193 case IPPROTO_IPIP:
194 case IPPROTO_IPV6:
195 break;
196 default:
197 return TC_ACT_OK;
198 }
199
200 /* add L2 encap (if specified) */
201 l2_hdr = (__u8 *)&h_outer + olen;
202 switch (l2_proto) {
203 case ETH_P_MPLS_UC:
204 *(__u32 *)l2_hdr = mpls_label;
205 break;
206 case ETH_P_TEB:
207 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
208
209 if (ext_proto & EXTPROTO_VXLAN) {
210 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
211
212 vxlan_hdr->vx_flags = VXLAN_FLAGS;
213 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
214
215 l2_hdr += sizeof(struct vxlanhdr);
216 }
217
218 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
219 return TC_ACT_SHOT;
220
221 break;
222 }
223 olen += l2_len;
224
225 /* add room between mac and network header */
226 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
227 return TC_ACT_SHOT;
228
229 /* prepare new outer network header */
230 h_outer.ip = iph_inner;
231 h_outer.ip.tot_len = bpf_htons(olen +
232 bpf_ntohs(h_outer.ip.tot_len));
233 h_outer.ip.protocol = encap_proto;
234
235 set_ipv4_csum((void *)&h_outer.ip);
236
237 /* store new outer network header */
238 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
239 BPF_F_INVALIDATE_HASH) < 0)
240 return TC_ACT_SHOT;
241
242 /* if changing outer proto type, update eth->h_proto */
243 if (encap_proto == IPPROTO_IPV6) {
244 struct ethhdr eth;
245
246 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
247 return TC_ACT_SHOT;
248 eth.h_proto = bpf_htons(ETH_P_IP);
249 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
250 return TC_ACT_SHOT;
251 }
252
253 return TC_ACT_OK;
254 }
255
encap_ipv4(struct __sk_buff * skb,__u8 encap_proto,__u16 l2_proto)256 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
257 __u16 l2_proto)
258 {
259 return __encap_ipv4(skb, encap_proto, l2_proto, 0);
260 }
261
__encap_ipv6(struct __sk_buff * skb,__u8 encap_proto,__u16 l2_proto,__u16 ext_proto)262 static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
263 __u16 l2_proto, __u16 ext_proto)
264 {
265 __u16 udp_dst = UDP_PORT;
266 struct ipv6hdr iph_inner;
267 struct v6hdr h_outer;
268 struct tcphdr tcph;
269 int olen, l2_len;
270 __u8 *l2_hdr = NULL;
271 __u16 tot_len;
272 __u64 flags;
273
274 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
275 sizeof(iph_inner)) < 0)
276 return TC_ACT_OK;
277
278 /* filter only packets we want */
279 if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
280 &tcph, sizeof(tcph)) < 0)
281 return TC_ACT_OK;
282
283 if (tcph.dest != __bpf_constant_htons(cfg_port))
284 return TC_ACT_OK;
285
286 olen = sizeof(h_outer.ip);
287 l2_len = 0;
288
289 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
290
291 switch (l2_proto) {
292 case ETH_P_MPLS_UC:
293 l2_len = sizeof(mpls_label);
294 udp_dst = MPLS_OVER_UDP_PORT;
295 break;
296 case ETH_P_TEB:
297 l2_len = ETH_HLEN;
298 if (ext_proto & EXTPROTO_VXLAN) {
299 udp_dst = VXLAN_UDP_PORT;
300 l2_len += sizeof(struct vxlanhdr);
301 } else
302 udp_dst = ETH_OVER_UDP_PORT;
303 break;
304 }
305 flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
306
307 switch (encap_proto) {
308 case IPPROTO_GRE:
309 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
310 olen += sizeof(h_outer.l4hdr.gre);
311 h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
312 h_outer.l4hdr.gre.flags = 0;
313 break;
314 case IPPROTO_UDP:
315 flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
316 olen += sizeof(h_outer.l4hdr.udp);
317 h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
318 h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
319 tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
320 sizeof(h_outer.l4hdr.udp) + l2_len;
321 h_outer.l4hdr.udp.check = 0;
322 h_outer.l4hdr.udp.len = bpf_htons(tot_len);
323 break;
324 case IPPROTO_IPV6:
325 break;
326 default:
327 return TC_ACT_OK;
328 }
329
330 /* add L2 encap (if specified) */
331 l2_hdr = (__u8 *)&h_outer + olen;
332 switch (l2_proto) {
333 case ETH_P_MPLS_UC:
334 *(__u32 *)l2_hdr = mpls_label;
335 break;
336 case ETH_P_TEB:
337 flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
338
339 if (ext_proto & EXTPROTO_VXLAN) {
340 struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
341
342 vxlan_hdr->vx_flags = VXLAN_FLAGS;
343 vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
344
345 l2_hdr += sizeof(struct vxlanhdr);
346 }
347
348 if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
349 return TC_ACT_SHOT;
350 break;
351 }
352 olen += l2_len;
353
354 /* add room between mac and network header */
355 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
356 return TC_ACT_SHOT;
357
358 /* prepare new outer network header */
359 h_outer.ip = iph_inner;
360 h_outer.ip.payload_len = bpf_htons(olen +
361 bpf_ntohs(h_outer.ip.payload_len));
362
363 h_outer.ip.nexthdr = encap_proto;
364
365 /* store new outer network header */
366 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
367 BPF_F_INVALIDATE_HASH) < 0)
368 return TC_ACT_SHOT;
369
370 return TC_ACT_OK;
371 }
372
encap_ipv6_ipip6(struct __sk_buff * skb)373 static int encap_ipv6_ipip6(struct __sk_buff *skb)
374 {
375 struct iphdr iph_inner;
376 struct v6hdr h_outer;
377 struct tcphdr tcph;
378 struct ethhdr eth;
379 __u64 flags;
380 int olen;
381
382 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
383 sizeof(iph_inner)) < 0)
384 return TC_ACT_OK;
385
386 /* filter only packets we want */
387 if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
388 &tcph, sizeof(tcph)) < 0)
389 return TC_ACT_OK;
390
391 if (tcph.dest != __bpf_constant_htons(cfg_port))
392 return TC_ACT_OK;
393
394 olen = sizeof(h_outer.ip);
395
396 flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
397
398 /* add room between mac and network header */
399 if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
400 return TC_ACT_SHOT;
401
402 /* prepare new outer network header */
403 memset(&h_outer.ip, 0, sizeof(h_outer.ip));
404 h_outer.ip.version = 6;
405 h_outer.ip.hop_limit = iph_inner.ttl;
406 h_outer.ip.saddr.s6_addr[1] = 0xfd;
407 h_outer.ip.saddr.s6_addr[15] = 1;
408 h_outer.ip.daddr.s6_addr[1] = 0xfd;
409 h_outer.ip.daddr.s6_addr[15] = 2;
410 h_outer.ip.payload_len = iph_inner.tot_len;
411 h_outer.ip.nexthdr = IPPROTO_IPIP;
412
413 /* store new outer network header */
414 if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
415 BPF_F_INVALIDATE_HASH) < 0)
416 return TC_ACT_SHOT;
417
418 /* update eth->h_proto */
419 if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth)) < 0)
420 return TC_ACT_SHOT;
421 eth.h_proto = bpf_htons(ETH_P_IPV6);
422 if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0) < 0)
423 return TC_ACT_SHOT;
424
425 return TC_ACT_OK;
426 }
427
encap_ipv6(struct __sk_buff * skb,__u8 encap_proto,__u16 l2_proto)428 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
429 __u16 l2_proto)
430 {
431 return __encap_ipv6(skb, encap_proto, l2_proto, 0);
432 }
433
434 SEC("encap_ipip_none")
__encap_ipip_none(struct __sk_buff * skb)435 int __encap_ipip_none(struct __sk_buff *skb)
436 {
437 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438 return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439 else
440 return TC_ACT_OK;
441 }
442
443 SEC("encap_gre_none")
__encap_gre_none(struct __sk_buff * skb)444 int __encap_gre_none(struct __sk_buff *skb)
445 {
446 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448 else
449 return TC_ACT_OK;
450 }
451
452 SEC("encap_gre_mpls")
__encap_gre_mpls(struct __sk_buff * skb)453 int __encap_gre_mpls(struct __sk_buff *skb)
454 {
455 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457 else
458 return TC_ACT_OK;
459 }
460
461 SEC("encap_gre_eth")
__encap_gre_eth(struct __sk_buff * skb)462 int __encap_gre_eth(struct __sk_buff *skb)
463 {
464 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465 return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466 else
467 return TC_ACT_OK;
468 }
469
470 SEC("encap_udp_none")
__encap_udp_none(struct __sk_buff * skb)471 int __encap_udp_none(struct __sk_buff *skb)
472 {
473 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475 else
476 return TC_ACT_OK;
477 }
478
479 SEC("encap_udp_mpls")
__encap_udp_mpls(struct __sk_buff * skb)480 int __encap_udp_mpls(struct __sk_buff *skb)
481 {
482 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484 else
485 return TC_ACT_OK;
486 }
487
488 SEC("encap_udp_eth")
__encap_udp_eth(struct __sk_buff * skb)489 int __encap_udp_eth(struct __sk_buff *skb)
490 {
491 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492 return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493 else
494 return TC_ACT_OK;
495 }
496
497 SEC("encap_vxlan_eth")
__encap_vxlan_eth(struct __sk_buff * skb)498 int __encap_vxlan_eth(struct __sk_buff *skb)
499 {
500 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501 return __encap_ipv4(skb, IPPROTO_UDP,
502 ETH_P_TEB,
503 EXTPROTO_VXLAN);
504 else
505 return TC_ACT_OK;
506 }
507
508 SEC("encap_sit_none")
__encap_sit_none(struct __sk_buff * skb)509 int __encap_sit_none(struct __sk_buff *skb)
510 {
511 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512 return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513 else
514 return TC_ACT_OK;
515 }
516
517 SEC("encap_ip6tnl_none")
__encap_ip6tnl_none(struct __sk_buff * skb)518 int __encap_ip6tnl_none(struct __sk_buff *skb)
519 {
520 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521 return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522 else
523 return TC_ACT_OK;
524 }
525
526 SEC("encap_ipip6_none")
__encap_ipip6_none(struct __sk_buff * skb)527 int __encap_ipip6_none(struct __sk_buff *skb)
528 {
529 if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530 return encap_ipv6_ipip6(skb);
531 else
532 return TC_ACT_OK;
533 }
534
535 SEC("encap_ip6gre_none")
__encap_ip6gre_none(struct __sk_buff * skb)536 int __encap_ip6gre_none(struct __sk_buff *skb)
537 {
538 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540 else
541 return TC_ACT_OK;
542 }
543
544 SEC("encap_ip6gre_mpls")
__encap_ip6gre_mpls(struct __sk_buff * skb)545 int __encap_ip6gre_mpls(struct __sk_buff *skb)
546 {
547 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549 else
550 return TC_ACT_OK;
551 }
552
553 SEC("encap_ip6gre_eth")
__encap_ip6gre_eth(struct __sk_buff * skb)554 int __encap_ip6gre_eth(struct __sk_buff *skb)
555 {
556 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557 return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558 else
559 return TC_ACT_OK;
560 }
561
562 SEC("encap_ip6udp_none")
__encap_ip6udp_none(struct __sk_buff * skb)563 int __encap_ip6udp_none(struct __sk_buff *skb)
564 {
565 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567 else
568 return TC_ACT_OK;
569 }
570
571 SEC("encap_ip6udp_mpls")
__encap_ip6udp_mpls(struct __sk_buff * skb)572 int __encap_ip6udp_mpls(struct __sk_buff *skb)
573 {
574 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576 else
577 return TC_ACT_OK;
578 }
579
580 SEC("encap_ip6udp_eth")
__encap_ip6udp_eth(struct __sk_buff * skb)581 int __encap_ip6udp_eth(struct __sk_buff *skb)
582 {
583 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584 return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585 else
586 return TC_ACT_OK;
587 }
588
589 SEC("encap_ip6vxlan_eth")
__encap_ip6vxlan_eth(struct __sk_buff * skb)590 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591 {
592 if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593 return __encap_ipv6(skb, IPPROTO_UDP,
594 ETH_P_TEB,
595 EXTPROTO_VXLAN);
596 else
597 return TC_ACT_OK;
598 }
599
decap_internal(struct __sk_buff * skb,int off,int len,char proto)600 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
601 {
602 __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
603 struct ipv6_opt_hdr ip6_opt_hdr;
604 struct gre_hdr greh;
605 struct udphdr udph;
606 int olen = len;
607
608 switch (proto) {
609 case IPPROTO_IPIP:
610 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
611 break;
612 case IPPROTO_IPV6:
613 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
614 break;
615 case NEXTHDR_DEST:
616 if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
617 sizeof(ip6_opt_hdr)) < 0)
618 return TC_ACT_OK;
619 switch (ip6_opt_hdr.nexthdr) {
620 case IPPROTO_IPIP:
621 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
622 break;
623 case IPPROTO_IPV6:
624 flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
625 break;
626 default:
627 return TC_ACT_OK;
628 }
629 break;
630 case IPPROTO_GRE:
631 olen += sizeof(struct gre_hdr);
632 if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
633 return TC_ACT_OK;
634 switch (bpf_ntohs(greh.protocol)) {
635 case ETH_P_MPLS_UC:
636 olen += sizeof(mpls_label);
637 break;
638 case ETH_P_TEB:
639 olen += ETH_HLEN;
640 break;
641 }
642 break;
643 case IPPROTO_UDP:
644 olen += sizeof(struct udphdr);
645 if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
646 return TC_ACT_OK;
647 switch (bpf_ntohs(udph.dest)) {
648 case MPLS_OVER_UDP_PORT:
649 olen += sizeof(mpls_label);
650 break;
651 case ETH_OVER_UDP_PORT:
652 olen += ETH_HLEN;
653 break;
654 case VXLAN_UDP_PORT:
655 olen += ETH_HLEN + sizeof(struct vxlanhdr);
656 break;
657 }
658 break;
659 default:
660 return TC_ACT_OK;
661 }
662
663 if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
664 return TC_ACT_SHOT;
665
666 return TC_ACT_OK;
667 }
668
decap_ipv4(struct __sk_buff * skb)669 static int decap_ipv4(struct __sk_buff *skb)
670 {
671 struct iphdr iph_outer;
672
673 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
674 sizeof(iph_outer)) < 0)
675 return TC_ACT_OK;
676
677 if (iph_outer.ihl != 5)
678 return TC_ACT_OK;
679
680 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
681 iph_outer.protocol);
682 }
683
decap_ipv6(struct __sk_buff * skb)684 static int decap_ipv6(struct __sk_buff *skb)
685 {
686 struct ipv6hdr iph_outer;
687
688 if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
689 sizeof(iph_outer)) < 0)
690 return TC_ACT_OK;
691
692 return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
693 iph_outer.nexthdr);
694 }
695
696 SEC("decap")
decap_f(struct __sk_buff * skb)697 int decap_f(struct __sk_buff *skb)
698 {
699 switch (skb->protocol) {
700 case __bpf_constant_htons(ETH_P_IP):
701 return decap_ipv4(skb);
702 case __bpf_constant_htons(ETH_P_IPV6):
703 return decap_ipv6(skb);
704 default:
705 /* does not match, ignore */
706 return TC_ACT_OK;
707 }
708 }
709
710 char __license[] SEC("license") = "GPL";
711