xref: /linux/tools/testing/selftests/bpf/progs/test_tc_tunnel.c (revision f6e0a4984c2e7244689ea87b62b433bed9d07e94)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <stdbool.h>
6 #include <string.h>
7 
8 #include <linux/stddef.h>
9 #include <linux/bpf.h>
10 #include <linux/if_ether.h>
11 #include <linux/in.h>
12 #include <linux/ip.h>
13 #include <linux/ipv6.h>
14 #include <linux/mpls.h>
15 #include <linux/tcp.h>
16 #include <linux/udp.h>
17 #include <linux/pkt_cls.h>
18 #include <linux/types.h>
19 
20 #include <bpf/bpf_endian.h>
21 #include <bpf/bpf_helpers.h>
22 #include "bpf_compiler.h"
23 
24 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
25 
26 static const int cfg_port = 8000;
27 
28 static const int cfg_udp_src = 20000;
29 
30 #define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
31 
32 #define	UDP_PORT		5555
33 #define	MPLS_OVER_UDP_PORT	6635
34 #define	ETH_OVER_UDP_PORT	7777
35 #define	VXLAN_UDP_PORT		8472
36 
37 #define	EXTPROTO_VXLAN	0x1
38 
39 #define	VXLAN_N_VID     (1u << 24)
40 #define	VXLAN_VNI_MASK	bpf_htonl((VXLAN_N_VID - 1) << 8)
41 #define	VXLAN_FLAGS     0x8
42 #define	VXLAN_VNI       1
43 
44 #ifndef NEXTHDR_DEST
45 #define NEXTHDR_DEST	60
46 #endif
47 
48 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
49 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
50 						     MPLS_LS_S_MASK | 0xff);
51 
/* Minimal VXLAN header: two 32-bit big-endian words, no padding. */
struct vxlanhdr {
	/* flags word, filled from VXLAN_FLAGS by the encap helpers */
	__be32 vx_flags;
	/* VNI word; VXLAN_VNI_MASK selects the bits holding the VNI */
	__be32 vx_vni;
} __attribute__((packed));
56 
/* Base GRE header as written by the encap helpers: flags + protocol. */
struct gre_hdr {
	/* always written as 0 by the encap helpers in this file */
	__be16 flags;
	/* EtherType of the encapsulated payload, in network byte order */
	__be16 protocol;
} __attribute__((packed));
61 
62 union l4hdr {
63 	struct udphdr udp;
64 	struct gre_hdr gre;
65 };
66 
67 struct v4hdr {
68 	struct iphdr ip;
69 	union l4hdr l4hdr;
70 	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
71 } __attribute__((packed));
72 
73 struct v6hdr {
74 	struct ipv6hdr ip;
75 	union l4hdr l4hdr;
76 	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
77 } __attribute__((packed));
78 
/* Compute the header checksum of an option-less IPv4 header in place.
 *
 * @iph: header to checksum; exactly sizeof(*iph) bytes are summed, so the
 *       header must not carry IP options.
 *
 * The check field is cleared, the header is summed as 16-bit words, the
 * carries are folded back into the low 16 bits and the one's complement
 * of the result is stored in iph->check.
 */
static __always_inline void set_ipv4_csum(struct iphdr *iph)
{
	__u16 *iph16 = (__u16 *)iph;
	__u32 csum;
	int i;

	iph->check = 0;

	__pragma_loop_unroll_full
	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
		csum += *iph16++;

	/* Fold twice: adding the carries to the low half can itself carry
	 * out of bit 15 (e.g. 0x1ffff -> 0x10000), and that carry would be
	 * lost when the result is truncated to the 16-bit check field.
	 */
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);

	iph->check = ~csum;
}
93 
94 static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
95 					__u16 l2_proto, __u16 ext_proto)
96 {
97 	__u16 udp_dst = UDP_PORT;
98 	struct iphdr iph_inner;
99 	struct v4hdr h_outer;
100 	struct tcphdr tcph;
101 	int olen, l2_len;
102 	__u8 *l2_hdr = NULL;
103 	int tcp_off;
104 	__u64 flags;
105 
106 	/* Most tests encapsulate a packet into a tunnel with the same
107 	 * network protocol, and derive the outer header fields from
108 	 * the inner header.
109 	 *
110 	 * The 6in4 case tests different inner and outer protocols. As
111 	 * the inner is ipv6, but the outer expects an ipv4 header as
112 	 * input, manually build a struct iphdr based on the ipv6hdr.
113 	 */
114 	if (encap_proto == IPPROTO_IPV6) {
115 		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
116 		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
117 		struct ipv6hdr iph6_inner;
118 
119 		/* Read the IPv6 header */
120 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
121 				       sizeof(iph6_inner)) < 0)
122 			return TC_ACT_OK;
123 
124 		/* Derive the IPv4 header fields from the IPv6 header */
125 		memset(&iph_inner, 0, sizeof(iph_inner));
126 		iph_inner.version = 4;
127 		iph_inner.ihl = 5;
128 		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
129 				    bpf_ntohs(iph6_inner.payload_len));
130 		iph_inner.ttl = iph6_inner.hop_limit - 1;
131 		iph_inner.protocol = iph6_inner.nexthdr;
132 		iph_inner.saddr = __bpf_constant_htonl(saddr);
133 		iph_inner.daddr = __bpf_constant_htonl(daddr);
134 
135 		tcp_off = sizeof(iph6_inner);
136 	} else {
137 		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
138 				       sizeof(iph_inner)) < 0)
139 			return TC_ACT_OK;
140 
141 		tcp_off = sizeof(iph_inner);
142 	}
143 
144 	/* filter only packets we want */
145 	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
146 		return TC_ACT_OK;
147 
148 	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
149 			       &tcph, sizeof(tcph)) < 0)
150 		return TC_ACT_OK;
151 
152 	if (tcph.dest != __bpf_constant_htons(cfg_port))
153 		return TC_ACT_OK;
154 
155 	olen = sizeof(h_outer.ip);
156 	l2_len = 0;
157 
158 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
159 
160 	switch (l2_proto) {
161 	case ETH_P_MPLS_UC:
162 		l2_len = sizeof(mpls_label);
163 		udp_dst = MPLS_OVER_UDP_PORT;
164 		break;
165 	case ETH_P_TEB:
166 		l2_len = ETH_HLEN;
167 		if (ext_proto & EXTPROTO_VXLAN) {
168 			udp_dst = VXLAN_UDP_PORT;
169 			l2_len += sizeof(struct vxlanhdr);
170 		} else
171 			udp_dst = ETH_OVER_UDP_PORT;
172 		break;
173 	}
174 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
175 
176 	switch (encap_proto) {
177 	case IPPROTO_GRE:
178 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
179 		olen += sizeof(h_outer.l4hdr.gre);
180 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
181 		h_outer.l4hdr.gre.flags = 0;
182 		break;
183 	case IPPROTO_UDP:
184 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
185 		olen += sizeof(h_outer.l4hdr.udp);
186 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
187 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
188 		h_outer.l4hdr.udp.check = 0;
189 		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
190 						  sizeof(h_outer.l4hdr.udp) +
191 						  l2_len);
192 		break;
193 	case IPPROTO_IPIP:
194 	case IPPROTO_IPV6:
195 		break;
196 	default:
197 		return TC_ACT_OK;
198 	}
199 
200 	/* add L2 encap (if specified) */
201 	l2_hdr = (__u8 *)&h_outer + olen;
202 	switch (l2_proto) {
203 	case ETH_P_MPLS_UC:
204 		*(__u32 *)l2_hdr = mpls_label;
205 		break;
206 	case ETH_P_TEB:
207 		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
208 
209 		if (ext_proto & EXTPROTO_VXLAN) {
210 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
211 
212 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
213 			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
214 
215 			l2_hdr += sizeof(struct vxlanhdr);
216 		}
217 
218 		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
219 			return TC_ACT_SHOT;
220 
221 		break;
222 	}
223 	olen += l2_len;
224 
225 	/* add room between mac and network header */
226 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
227 		return TC_ACT_SHOT;
228 
229 	/* prepare new outer network header */
230 	h_outer.ip = iph_inner;
231 	h_outer.ip.tot_len = bpf_htons(olen +
232 				       bpf_ntohs(h_outer.ip.tot_len));
233 	h_outer.ip.protocol = encap_proto;
234 
235 	set_ipv4_csum((void *)&h_outer.ip);
236 
237 	/* store new outer network header */
238 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
239 				BPF_F_INVALIDATE_HASH) < 0)
240 		return TC_ACT_SHOT;
241 
242 	/* if changing outer proto type, update eth->h_proto */
243 	if (encap_proto == IPPROTO_IPV6) {
244 		struct ethhdr eth;
245 
246 		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
247 			return TC_ACT_SHOT;
248 		eth.h_proto = bpf_htons(ETH_P_IP);
249 		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
250 			return TC_ACT_SHOT;
251 	}
252 
253 	return TC_ACT_OK;
254 }
255 
/* __encap_ipv4() without any extension protocol (no VXLAN header). */
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	const __u16 no_ext_proto = 0;

	return __encap_ipv4(skb, encap_proto, l2_proto, no_ext_proto);
}
261 
262 static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
263 					__u16 l2_proto, __u16 ext_proto)
264 {
265 	__u16 udp_dst = UDP_PORT;
266 	struct ipv6hdr iph_inner;
267 	struct v6hdr h_outer;
268 	struct tcphdr tcph;
269 	int olen, l2_len;
270 	__u8 *l2_hdr = NULL;
271 	__u16 tot_len;
272 	__u64 flags;
273 
274 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
275 			       sizeof(iph_inner)) < 0)
276 		return TC_ACT_OK;
277 
278 	/* filter only packets we want */
279 	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
280 			       &tcph, sizeof(tcph)) < 0)
281 		return TC_ACT_OK;
282 
283 	if (tcph.dest != __bpf_constant_htons(cfg_port))
284 		return TC_ACT_OK;
285 
286 	olen = sizeof(h_outer.ip);
287 	l2_len = 0;
288 
289 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
290 
291 	switch (l2_proto) {
292 	case ETH_P_MPLS_UC:
293 		l2_len = sizeof(mpls_label);
294 		udp_dst = MPLS_OVER_UDP_PORT;
295 		break;
296 	case ETH_P_TEB:
297 		l2_len = ETH_HLEN;
298 		if (ext_proto & EXTPROTO_VXLAN) {
299 			udp_dst = VXLAN_UDP_PORT;
300 			l2_len += sizeof(struct vxlanhdr);
301 		} else
302 			udp_dst = ETH_OVER_UDP_PORT;
303 		break;
304 	}
305 	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
306 
307 	switch (encap_proto) {
308 	case IPPROTO_GRE:
309 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
310 		olen += sizeof(h_outer.l4hdr.gre);
311 		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
312 		h_outer.l4hdr.gre.flags = 0;
313 		break;
314 	case IPPROTO_UDP:
315 		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
316 		olen += sizeof(h_outer.l4hdr.udp);
317 		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
318 		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
319 		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
320 			  sizeof(h_outer.l4hdr.udp) + l2_len;
321 		h_outer.l4hdr.udp.check = 0;
322 		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
323 		break;
324 	case IPPROTO_IPV6:
325 		break;
326 	default:
327 		return TC_ACT_OK;
328 	}
329 
330 	/* add L2 encap (if specified) */
331 	l2_hdr = (__u8 *)&h_outer + olen;
332 	switch (l2_proto) {
333 	case ETH_P_MPLS_UC:
334 		*(__u32 *)l2_hdr = mpls_label;
335 		break;
336 	case ETH_P_TEB:
337 		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
338 
339 		if (ext_proto & EXTPROTO_VXLAN) {
340 			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
341 
342 			vxlan_hdr->vx_flags = VXLAN_FLAGS;
343 			vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
344 
345 			l2_hdr += sizeof(struct vxlanhdr);
346 		}
347 
348 		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
349 			return TC_ACT_SHOT;
350 		break;
351 	}
352 	olen += l2_len;
353 
354 	/* add room between mac and network header */
355 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
356 		return TC_ACT_SHOT;
357 
358 	/* prepare new outer network header */
359 	h_outer.ip = iph_inner;
360 	h_outer.ip.payload_len = bpf_htons(olen +
361 					   bpf_ntohs(h_outer.ip.payload_len));
362 
363 	h_outer.ip.nexthdr = encap_proto;
364 
365 	/* store new outer network header */
366 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
367 				BPF_F_INVALIDATE_HASH) < 0)
368 		return TC_ACT_SHOT;
369 
370 	return TC_ACT_OK;
371 }
372 
373 static int encap_ipv6_ipip6(struct __sk_buff *skb)
374 {
375 	struct iphdr iph_inner;
376 	struct v6hdr h_outer;
377 	struct tcphdr tcph;
378 	struct ethhdr eth;
379 	__u64 flags;
380 	int olen;
381 
382 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
383 			       sizeof(iph_inner)) < 0)
384 		return TC_ACT_OK;
385 
386 	/* filter only packets we want */
387 	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
388 			       &tcph, sizeof(tcph)) < 0)
389 		return TC_ACT_OK;
390 
391 	if (tcph.dest != __bpf_constant_htons(cfg_port))
392 		return TC_ACT_OK;
393 
394 	olen = sizeof(h_outer.ip);
395 
396 	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
397 
398 	/* add room between mac and network header */
399 	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
400 		return TC_ACT_SHOT;
401 
402 	/* prepare new outer network header */
403 	memset(&h_outer.ip, 0, sizeof(h_outer.ip));
404 	h_outer.ip.version = 6;
405 	h_outer.ip.hop_limit = iph_inner.ttl;
406 	h_outer.ip.saddr.s6_addr[1] = 0xfd;
407 	h_outer.ip.saddr.s6_addr[15] = 1;
408 	h_outer.ip.daddr.s6_addr[1] = 0xfd;
409 	h_outer.ip.daddr.s6_addr[15] = 2;
410 	h_outer.ip.payload_len = iph_inner.tot_len;
411 	h_outer.ip.nexthdr = IPPROTO_IPIP;
412 
413 	/* store new outer network header */
414 	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
415 				BPF_F_INVALIDATE_HASH) < 0)
416 		return TC_ACT_SHOT;
417 
418 	/* update eth->h_proto */
419 	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
420 		return TC_ACT_SHOT;
421 	eth.h_proto = bpf_htons(ETH_P_IPV6);
422 	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
423 		return TC_ACT_SHOT;
424 
425 	return TC_ACT_OK;
426 }
427 
/* __encap_ipv6() without any extension protocol (no VXLAN header). */
static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	const __u16 no_ext_proto = 0;

	return __encap_ipv6(skb, encap_proto, l2_proto, no_ext_proto);
}
433 
434 SEC("encap_ipip_none")
435 int __encap_ipip_none(struct __sk_buff *skb)
436 {
437 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
438 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
439 	else
440 		return TC_ACT_OK;
441 }
442 
443 SEC("encap_gre_none")
444 int __encap_gre_none(struct __sk_buff *skb)
445 {
446 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
447 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
448 	else
449 		return TC_ACT_OK;
450 }
451 
452 SEC("encap_gre_mpls")
453 int __encap_gre_mpls(struct __sk_buff *skb)
454 {
455 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
456 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
457 	else
458 		return TC_ACT_OK;
459 }
460 
461 SEC("encap_gre_eth")
462 int __encap_gre_eth(struct __sk_buff *skb)
463 {
464 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
465 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
466 	else
467 		return TC_ACT_OK;
468 }
469 
470 SEC("encap_udp_none")
471 int __encap_udp_none(struct __sk_buff *skb)
472 {
473 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
474 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
475 	else
476 		return TC_ACT_OK;
477 }
478 
479 SEC("encap_udp_mpls")
480 int __encap_udp_mpls(struct __sk_buff *skb)
481 {
482 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
483 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
484 	else
485 		return TC_ACT_OK;
486 }
487 
488 SEC("encap_udp_eth")
489 int __encap_udp_eth(struct __sk_buff *skb)
490 {
491 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
492 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
493 	else
494 		return TC_ACT_OK;
495 }
496 
497 SEC("encap_vxlan_eth")
498 int __encap_vxlan_eth(struct __sk_buff *skb)
499 {
500 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
501 		return __encap_ipv4(skb, IPPROTO_UDP,
502 				    ETH_P_TEB,
503 				    EXTPROTO_VXLAN);
504 	else
505 		return TC_ACT_OK;
506 }
507 
508 SEC("encap_sit_none")
509 int __encap_sit_none(struct __sk_buff *skb)
510 {
511 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
512 		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
513 	else
514 		return TC_ACT_OK;
515 }
516 
517 SEC("encap_ip6tnl_none")
518 int __encap_ip6tnl_none(struct __sk_buff *skb)
519 {
520 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
521 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
522 	else
523 		return TC_ACT_OK;
524 }
525 
526 SEC("encap_ipip6_none")
527 int __encap_ipip6_none(struct __sk_buff *skb)
528 {
529 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
530 		return encap_ipv6_ipip6(skb);
531 	else
532 		return TC_ACT_OK;
533 }
534 
535 SEC("encap_ip6gre_none")
536 int __encap_ip6gre_none(struct __sk_buff *skb)
537 {
538 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
539 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
540 	else
541 		return TC_ACT_OK;
542 }
543 
544 SEC("encap_ip6gre_mpls")
545 int __encap_ip6gre_mpls(struct __sk_buff *skb)
546 {
547 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
548 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
549 	else
550 		return TC_ACT_OK;
551 }
552 
553 SEC("encap_ip6gre_eth")
554 int __encap_ip6gre_eth(struct __sk_buff *skb)
555 {
556 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
557 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
558 	else
559 		return TC_ACT_OK;
560 }
561 
562 SEC("encap_ip6udp_none")
563 int __encap_ip6udp_none(struct __sk_buff *skb)
564 {
565 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
566 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
567 	else
568 		return TC_ACT_OK;
569 }
570 
571 SEC("encap_ip6udp_mpls")
572 int __encap_ip6udp_mpls(struct __sk_buff *skb)
573 {
574 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
575 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
576 	else
577 		return TC_ACT_OK;
578 }
579 
580 SEC("encap_ip6udp_eth")
581 int __encap_ip6udp_eth(struct __sk_buff *skb)
582 {
583 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
584 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
585 	else
586 		return TC_ACT_OK;
587 }
588 
589 SEC("encap_ip6vxlan_eth")
590 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
591 {
592 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
593 		return __encap_ipv6(skb, IPPROTO_UDP,
594 				    ETH_P_TEB,
595 				    EXTPROTO_VXLAN);
596 	else
597 		return TC_ACT_OK;
598 }
599 
600 static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
601 {
602 	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
603 	struct ipv6_opt_hdr ip6_opt_hdr;
604 	struct gre_hdr greh;
605 	struct udphdr udph;
606 	int olen = len;
607 
608 	switch (proto) {
609 	case IPPROTO_IPIP:
610 		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
611 		break;
612 	case IPPROTO_IPV6:
613 		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
614 		break;
615 	case NEXTHDR_DEST:
616 		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
617 				       sizeof(ip6_opt_hdr)) < 0)
618 			return TC_ACT_OK;
619 		switch (ip6_opt_hdr.nexthdr) {
620 		case IPPROTO_IPIP:
621 			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
622 			break;
623 		case IPPROTO_IPV6:
624 			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
625 			break;
626 		default:
627 			return TC_ACT_OK;
628 		}
629 		break;
630 	case IPPROTO_GRE:
631 		olen += sizeof(struct gre_hdr);
632 		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
633 			return TC_ACT_OK;
634 		switch (bpf_ntohs(greh.protocol)) {
635 		case ETH_P_MPLS_UC:
636 			olen += sizeof(mpls_label);
637 			break;
638 		case ETH_P_TEB:
639 			olen += ETH_HLEN;
640 			break;
641 		}
642 		break;
643 	case IPPROTO_UDP:
644 		olen += sizeof(struct udphdr);
645 		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
646 			return TC_ACT_OK;
647 		switch (bpf_ntohs(udph.dest)) {
648 		case MPLS_OVER_UDP_PORT:
649 			olen += sizeof(mpls_label);
650 			break;
651 		case ETH_OVER_UDP_PORT:
652 			olen += ETH_HLEN;
653 			break;
654 		case VXLAN_UDP_PORT:
655 			olen += ETH_HLEN + sizeof(struct vxlanhdr);
656 			break;
657 		}
658 		break;
659 	default:
660 		return TC_ACT_OK;
661 	}
662 
663 	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
664 		return TC_ACT_SHOT;
665 
666 	return TC_ACT_OK;
667 }
668 
/* Decapsulate a packet whose outer header is an option-less IPv4 header. */
static int decap_ipv4(struct __sk_buff *skb)
{
	struct iphdr outer;
	long err;

	err = bpf_skb_load_bytes(skb, ETH_HLEN, &outer, sizeof(outer));

	/* unreadable header or IP options present: leave the packet alone */
	if (err < 0 || outer.ihl != 5)
		return TC_ACT_OK;

	return decap_internal(skb, ETH_HLEN, sizeof(outer), outer.protocol);
}
683 
/* Decapsulate a packet whose outer header is an IPv6 header. */
static int decap_ipv6(struct __sk_buff *skb)
{
	struct ipv6hdr outer;
	long err;

	err = bpf_skb_load_bytes(skb, ETH_HLEN, &outer, sizeof(outer));
	if (err < 0)
		return TC_ACT_OK;

	/* the next-header field selects how much more gets stripped */
	return decap_internal(skb, ETH_HLEN, sizeof(outer), outer.nexthdr);
}
695 
696 SEC("decap")
697 int decap_f(struct __sk_buff *skb)
698 {
699 	switch (skb->protocol) {
700 	case __bpf_constant_htons(ETH_P_IP):
701 		return decap_ipv4(skb);
702 	case __bpf_constant_htons(ETH_P_IPV6):
703 		return decap_ipv6(skb);
704 	default:
705 		/* does not match, ignore */
706 		return TC_ACT_OK;
707 	}
708 }
709 
710 char __license[] SEC("license") = "GPL";
711