xref: /linux/tools/testing/selftests/bpf/progs/test_tc_tunnel.c (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <vmlinux.h>
6 
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_endian.h>
9 #include "bpf_tracing_net.h"
10 #include "bpf_compiler.h"
11 
12 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
13 
14 static const int cfg_port = 8000;
15 
16 static const int cfg_udp_src = 20000;
17 
18 #define ETH_P_MPLS_UC	0x8847
19 #define ETH_P_TEB	0x6558
20 
21 #define MPLS_LS_S_MASK	0x00000100
22 #define BPF_F_ADJ_ROOM_ENCAP_L2(len)			\
23 	(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK)	\
24 	 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
25 
26 #define	L2_PAD_SZ	(sizeof(struct vxlanhdr) + ETH_HLEN)
27 
28 #define	UDP_PORT		5555
29 #define	MPLS_OVER_UDP_PORT	6635
30 #define	ETH_OVER_UDP_PORT	7777
31 #define	VXLAN_UDP_PORT		8472
32 
33 #define	EXTPROTO_VXLAN	0x1
34 
35 #define	VXLAN_FLAGS     bpf_htonl(1<<27)
36 #define	VNI_ID		1
37 #define	VXLAN_VNI	bpf_htonl(VNI_ID << 8)
38 
39 #ifndef NEXTHDR_DEST
40 #define NEXTHDR_DEST	60
41 #endif
42 
43 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
44 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
45 						     MPLS_LS_S_MASK | 0xff);
/* Minimal GRE base header: flags word plus EtherType of the payload.
 * No checksum/key/sequence fields are used by these tests.
 */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

/* Outer tunnel L4 header: UDP or GRE, selected by encap_proto. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

/* Scratch layout of the full outer IPv4 encapsulation: IP header,
 * optional L4 header, then padding large enough for any of the L2
 * encaps used here (VXLAN header + inner Ethernet header is the
 * biggest). Only the first `olen` bytes are ever stored to the skb.
 */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));

/* Same as struct v4hdr but with an outer IPv6 header. */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
67 
68 static __always_inline void set_ipv4_csum(struct iphdr *iph)
69 {
70 	__u16 *iph16 = (__u16 *)iph;
71 	__u32 csum;
72 	int i;
73 
74 	iph->check = 0;
75 
76 	__pragma_loop_unroll_full
77 	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
78 		csum += *iph16++;
79 
80 	iph->check = ~((csum & 0xffff) + (csum >> 16));
81 }
82 
/* Encapsulate the packet in-place behind a new outer IPv4 header.
 *
 * @skb:         packet, assumed Ethernet + IPv4 (or IPv6 for the 6in4 case)
 * @encap_proto: outer tunnel protocol (IPPROTO_IPIP/GRE/UDP/IPV6)
 * @l2_proto:    optional inner L2 encap (ETH_P_MPLS_UC, ETH_P_TEB) or the
 *               plain L3 EtherType when no L2 encap is added
 * @ext_proto:   extra variant flags (EXTPROTO_VXLAN)
 *
 * Only TCP flows to cfg_port are touched; everything else passes through.
 * Returns TC_ACT_OK on pass-through or success, TC_ACT_SHOT on helper
 * failure.
 */
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	struct iphdr iph_inner = {0};
	__u16 udp_dst = UDP_PORT;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	int tcp_off;
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want: options-less IPv4 carrying TCP */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	/* only encapsulate the test flow (TCP dport == cfg_port) */
	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the total size of all outer headers */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* pick L2 encap size and, for UDP tunnels, the destination port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header, if the tunnel type has one */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		/* no UDP checksum is computed for the outer header */
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified), directly after the L4 header */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = VXLAN_VNI;

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* reuse the packet's own Ethernet header as inner L2 */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header, derived from the inner one */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
243 
/* Convenience wrapper around __encap_ipv4() with no extension protocol. */
static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
}
249 
/* Encapsulate the packet in-place behind a new outer IPv6 header.
 * IPv6 counterpart of __encap_ipv4(); same parameter semantics and
 * return values. Only TCP flows to cfg_port are touched.
 */
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	__u16 tot_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want (TCP assumed right after ipv6hdr) */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	/* only encapsulate the test flow (TCP dport == cfg_port) */
	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	/* olen accumulates the total size of all outer headers */
	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* pick L2 encap size and, for UDP tunnels, the destination port */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header, if the tunnel type has one */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		/* UDP length covers inner ipv6hdr + payload + udp + L2 encap */
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		/* no UDP checksum is computed for the outer header */
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified), directly after the L4 header */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = VXLAN_VNI;

			l2_hdr += sizeof(struct vxlanhdr);
		}

		/* reuse the packet's own Ethernet header as inner L2 */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header, derived from the inner one */
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
360 
/* Encapsulate an inner IPv4 packet inside an outer IPv6 header (4in6).
 * Unlike __encap_ipv6(), the outer header cannot be copied from the
 * inner one, so it is built from scratch with fixed synthetic
 * addresses. Only TCP flows to cfg_port are touched. Returns
 * TC_ACT_OK on pass-through or success, TC_ACT_SHOT on helper failure.
 */
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
	struct v6hdr h_outer = {0};
	struct iphdr iph_inner;
	struct tcphdr tcph;
	struct ethhdr eth;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want; ihl << 2 skips any IPv4 options */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header; remaining fields stay zero
	 * from the {0} initializer, addresses are fixed test values set
	 * byte-by-byte
	 */
	h_outer.ip.version = 6;
	h_outer.ip.hop_limit = iph_inner.ttl;
	h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
	h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
	h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
	h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
	/* IPv6 payload length == entire inner IPv4 packet length */
	h_outer.ip.payload_len = iph_inner.tot_len;
	h_outer.ip.nexthdr = IPPROTO_IPIP;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* update eth->h_proto: the outer protocol is now IPv6 */
	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
		return TC_ACT_SHOT;
	eth.h_proto = bpf_htons(ETH_P_IPV6);
	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
414 
/* Convenience wrapper around __encap_ipv6() with no extension protocol. */
static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
				      __u16 l2_proto)
{
	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}
420 
421 SEC("tc")
422 int __encap_ipip_none(struct __sk_buff *skb)
423 {
424 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
425 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
426 	else
427 		return TC_ACT_OK;
428 }
429 
430 SEC("tc")
431 int __encap_gre_none(struct __sk_buff *skb)
432 {
433 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
434 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
435 	else
436 		return TC_ACT_OK;
437 }
438 
439 SEC("tc")
440 int __encap_gre_mpls(struct __sk_buff *skb)
441 {
442 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
443 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
444 	else
445 		return TC_ACT_OK;
446 }
447 
448 SEC("tc")
449 int __encap_gre_eth(struct __sk_buff *skb)
450 {
451 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
452 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
453 	else
454 		return TC_ACT_OK;
455 }
456 
457 SEC("tc")
458 int __encap_udp_none(struct __sk_buff *skb)
459 {
460 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
461 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
462 	else
463 		return TC_ACT_OK;
464 }
465 
466 SEC("tc")
467 int __encap_udp_mpls(struct __sk_buff *skb)
468 {
469 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
470 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
471 	else
472 		return TC_ACT_OK;
473 }
474 
475 SEC("tc")
476 int __encap_udp_eth(struct __sk_buff *skb)
477 {
478 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
479 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
480 	else
481 		return TC_ACT_OK;
482 }
483 
484 SEC("tc")
485 int __encap_vxlan_eth(struct __sk_buff *skb)
486 {
487 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
488 		return __encap_ipv4(skb, IPPROTO_UDP,
489 				    ETH_P_TEB,
490 				    EXTPROTO_VXLAN);
491 	else
492 		return TC_ACT_OK;
493 }
494 
495 SEC("tc")
496 int __encap_sit_none(struct __sk_buff *skb)
497 {
498 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
499 		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
500 	else
501 		return TC_ACT_OK;
502 }
503 
504 SEC("tc")
505 int __encap_ip6tnl_none(struct __sk_buff *skb)
506 {
507 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
508 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
509 	else
510 		return TC_ACT_OK;
511 }
512 
513 SEC("tc")
514 int __encap_ipip6_none(struct __sk_buff *skb)
515 {
516 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
517 		return encap_ipv6_ipip6(skb);
518 	else
519 		return TC_ACT_OK;
520 }
521 
522 SEC("tc")
523 int __encap_ip6gre_none(struct __sk_buff *skb)
524 {
525 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
526 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
527 	else
528 		return TC_ACT_OK;
529 }
530 
531 SEC("tc")
532 int __encap_ip6gre_mpls(struct __sk_buff *skb)
533 {
534 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
535 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
536 	else
537 		return TC_ACT_OK;
538 }
539 
540 SEC("tc")
541 int __encap_ip6gre_eth(struct __sk_buff *skb)
542 {
543 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
544 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
545 	else
546 		return TC_ACT_OK;
547 }
548 
549 SEC("tc")
550 int __encap_ip6udp_none(struct __sk_buff *skb)
551 {
552 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
553 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
554 	else
555 		return TC_ACT_OK;
556 }
557 
558 SEC("tc")
559 int __encap_ip6udp_mpls(struct __sk_buff *skb)
560 {
561 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
562 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
563 	else
564 		return TC_ACT_OK;
565 }
566 
567 SEC("tc")
568 int __encap_ip6udp_eth(struct __sk_buff *skb)
569 {
570 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
571 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
572 	else
573 		return TC_ACT_OK;
574 }
575 
576 SEC("tc")
577 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
578 {
579 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
580 		return __encap_ipv6(skb, IPPROTO_UDP,
581 				    ETH_P_TEB,
582 				    EXTPROTO_VXLAN);
583 	else
584 		return TC_ACT_OK;
585 }
586 
/* Strip the tunnel headers added by the encap programs.
 *
 * @skb:   packet with outer Ethernet + outer L3 header
 * @off:   offset of the outer L3 header (ETH_HLEN)
 * @len:   length of the outer L3 header
 * @proto: next-protocol field of the outer L3 header
 *
 * olen starts at the outer L3 length and grows by whatever L4/L2
 * tunnel headers are detected; the whole amount is then removed with
 * one bpf_skb_adjust_room() call. Returns TC_ACT_OK on pass-through
 * or success, TC_ACT_SHOT if the room adjustment fails.
 */
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
	struct ipv6_opt_hdr ip6_opt_hdr;
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
		break;
	case IPPROTO_IPV6:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
		break;
	case NEXTHDR_DEST:
		/* destination options header: peek at its nexthdr to find
		 * the real inner protocol
		 */
		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
				       sizeof(ip6_opt_hdr)) < 0)
			return TC_ACT_OK;
		switch (ip6_opt_hdr.nexthdr) {
		case IPPROTO_IPIP:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
			break;
		case IPPROTO_IPV6:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
			break;
		default:
			return TC_ACT_OK;
		}
		break;
	case IPPROTO_GRE:
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		/* GRE protocol field tells whether an L2 encap follows */
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		/* UDP destination port identifies the tunnel variant */
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	/* remove all detected tunnel headers in one shrink operation */
	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
655 
656 static int decap_ipv4(struct __sk_buff *skb)
657 {
658 	struct iphdr iph_outer;
659 
660 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
661 			       sizeof(iph_outer)) < 0)
662 		return TC_ACT_OK;
663 
664 	if (iph_outer.ihl != 5)
665 		return TC_ACT_OK;
666 
667 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
668 			      iph_outer.protocol);
669 }
670 
671 static int decap_ipv6(struct __sk_buff *skb)
672 {
673 	struct ipv6hdr iph_outer;
674 
675 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
676 			       sizeof(iph_outer)) < 0)
677 		return TC_ACT_OK;
678 
679 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
680 			      iph_outer.nexthdr);
681 }
682 
683 SEC("tc")
684 int decap_f(struct __sk_buff *skb)
685 {
686 	switch (skb->protocol) {
687 	case __bpf_constant_htons(ETH_P_IP):
688 		return decap_ipv4(skb);
689 	case __bpf_constant_htons(ETH_P_IPV6):
690 		return decap_ipv6(skb);
691 	default:
692 		/* does not match, ignore */
693 		return TC_ACT_OK;
694 	}
695 }
696 
697 char __license[] SEC("license") = "GPL";
698