xref: /linux/tools/testing/selftests/bpf/progs/test_tc_tunnel.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /* In-place tunneling */
4 
5 #include <vmlinux.h>
6 
7 #include <bpf/bpf_helpers.h>
8 #include <bpf/bpf_endian.h>
9 #include "bpf_tracing_net.h"
10 #include "bpf_compiler.h"
11 
12 #pragma GCC diagnostic ignored "-Waddress-of-packed-member"
13 
14 static const int cfg_port = 8000;
15 
16 static const int cfg_udp_src = 20000;
17 
18 #define ETH_P_MPLS_UC	0x8847
19 #define ETH_P_TEB	0x6558
20 
21 #define MPLS_LS_S_MASK	0x00000100
22 #define BPF_F_ADJ_ROOM_ENCAP_L2(len)			\
23 	(((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK)	\
24 	 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
25 
/* Minimal vxlan header (flags word + VNI, both big-endian); the
 * ___local suffix keeps the name distinct from the kernel's own
 * struct vxlanhdr.
 */
struct vxlanhdr___local {
	__be32 vx_flags;
	__be32 vx_vni;
};
30 
31 #define	L2_PAD_SZ	(sizeof(struct vxlanhdr___local) + ETH_HLEN)
32 
33 #define	UDP_PORT		5555
34 #define	MPLS_OVER_UDP_PORT	6635
35 #define	ETH_OVER_UDP_PORT	7777
36 #define	VXLAN_UDP_PORT		8472
37 
38 #define	EXTPROTO_VXLAN	0x1
39 
40 #define	VXLAN_FLAGS     bpf_htonl(1<<27)
41 #define	VNI_ID		1
42 #define	VXLAN_VNI	bpf_htonl(VNI_ID << 8)
43 
44 #ifndef NEXTHDR_DEST
45 #define NEXTHDR_DEST	60
46 #endif
47 
48 /* MPLS label 1000 with S bit (last label) set and ttl of 255. */
49 static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
50 						     MPLS_LS_S_MASK | 0xff);
/* Base GRE header only: no optional checksum/key/sequence fields used. */
struct gre_hdr {
	__be16 flags;
	__be16 protocol;
} __attribute__((packed));

/* Outer L4 header scratch space: tunnels built here are UDP or GRE. */
union l4hdr {
	struct udphdr udp;
	struct gre_hdr gre;
};

/* Stack template for a complete outer IPv4 tunnel header (IP + L4 +
 * optional L2/vxlan bytes); packed so it can be copied into the packet
 * verbatim with bpf_skb_store_bytes().
 */
struct v4hdr {
	struct iphdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));

/* Same as struct v4hdr, but with an IPv6 outer network header. */
struct v6hdr {
	struct ipv6hdr ip;
	union l4hdr l4hdr;
	__u8 pad[L2_PAD_SZ];		/* space for L2 header / vxlan header ... */
} __attribute__((packed));
72 
73 static __always_inline void set_ipv4_csum(struct iphdr *iph)
74 {
75 	__u16 *iph16 = (__u16 *)iph;
76 	__u32 csum;
77 	int i;
78 
79 	iph->check = 0;
80 
81 	__pragma_loop_unroll_full
82 	for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
83 		csum += *iph16++;
84 
85 	iph->check = ~((csum & 0xffff) + (csum >> 16));
86 }
87 
/* Insert an outer IPv4 tunnel header in front of the network header.
 *
 * @encap_proto: outer payload protocol: IPPROTO_IPIP, IPPROTO_GRE,
 *		 IPPROTO_UDP, or IPPROTO_IPV6 (6in4: ipv6 inner, ipv4 outer).
 * @l2_proto:    optional inner L2 encapsulation: ETH_P_MPLS_UC (mpls
 *		 label) or ETH_P_TEB (ethernet header); other values mean
 *		 no L2 encap.
 * @ext_proto:   EXTPROTO_* bits; EXTPROTO_VXLAN prepends a vxlan header
 *		 to the inner ethernet header.
 *
 * Only TCP packets with dport cfg_port are rewritten; everything else
 * passes through with TC_ACT_OK. Returns TC_ACT_SHOT if a helper fails
 * once the rewrite has started.
 */
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	struct iphdr iph_inner = {0};
	__u16 udp_dst = UDP_PORT;
	struct v4hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	int tcp_off;
	__u64 flags;

	/* Most tests encapsulate a packet into a tunnel with the same
	 * network protocol, and derive the outer header fields from
	 * the inner header.
	 *
	 * The 6in4 case tests different inner and outer protocols. As
	 * the inner is ipv6, but the outer expects an ipv4 header as
	 * input, manually build a struct iphdr based on the ipv6hdr.
	 */
	if (encap_proto == IPPROTO_IPV6) {
		const __u32 saddr = (192 << 24) | (168 << 16) | (1 << 8) | 1;
		const __u32 daddr = (192 << 24) | (168 << 16) | (1 << 8) | 2;
		struct ipv6hdr iph6_inner;

		/* Read the IPv6 header */
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph6_inner,
				       sizeof(iph6_inner)) < 0)
			return TC_ACT_OK;

		/* Derive the IPv4 header fields from the IPv6 header */
		iph_inner.version = 4;
		iph_inner.ihl = 5;
		iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
				    bpf_ntohs(iph6_inner.payload_len));
		iph_inner.ttl = iph6_inner.hop_limit - 1;
		iph_inner.protocol = iph6_inner.nexthdr;
		iph_inner.saddr = __bpf_constant_htonl(saddr);
		iph_inner.daddr = __bpf_constant_htonl(daddr);

		tcp_off = sizeof(iph6_inner);
	} else {
		if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
				       sizeof(iph_inner)) < 0)
			return TC_ACT_OK;

		tcp_off = sizeof(iph_inner);
	}

	/* filter only packets we want: TCP, no IP options */
	if (iph_inner.ihl != 5 || iph_inner.protocol != IPPROTO_TCP)
		return TC_ACT_OK;

	if (bpf_skb_load_bytes(skb, ETH_HLEN + tcp_off,
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;

	/* size the optional L2 encap; for udp tunnels the dst port also
	 * tells the receiver which L2 type follows
	 */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr___local);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header (if any) on the stack */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		h_outer.l4hdr.udp.check = 0;	/* zero == no checksum for udp/ipv4 */
		h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
						  sizeof(h_outer.l4hdr.udp) +
						  l2_len);
		break;
	case IPPROTO_IPIP:
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = VXLAN_VNI;

			l2_hdr += sizeof(struct vxlanhdr___local);
		}

		/* copy the packet's own ethernet header as the inner one */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;

		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header: clone the inner ip header,
	 * grow tot_len by the inserted bytes and set the tunnel protocol
	 */
	h_outer.ip = iph_inner;
	h_outer.ip.tot_len = bpf_htons(olen +
				       bpf_ntohs(h_outer.ip.tot_len));
	h_outer.ip.protocol = encap_proto;

	set_ipv4_csum((void *)&h_outer.ip);

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* if changing outer proto type, update eth->h_proto */
	if (encap_proto == IPPROTO_IPV6) {
		struct ethhdr eth;

		if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
			return TC_ACT_SHOT;
		eth.h_proto = bpf_htons(ETH_P_IP);
		if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
			return TC_ACT_SHOT;
	}

	return TC_ACT_OK;
}
248 
249 static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
250 				      __u16 l2_proto)
251 {
252 	return __encap_ipv4(skb, encap_proto, l2_proto, 0);
253 }
254 
/* Insert an outer IPv6 tunnel header in front of the network header.
 *
 * IPv6 counterpart of __encap_ipv4(): @encap_proto selects the tunnel
 * type (IPPROTO_GRE, IPPROTO_UDP, or IPPROTO_IPV6 for ip6-in-ip6),
 * @l2_proto selects an optional inner L2 encap (ETH_P_MPLS_UC or
 * ETH_P_TEB), and @ext_proto may set EXTPROTO_VXLAN to add a vxlan
 * header before the inner ethernet header.
 *
 * Only TCP packets with dport cfg_port are rewritten; everything else
 * passes through with TC_ACT_OK. Returns TC_ACT_SHOT if a helper fails
 * once the rewrite has started.
 */
static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
					__u16 l2_proto, __u16 ext_proto)
{
	__u16 udp_dst = UDP_PORT;
	struct ipv6hdr iph_inner;
	struct v6hdr h_outer;
	struct tcphdr tcph;
	int olen, l2_len;
	__u8 *l2_hdr = NULL;
	__u16 tot_len;
	__u64 flags;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want: assumes TCP follows the fixed
	 * ipv6 header directly (no extension headers)
	 */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(iph_inner),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);
	l2_len = 0;

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* size the optional L2 encap; for udp tunnels the dst port also
	 * tells the receiver which L2 type follows
	 */
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		l2_len = sizeof(mpls_label);
		udp_dst = MPLS_OVER_UDP_PORT;
		break;
	case ETH_P_TEB:
		l2_len = ETH_HLEN;
		if (ext_proto & EXTPROTO_VXLAN) {
			udp_dst = VXLAN_UDP_PORT;
			l2_len += sizeof(struct vxlanhdr___local);
		} else
			udp_dst = ETH_OVER_UDP_PORT;
		break;
	}
	flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);

	/* build the outer L4 header (if any) on the stack */
	switch (encap_proto) {
	case IPPROTO_GRE:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
		olen += sizeof(h_outer.l4hdr.gre);
		h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
		h_outer.l4hdr.gre.flags = 0;
		break;
	case IPPROTO_UDP:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
		olen += sizeof(h_outer.l4hdr.udp);
		h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
		h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
		/* udp len covers the whole inner packet (ipv6 header +
		 * payload) plus the udp header and any L2 encap
		 */
		tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
			  sizeof(h_outer.l4hdr.udp) + l2_len;
		h_outer.l4hdr.udp.check = 0;
		h_outer.l4hdr.udp.len = bpf_htons(tot_len);
		break;
	case IPPROTO_IPV6:
		break;
	default:
		return TC_ACT_OK;
	}

	/* add L2 encap (if specified) */
	l2_hdr = (__u8 *)&h_outer + olen;
	switch (l2_proto) {
	case ETH_P_MPLS_UC:
		*(__u32 *)l2_hdr = mpls_label;
		break;
	case ETH_P_TEB:
		flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		if (ext_proto & EXTPROTO_VXLAN) {
			struct vxlanhdr___local *vxlan_hdr = (struct vxlanhdr___local *)l2_hdr;

			vxlan_hdr->vx_flags = VXLAN_FLAGS;
			vxlan_hdr->vx_vni = VXLAN_VNI;

			l2_hdr += sizeof(struct vxlanhdr___local);
		}

		/* copy the packet's own ethernet header as the inner one */
		if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
			return TC_ACT_SHOT;
		break;
	}
	olen += l2_len;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header: clone the inner ipv6 header,
	 * grow payload_len by the inserted bytes and set the tunnel proto
	 */
	h_outer.ip = iph_inner;
	h_outer.ip.payload_len = bpf_htons(olen +
					   bpf_ntohs(h_outer.ip.payload_len));

	h_outer.ip.nexthdr = encap_proto;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
365 
/* Encapsulate an inner IPv4 packet in a fixed outer IPv6 header (4in6).
 *
 * Unlike the generic encap helpers, the outer header is not derived
 * from the inner one: it is built from scratch with hardcoded test
 * addresses (only bytes 1 and 15 of saddr/daddr are set). The outer
 * ethertype is switched to ETH_P_IPV6 to match. Only TCP packets to
 * cfg_port are rewritten; others pass through with TC_ACT_OK.
 */
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
	struct v6hdr h_outer = {0};
	struct iphdr iph_inner;
	struct tcphdr tcph;
	struct ethhdr eth;
	__u64 flags;
	int olen;

	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
			       sizeof(iph_inner)) < 0)
		return TC_ACT_OK;

	/* filter only packets we want (honors ihl, unlike __encap_ipv4) */
	if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
			       &tcph, sizeof(tcph)) < 0)
		return TC_ACT_OK;

	if (tcph.dest != __bpf_constant_htons(cfg_port))
		return TC_ACT_OK;

	olen = sizeof(h_outer.ip);

	flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;

	/* add room between mac and network header */
	if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	/* prepare new outer network header */
	h_outer.ip.version = 6;
	h_outer.ip.hop_limit = iph_inner.ttl;
	h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
	h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
	h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
	h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
	/* ipv6 payload_len excludes its own header, so the inner packet's
	 * tot_len (which includes the inner ip header) is exactly right
	 */
	h_outer.ip.payload_len = iph_inner.tot_len;
	h_outer.ip.nexthdr = IPPROTO_IPIP;

	/* store new outer network header */
	if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
				BPF_F_INVALIDATE_HASH) < 0)
		return TC_ACT_SHOT;

	/* update eth->h_proto */
	if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
		return TC_ACT_SHOT;
	eth.h_proto = bpf_htons(ETH_P_IPV6);
	if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
419 
420 static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
421 				      __u16 l2_proto)
422 {
423 	return __encap_ipv6(skb, encap_proto, l2_proto, 0);
424 }
425 
426 SEC("tc")
427 int __encap_ipip_none(struct __sk_buff *skb)
428 {
429 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
430 		return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
431 	else
432 		return TC_ACT_OK;
433 }
434 
435 SEC("tc")
436 int __encap_gre_none(struct __sk_buff *skb)
437 {
438 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
439 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
440 	else
441 		return TC_ACT_OK;
442 }
443 
444 SEC("tc")
445 int __encap_gre_mpls(struct __sk_buff *skb)
446 {
447 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
448 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
449 	else
450 		return TC_ACT_OK;
451 }
452 
453 SEC("tc")
454 int __encap_gre_eth(struct __sk_buff *skb)
455 {
456 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
457 		return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
458 	else
459 		return TC_ACT_OK;
460 }
461 
462 SEC("tc")
463 int __encap_udp_none(struct __sk_buff *skb)
464 {
465 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
466 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
467 	else
468 		return TC_ACT_OK;
469 }
470 
471 SEC("tc")
472 int __encap_udp_mpls(struct __sk_buff *skb)
473 {
474 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
475 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
476 	else
477 		return TC_ACT_OK;
478 }
479 
480 SEC("tc")
481 int __encap_udp_eth(struct __sk_buff *skb)
482 {
483 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
484 		return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
485 	else
486 		return TC_ACT_OK;
487 }
488 
489 SEC("tc")
490 int __encap_vxlan_eth(struct __sk_buff *skb)
491 {
492 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
493 		return __encap_ipv4(skb, IPPROTO_UDP,
494 				    ETH_P_TEB,
495 				    EXTPROTO_VXLAN);
496 	else
497 		return TC_ACT_OK;
498 }
499 
500 SEC("tc")
501 int __encap_sit_none(struct __sk_buff *skb)
502 {
503 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
504 		return encap_ipv4(skb, IPPROTO_IPV6, ETH_P_IP);
505 	else
506 		return TC_ACT_OK;
507 }
508 
509 SEC("tc")
510 int __encap_ip6tnl_none(struct __sk_buff *skb)
511 {
512 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
513 		return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
514 	else
515 		return TC_ACT_OK;
516 }
517 
518 SEC("tc")
519 int __encap_ipip6_none(struct __sk_buff *skb)
520 {
521 	if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
522 		return encap_ipv6_ipip6(skb);
523 	else
524 		return TC_ACT_OK;
525 }
526 
527 SEC("tc")
528 int __encap_ip6gre_none(struct __sk_buff *skb)
529 {
530 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
531 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
532 	else
533 		return TC_ACT_OK;
534 }
535 
536 SEC("tc")
537 int __encap_ip6gre_mpls(struct __sk_buff *skb)
538 {
539 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
540 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
541 	else
542 		return TC_ACT_OK;
543 }
544 
545 SEC("tc")
546 int __encap_ip6gre_eth(struct __sk_buff *skb)
547 {
548 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
549 		return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
550 	else
551 		return TC_ACT_OK;
552 }
553 
554 SEC("tc")
555 int __encap_ip6udp_none(struct __sk_buff *skb)
556 {
557 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
558 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
559 	else
560 		return TC_ACT_OK;
561 }
562 
563 SEC("tc")
564 int __encap_ip6udp_mpls(struct __sk_buff *skb)
565 {
566 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
567 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
568 	else
569 		return TC_ACT_OK;
570 }
571 
572 SEC("tc")
573 int __encap_ip6udp_eth(struct __sk_buff *skb)
574 {
575 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
576 		return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
577 	else
578 		return TC_ACT_OK;
579 }
580 
581 SEC("tc")
582 int __encap_ip6vxlan_eth(struct __sk_buff *skb)
583 {
584 	if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
585 		return __encap_ipv6(skb, IPPROTO_UDP,
586 				    ETH_P_TEB,
587 				    EXTPROTO_VXLAN);
588 	else
589 		return TC_ACT_OK;
590 }
591 
/* Strip the tunnel headers that the encap programs above inserted.
 *
 * @off/@len describe the outer network header (offset ETH_HLEN, length
 * sizeof iphdr or ipv6hdr); @proto is that header's payload protocol.
 * The total byte count to remove (olen) is grown per tunnel type by
 * peeking at the L4/L2 headers that follow. Unknown protocols pass
 * through untouched.
 */
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
	struct ipv6_opt_hdr ip6_opt_hdr;
	struct gre_hdr greh;
	struct udphdr udph;
	int olen = len;

	switch (proto) {
	case IPPROTO_IPIP:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
		break;
	case IPPROTO_IPV6:
		flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
		break;
	case NEXTHDR_DEST:
		/* ipv6 destination options header: the real payload
		 * protocol is in the option header's nexthdr field
		 */
		if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
				       sizeof(ip6_opt_hdr)) < 0)
			return TC_ACT_OK;
		switch (ip6_opt_hdr.nexthdr) {
		case IPPROTO_IPIP:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
			break;
		case IPPROTO_IPV6:
			flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
			break;
		default:
			return TC_ACT_OK;
		}
		break;
	case IPPROTO_GRE:
		/* also strip the GRE header plus any L2 encap it carries */
		olen += sizeof(struct gre_hdr);
		if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(greh.protocol)) {
		case ETH_P_MPLS_UC:
			olen += sizeof(mpls_label);
			break;
		case ETH_P_TEB:
			olen += ETH_HLEN;
			break;
		}
		break;
	case IPPROTO_UDP:
		/* udp tunnel: the dst port encodes the inner L2 type */
		olen += sizeof(struct udphdr);
		if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
			return TC_ACT_OK;
		switch (bpf_ntohs(udph.dest)) {
		case MPLS_OVER_UDP_PORT:
			olen += sizeof(mpls_label);
			break;
		case ETH_OVER_UDP_PORT:
			olen += ETH_HLEN;
			break;
		case VXLAN_UDP_PORT:
			olen += ETH_HLEN + sizeof(struct vxlanhdr___local);
			break;
		}
		break;
	default:
		return TC_ACT_OK;
	}

	/* shrink the room between mac and network header by olen bytes */
	if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
		return TC_ACT_SHOT;

	return TC_ACT_OK;
}
660 
661 static int decap_ipv4(struct __sk_buff *skb)
662 {
663 	struct iphdr iph_outer;
664 
665 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
666 			       sizeof(iph_outer)) < 0)
667 		return TC_ACT_OK;
668 
669 	if (iph_outer.ihl != 5)
670 		return TC_ACT_OK;
671 
672 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
673 			      iph_outer.protocol);
674 }
675 
676 static int decap_ipv6(struct __sk_buff *skb)
677 {
678 	struct ipv6hdr iph_outer;
679 
680 	if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer,
681 			       sizeof(iph_outer)) < 0)
682 		return TC_ACT_OK;
683 
684 	return decap_internal(skb, ETH_HLEN, sizeof(iph_outer),
685 			      iph_outer.nexthdr);
686 }
687 
688 SEC("tc")
689 int decap_f(struct __sk_buff *skb)
690 {
691 	switch (skb->protocol) {
692 	case __bpf_constant_htons(ETH_P_IP):
693 		return decap_ipv4(skb);
694 	case __bpf_constant_htons(ETH_P_IPV6):
695 		return decap_ipv6(skb);
696 	default:
697 		/* does not match, ignore */
698 		return TC_ACT_OK;
699 	}
700 }
701 
702 char __license[] SEC("license") = "GPL";
703