xref: /linux/tools/testing/selftests/bpf/progs/bpf_flow.c (revision 1375dc4a4579d5e767dd8c2d2abcd929ff59d0a7)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <limits.h>
3 #include <stddef.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <linux/pkt_cls.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/if_ether.h>
10 #include <linux/icmp.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
15 #include <linux/if_packet.h>
16 #include <sys/socket.h>
17 #include <linux/if_tunnel.h>
18 #include <linux/mpls.h>
19 #include "bpf_helpers.h"
20 #include "bpf_endian.h"
21 
22 int _version SEC("version") = 1;
23 #define PROG(F) SEC(#F) int bpf_func_##F
24 
/* Slots of jmp_table used as bpf_tail_call() targets. Each identifier is
 * also handed to PROG() to name the matching program. A program name is
 * limited to 16 characters; once the terminating character and the
 * "bpf_func_" prefix are accounted for only 6 remain, and anything after
 * that is cropped.
 */
enum {
	IP	= 0,
	IPV6	= 1,
	IPV6OP	= 2,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
	IPV6FR	= 3,	/* Fragmentation IPv6 Extension Header */
	MPLS	= 4,
	VLAN	= 5,
};
37 
/* Masks for the IPv4 header frag_off field, in host byte order; the code
 * wraps them in bpf_htons() before testing the header field.
 */
#define IP_MF		0x2000	/* tested together with IP_OFFSET to spot a fragment */
#define IP_OFFSET	0x1FFF	/* non-zero offset: not the first fragment */

/* Masks for the frag_off field of the IPv6 fragment header, host order. */
#define IP6_MF		0x0001	/* presumably the more-fragments bit; unused in the code shown here */
#define IP6_OFFSET	0xFFF8	/* zero offset marks the first fragment */
42 
43 struct vlan_hdr {
44 	__be16 h_vlan_TCI;
45 	__be16 h_vlan_encapsulated_proto;
46 };
47 
48 struct gre_hdr {
49 	__be16 flags;
50 	__be16 proto;
51 };
52 
53 struct frag_hdr {
54 	__u8 nexthdr;
55 	__u8 reserved;
56 	__be16 frag_off;
57 	__be32 identification;
58 };
59 
60 struct {
61 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
62 	__uint(max_entries, 8);
63 	__uint(key_size, sizeof(__u32));
64 	__uint(value_size, sizeof(__u32));
65 } jmp_table SEC(".maps");
66 
67 struct {
68 	__uint(type, BPF_MAP_TYPE_ARRAY);
69 	__uint(max_entries, 1);
70 	__type(key, __u32);
71 	__type(value, struct bpf_flow_keys);
72 } last_dissection SEC(".maps");
73 
74 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
75 					    int ret)
76 {
77 	struct bpf_flow_keys *val;
78 	__u32 key = 0;
79 
80 	val = bpf_map_lookup_elem(&last_dissection, &key);
81 	if (val)
82 		memcpy(val, keys, sizeof(*val));
83 	return ret;
84 }
85 
86 #define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
87 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
88 {
89 	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
90 }
91 
92 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
93 							 __u16 hdr_size,
94 							 void *buffer)
95 {
96 	void *data_end = (void *)(long)skb->data_end;
97 	void *data = (void *)(long)skb->data;
98 	__u16 thoff = skb->flow_keys->thoff;
99 	__u8 *hdr;
100 
101 	/* Verifies this variable offset does not overflow */
102 	if (thoff > (USHRT_MAX - hdr_size))
103 		return NULL;
104 
105 	hdr = data + thoff;
106 	if (hdr + hdr_size <= data_end)
107 		return hdr;
108 
109 	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
110 		return NULL;
111 
112 	return buffer;
113 }
114 
115 /* Dispatches on ETHERTYPE */
116 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
117 {
118 	struct bpf_flow_keys *keys = skb->flow_keys;
119 
120 	switch (proto) {
121 	case bpf_htons(ETH_P_IP):
122 		bpf_tail_call(skb, &jmp_table, IP);
123 		break;
124 	case bpf_htons(ETH_P_IPV6):
125 		bpf_tail_call(skb, &jmp_table, IPV6);
126 		break;
127 	case bpf_htons(ETH_P_MPLS_MC):
128 	case bpf_htons(ETH_P_MPLS_UC):
129 		bpf_tail_call(skb, &jmp_table, MPLS);
130 		break;
131 	case bpf_htons(ETH_P_8021Q):
132 	case bpf_htons(ETH_P_8021AD):
133 		bpf_tail_call(skb, &jmp_table, VLAN);
134 		break;
135 	default:
136 		/* Protocol not supported */
137 		return export_flow_keys(keys, BPF_DROP);
138 	}
139 
140 	return export_flow_keys(keys, BPF_DROP);
141 }
142 
143 SEC("flow_dissector")
144 int _dissect(struct __sk_buff *skb)
145 {
146 	struct bpf_flow_keys *keys = skb->flow_keys;
147 
148 	return parse_eth_proto(skb, keys->n_proto);
149 }
150 
151 /* Parses on IPPROTO_* */
152 static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
153 {
154 	struct bpf_flow_keys *keys = skb->flow_keys;
155 	void *data_end = (void *)(long)skb->data_end;
156 	struct icmphdr *icmp, _icmp;
157 	struct gre_hdr *gre, _gre;
158 	struct ethhdr *eth, _eth;
159 	struct tcphdr *tcp, _tcp;
160 	struct udphdr *udp, _udp;
161 
162 	switch (proto) {
163 	case IPPROTO_ICMP:
164 		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
165 		if (!icmp)
166 			return export_flow_keys(keys, BPF_DROP);
167 		return export_flow_keys(keys, BPF_OK);
168 	case IPPROTO_IPIP:
169 		keys->is_encap = true;
170 		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
171 			return export_flow_keys(keys, BPF_OK);
172 
173 		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
174 	case IPPROTO_IPV6:
175 		keys->is_encap = true;
176 		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
177 			return export_flow_keys(keys, BPF_OK);
178 
179 		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
180 	case IPPROTO_GRE:
181 		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
182 		if (!gre)
183 			return export_flow_keys(keys, BPF_DROP);
184 
185 		if (bpf_htons(gre->flags & GRE_VERSION))
186 			/* Only inspect standard GRE packets with version 0 */
187 			return export_flow_keys(keys, BPF_OK);
188 
189 		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
190 		if (GRE_IS_CSUM(gre->flags))
191 			keys->thoff += 4; /* Step over chksum and Padding */
192 		if (GRE_IS_KEY(gre->flags))
193 			keys->thoff += 4; /* Step over key */
194 		if (GRE_IS_SEQ(gre->flags))
195 			keys->thoff += 4; /* Step over sequence number */
196 
197 		keys->is_encap = true;
198 		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
199 			return export_flow_keys(keys, BPF_OK);
200 
201 		if (gre->proto == bpf_htons(ETH_P_TEB)) {
202 			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
203 							  &_eth);
204 			if (!eth)
205 				return export_flow_keys(keys, BPF_DROP);
206 
207 			keys->thoff += sizeof(*eth);
208 
209 			return parse_eth_proto(skb, eth->h_proto);
210 		} else {
211 			return parse_eth_proto(skb, gre->proto);
212 		}
213 	case IPPROTO_TCP:
214 		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
215 		if (!tcp)
216 			return export_flow_keys(keys, BPF_DROP);
217 
218 		if (tcp->doff < 5)
219 			return export_flow_keys(keys, BPF_DROP);
220 
221 		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
222 			return export_flow_keys(keys, BPF_DROP);
223 
224 		keys->sport = tcp->source;
225 		keys->dport = tcp->dest;
226 		return export_flow_keys(keys, BPF_OK);
227 	case IPPROTO_UDP:
228 	case IPPROTO_UDPLITE:
229 		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
230 		if (!udp)
231 			return export_flow_keys(keys, BPF_DROP);
232 
233 		keys->sport = udp->source;
234 		keys->dport = udp->dest;
235 		return export_flow_keys(keys, BPF_OK);
236 	default:
237 		return export_flow_keys(keys, BPF_DROP);
238 	}
239 
240 	return export_flow_keys(keys, BPF_DROP);
241 }
242 
243 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
244 {
245 	struct bpf_flow_keys *keys = skb->flow_keys;
246 
247 	switch (nexthdr) {
248 	case IPPROTO_HOPOPTS:
249 	case IPPROTO_DSTOPTS:
250 		bpf_tail_call(skb, &jmp_table, IPV6OP);
251 		break;
252 	case IPPROTO_FRAGMENT:
253 		bpf_tail_call(skb, &jmp_table, IPV6FR);
254 		break;
255 	default:
256 		return parse_ip_proto(skb, nexthdr);
257 	}
258 
259 	return export_flow_keys(keys, BPF_DROP);
260 }
261 
262 PROG(IP)(struct __sk_buff *skb)
263 {
264 	void *data_end = (void *)(long)skb->data_end;
265 	struct bpf_flow_keys *keys = skb->flow_keys;
266 	void *data = (void *)(long)skb->data;
267 	struct iphdr *iph, _iph;
268 	bool done = false;
269 
270 	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
271 	if (!iph)
272 		return export_flow_keys(keys, BPF_DROP);
273 
274 	/* IP header cannot be smaller than 20 bytes */
275 	if (iph->ihl < 5)
276 		return export_flow_keys(keys, BPF_DROP);
277 
278 	keys->addr_proto = ETH_P_IP;
279 	keys->ipv4_src = iph->saddr;
280 	keys->ipv4_dst = iph->daddr;
281 	keys->ip_proto = iph->protocol;
282 
283 	keys->thoff += iph->ihl << 2;
284 	if (data + keys->thoff > data_end)
285 		return export_flow_keys(keys, BPF_DROP);
286 
287 	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
288 		keys->is_frag = true;
289 		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
290 			/* From second fragment on, packets do not have headers
291 			 * we can parse.
292 			 */
293 			done = true;
294 		} else {
295 			keys->is_first_frag = true;
296 			/* No need to parse fragmented packet unless
297 			 * explicitly asked for.
298 			 */
299 			if (!(keys->flags &
300 			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
301 				done = true;
302 		}
303 	}
304 
305 	if (done)
306 		return export_flow_keys(keys, BPF_OK);
307 
308 	return parse_ip_proto(skb, iph->protocol);
309 }
310 
311 PROG(IPV6)(struct __sk_buff *skb)
312 {
313 	struct bpf_flow_keys *keys = skb->flow_keys;
314 	struct ipv6hdr *ip6h, _ip6h;
315 
316 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
317 	if (!ip6h)
318 		return export_flow_keys(keys, BPF_DROP);
319 
320 	keys->addr_proto = ETH_P_IPV6;
321 	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
322 
323 	keys->thoff += sizeof(struct ipv6hdr);
324 	keys->ip_proto = ip6h->nexthdr;
325 	keys->flow_label = ip6_flowlabel(ip6h);
326 
327 	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
328 		return export_flow_keys(keys, BPF_OK);
329 
330 	return parse_ipv6_proto(skb, ip6h->nexthdr);
331 }
332 
333 PROG(IPV6OP)(struct __sk_buff *skb)
334 {
335 	struct bpf_flow_keys *keys = skb->flow_keys;
336 	struct ipv6_opt_hdr *ip6h, _ip6h;
337 
338 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
339 	if (!ip6h)
340 		return export_flow_keys(keys, BPF_DROP);
341 
342 	/* hlen is in 8-octets and does not include the first 8 bytes
343 	 * of the header
344 	 */
345 	keys->thoff += (1 + ip6h->hdrlen) << 3;
346 	keys->ip_proto = ip6h->nexthdr;
347 
348 	return parse_ipv6_proto(skb, ip6h->nexthdr);
349 }
350 
351 PROG(IPV6FR)(struct __sk_buff *skb)
352 {
353 	struct bpf_flow_keys *keys = skb->flow_keys;
354 	struct frag_hdr *fragh, _fragh;
355 
356 	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
357 	if (!fragh)
358 		return export_flow_keys(keys, BPF_DROP);
359 
360 	keys->thoff += sizeof(*fragh);
361 	keys->is_frag = true;
362 	keys->ip_proto = fragh->nexthdr;
363 
364 	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
365 		keys->is_first_frag = true;
366 
367 		/* No need to parse fragmented packet unless
368 		 * explicitly asked for.
369 		 */
370 		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
371 			return export_flow_keys(keys, BPF_OK);
372 	}
373 
374 	return parse_ipv6_proto(skb, fragh->nexthdr);
375 }
376 
377 PROG(MPLS)(struct __sk_buff *skb)
378 {
379 	struct bpf_flow_keys *keys = skb->flow_keys;
380 	struct mpls_label *mpls, _mpls;
381 
382 	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
383 	if (!mpls)
384 		return export_flow_keys(keys, BPF_DROP);
385 
386 	return export_flow_keys(keys, BPF_OK);
387 }
388 
389 PROG(VLAN)(struct __sk_buff *skb)
390 {
391 	struct bpf_flow_keys *keys = skb->flow_keys;
392 	struct vlan_hdr *vlan, _vlan;
393 
394 	/* Account for double-tagging */
395 	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
396 		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
397 		if (!vlan)
398 			return export_flow_keys(keys, BPF_DROP);
399 
400 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
401 			return export_flow_keys(keys, BPF_DROP);
402 
403 		keys->nhoff += sizeof(*vlan);
404 		keys->thoff += sizeof(*vlan);
405 	}
406 
407 	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
408 	if (!vlan)
409 		return export_flow_keys(keys, BPF_DROP);
410 
411 	keys->nhoff += sizeof(*vlan);
412 	keys->thoff += sizeof(*vlan);
413 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
414 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
415 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
416 		return export_flow_keys(keys, BPF_DROP);
417 
418 	keys->n_proto = vlan->h_vlan_encapsulated_proto;
419 	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
420 }
421 
422 char __license[] SEC("license") = "GPL";
423