xref: /linux/tools/testing/selftests/bpf/progs/bpf_flow.c (revision a4eb44a6435d6d8f9e642407a4a06f65eb90ca04)
// SPDX-License-Identifier: GPL-2.0
#include <limits.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/pkt_cls.h>
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/if_ether.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_packet.h>
#include <sys/socket.h>
#include <linux/if_tunnel.h>
#include <linux/mpls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#define PROG(F) PROG_(F, _##F)
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM##NAME

/* These are the identifiers of the BPF programs that will be used in tail
 * calls. Name is limited to 16 characters; with the terminating character and
 * bpf_func_ above, we have only 6 to work with, and anything after that will
 * be cropped.
 */
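/* For example, with IP defined as 0 below, PROG(IP) expands via PROG_(0, _IP)
 * to:
 *
 *	SEC("flow_dissector") int flow_dissector_0_IP
 *
 * so every parser lives in the same "flow_dissector" section while keeping a
 * unique function name that matches its jmp_table slot.
 */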
#define IP		0
#define IPV6		1
#define IPV6OP		2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR		3 /* Fragmentation IPv6 Extension Header */
#define MPLS		4
#define VLAN		5
#define MAX_PROG	6

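/* Fragment bits of the IPv4 frag_off field and of the IPv6 fragment header,
 * matched below in network byte order via bpf_htons().
 */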
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct gre_hdr {
	__be16 flags;
	__be16 proto;
};

struct frag_hdr {
	__u8 nexthdr;
	__u8 reserved;
	__be16 frag_off;
	__be32 identification;
};

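/* Tail-call table: slot N holds the parser program identified by the defines
 * above (IP, IPV6, ...); it is populated from userspace (by the test loader)
 * before the dissector runs.
 */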
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, MAX_PROG);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

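/* Latest dissection result, keyed by (sport << 16 | dport) and written by
 * export_flow_keys() below so userspace can read back what was parsed.
 */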
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");

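/* Snapshots the dissected keys into last_dissection and passes the verdict
 * through, so callers can simply "return export_flow_keys(keys, ret);".
 */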
static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
					    int ret)
{
	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
	struct bpf_flow_keys val;

	memcpy(&val, keys, sizeof(val));
	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
	return ret;
}

#define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
{
	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
}

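/* Returns a pointer to the header at the current transport offset: direct
 * packet access when the header lies within [data, data_end), otherwise a
 * copy in the caller-provided buffer fetched with bpf_skb_load_bytes()
 * (e.g. when the header is not in the linear part of the skb).
 */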
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							 __u16 hdr_size,
							 void *buffer)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	__u16 thoff = skb->flow_keys->thoff;
	__u8 *hdr;

	/* Verifies this variable offset does not overflow */
	if (thoff > (USHRT_MAX - hdr_size))
		return NULL;

	hdr = data + thoff;
	if (hdr + hdr_size <= data_end)
		return hdr;

	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
		return NULL;

	return buffer;
}

/* Dispatches on ETHERTYPE */
static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (proto) {
	case bpf_htons(ETH_P_IP):
		bpf_tail_call_static(skb, &jmp_table, IP);
		break;
	case bpf_htons(ETH_P_IPV6):
		bpf_tail_call_static(skb, &jmp_table, IPV6);
		break;
	case bpf_htons(ETH_P_MPLS_MC):
	case bpf_htons(ETH_P_MPLS_UC):
		bpf_tail_call_static(skb, &jmp_table, MPLS);
		break;
	case bpf_htons(ETH_P_8021Q):
	case bpf_htons(ETH_P_8021AD):
		bpf_tail_call_static(skb, &jmp_table, VLAN);
		break;
	default:
		/* Protocol not supported */
		return export_flow_keys(keys, BPF_DROP);
	}

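	/* A successful bpf_tail_call_static() does not return; reaching this
	 * point means the jmp_table slot was empty or the tail call failed,
	 * so drop the packet.
	 */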
	return export_flow_keys(keys, BPF_DROP);
}

SEC("flow_dissector")
int _dissect(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	return parse_eth_proto(skb, keys->n_proto);
}

/* Parses on IPPROTO_* */
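/* ICMP is only validated; TCP and UDP/UDPLITE also record the ports. All of
 * them export the keys and stop. Tunnels (IPIP, IPv6 tunnels, GRE) mark
 * is_encap and, unless BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP is set, dispatch
 * again on the inner protocol.
 */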
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data_end = (void *)(long)skb->data_end;
	struct icmphdr *icmp, _icmp;
	struct gre_hdr *gre, _gre;
	struct ethhdr *eth, _eth;
	struct tcphdr *tcp, _tcp;
	struct udphdr *udp, _udp;

	switch (proto) {
	case IPPROTO_ICMP:
		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
		if (!icmp)
			return export_flow_keys(keys, BPF_DROP);
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_IPIP:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
	case IPPROTO_IPV6:
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
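	/* GRE (RFC 2784/2890): the base header is flags + protocol; the
	 * checksum, key and sequence number fields are 4 bytes each and are
	 * only present when the corresponding flag bit is set.
	 */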
	case IPPROTO_GRE:
		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
		if (!gre)
			return export_flow_keys(keys, BPF_DROP);

		if (bpf_htons(gre->flags & GRE_VERSION))
			/* Only inspect standard GRE packets with version 0 */
			return export_flow_keys(keys, BPF_OK);

		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
		if (GRE_IS_CSUM(gre->flags))
			keys->thoff += 4; /* Step over chksum and Padding */
		if (GRE_IS_KEY(gre->flags))
			keys->thoff += 4; /* Step over key */
		if (GRE_IS_SEQ(gre->flags))
			keys->thoff += 4; /* Step over sequence number */

		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		if (gre->proto == bpf_htons(ETH_P_TEB)) {
			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
							  &_eth);
			if (!eth)
				return export_flow_keys(keys, BPF_DROP);

			keys->thoff += sizeof(*eth);

			return parse_eth_proto(skb, eth->h_proto);
		} else {
			return parse_eth_proto(skb, gre->proto);
		}
	case IPPROTO_TCP:
		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
		if (!tcp)
			return export_flow_keys(keys, BPF_DROP);

		if (tcp->doff < 5)
			return export_flow_keys(keys, BPF_DROP);

		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = tcp->source;
		keys->dport = tcp->dest;
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
		if (!udp)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = udp->source;
		keys->dport = udp->dest;
		return export_flow_keys(keys, BPF_OK);
	default:
		return export_flow_keys(keys, BPF_DROP);
	}

	return export_flow_keys(keys, BPF_DROP);
}

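/* Dispatches on the IPv6 next header: extension headers that need their own
 * parsing (Hop-by-Hop/Destination Options, Fragment) go through tail calls;
 * everything else is treated as an upper-layer protocol.
 */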
static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	switch (nexthdr) {
	case IPPROTO_HOPOPTS:
	case IPPROTO_DSTOPTS:
		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
		break;
	case IPPROTO_FRAGMENT:
		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
		break;
	default:
		return parse_ip_proto(skb, nexthdr);
	}

	return export_flow_keys(keys, BPF_DROP);
}

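/* Parses the IPv4 header: validates ihl, records addresses and protocol,
 * advances thoff past any options, and handles fragments (only the first
 * fragment is parsed further, and only when
 * BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG is set).
 */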
PROG(IP)(struct __sk_buff *skb)
{
	void *data_end = (void *)(long)skb->data_end;
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data = (void *)(long)skb->data;
	struct iphdr *iph, _iph;
	bool done = false;

	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
	if (!iph)
		return export_flow_keys(keys, BPF_DROP);

	/* IP header cannot be smaller than 20 bytes */
	if (iph->ihl < 5)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IP;
	keys->ipv4_src = iph->saddr;
	keys->ipv4_dst = iph->daddr;
	keys->ip_proto = iph->protocol;

	keys->thoff += iph->ihl << 2;
	if (data + keys->thoff > data_end)
		return export_flow_keys(keys, BPF_DROP);

	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
		keys->is_frag = true;
		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
			/* From second fragment on, packets do not have headers
			 * we can parse.
			 */
			done = true;
		} else {
			keys->is_first_frag = true;
			/* No need to parse fragmented packet unless
			 * explicitly asked for.
			 */
			if (!(keys->flags &
			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
				done = true;
		}
	}

	if (done)
		return export_flow_keys(keys, BPF_OK);

	return parse_ip_proto(skb, iph->protocol);
}

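/* Parses the fixed 40-byte IPv6 header. A single memcpy covers both saddr and
 * daddr since they are adjacent in struct ipv6hdr; the flow label is recorded
 * as well so dissection can stop early when
 * BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL is set.
 */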
PROG(IPV6)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	keys->addr_proto = ETH_P_IPV6;
	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));

	keys->thoff += sizeof(struct ipv6hdr);
	keys->ip_proto = ip6h->nexthdr;
	keys->flow_label = ip6_flowlabel(ip6h);

	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
		return export_flow_keys(keys, BPF_OK);

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}

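/* Hop-by-Hop and Destination Options share the generic ipv6_opt_hdr layout,
 * so both are handled here: skip over the option data and keep walking the
 * extension header chain.
 */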
PROG(IPV6OP)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct ipv6_opt_hdr *ip6h, _ip6h;

	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
	if (!ip6h)
		return export_flow_keys(keys, BPF_DROP);

	/* hlen is in 8-octets and does not include the first 8 bytes
	 * of the header
	 */
	keys->thoff += (1 + ip6h->hdrlen) << 3;
	keys->ip_proto = ip6h->nexthdr;

	return parse_ipv6_proto(skb, ip6h->nexthdr);
}

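/* IPv6 Fragment extension header: frag_off packs the fragment offset
 * (IP6_OFFSET mask) and the more-fragments flag (IP6_MF). Only the first
 * fragment still carries the transport header, and it is parsed further only
 * when BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG is requested.
 */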
PROG(IPV6FR)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct frag_hdr *fragh, _fragh;

	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
	if (!fragh)
		return export_flow_keys(keys, BPF_DROP);

	keys->thoff += sizeof(*fragh);
	keys->is_frag = true;
	keys->ip_proto = fragh->nexthdr;

	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
		keys->is_first_frag = true;

		/* No need to parse fragmented packet unless
		 * explicitly asked for.
		 */
		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
			return export_flow_keys(keys, BPF_OK);
	} else {
		return export_flow_keys(keys, BPF_OK);
	}

	return parse_ipv6_proto(skb, fragh->nexthdr);
}

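/* MPLS payloads are opaque to the dissector: just make sure at least one
 * label stack entry is present and stop with what has been collected so far.
 */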
PROG(MPLS)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct mpls_label *mpls, _mpls;

	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
	if (!mpls)
		return export_flow_keys(keys, BPF_DROP);

	return export_flow_keys(keys, BPF_OK);
}

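/* Handles single 802.1Q tags and 802.1ad + 802.1Q double tagging. Each tag
 * advances both nhoff and thoff by 4 bytes; anything that looks like a third
 * tag is rejected.
 */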
PROG(VLAN)(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	struct vlan_hdr *vlan, _vlan;

	/* Account for double-tagging */
	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
		if (!vlan)
			return export_flow_keys(keys, BPF_DROP);

		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
			return export_flow_keys(keys, BPF_DROP);

		keys->nhoff += sizeof(*vlan);
		keys->thoff += sizeof(*vlan);
	}

	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
	if (!vlan)
		return export_flow_keys(keys, BPF_DROP);

	keys->nhoff += sizeof(*vlan);
	keys->thoff += sizeof(*vlan);
	/* Only allow 8021AD + 8021Q double tagging and no triple tagging. */
	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
		return export_flow_keys(keys, BPF_DROP);

	keys->n_proto = vlan->h_vlan_encapsulated_proto;
	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
}

char __license[] SEC("license") = "GPL";