xref: /linux/tools/testing/selftests/bpf/progs/bpf_flow.c (revision 460ea8980511c01c1551012b9a6ec6a06d02da59)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <limits.h>
3 #include <stddef.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include <linux/pkt_cls.h>
7 #include <linux/bpf.h>
8 #include <linux/in.h>
9 #include <linux/if_ether.h>
10 #include <linux/icmp.h>
11 #include <linux/ip.h>
12 #include <linux/ipv6.h>
13 #include <linux/tcp.h>
14 #include <linux/udp.h>
15 #include <linux/if_packet.h>
16 #include <sys/socket.h>
17 #include <linux/if_tunnel.h>
18 #include <linux/mpls.h>
19 #include <bpf/bpf_helpers.h>
20 #include <bpf/bpf_endian.h>
21 
/* PROG(F) opens the definition of one tail-called program: it expands to
 * SEC("flow_dissector") int flow_dissector_<F>, and the parameter list and
 * body follow the macro invocation. The first use of F in PROG() is not an
 * operand of ##, so it is macro-expanded before being pasted by PROG_();
 * with the numeric program identifiers defined in this file the generated
 * names are of the form flow_dissector_0.
 */
#define PROG(F) PROG_(F, _##F)
/* NAME (the '_'-prefixed token built by PROG) is accepted but does not appear
 * in the expansion.
 */
#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM
24 
/* IPv4 source address (host byte order here, converted with bpf_htonl() at
 * the point of use) for which _dissect() returns BPF_FLOW_DISSECTOR_CONTINUE
 * instead of dissecting the packet itself.
 */
#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
26 
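/* These are the identifiers of the BPF programs that are reached through tail
 * calls. Each value is the key passed with jmp_table to
 * bpf_tail_call_static() and, through PROG(), the numeric suffix of the
 * generated function name (flow_dissector_<N>).
 *
 * Program names are limited to 16 characters including the terminating
 * character; anything after that is cropped. The identifiers were kept to at
 * most 6 characters to fit that limit together with a bpf_func_ prefix.
 * NOTE(review): PROG_() does not generate a bpf_func_ prefix, so the
 * 6-character budget looks stale -- confirm before relying on it.
 */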
#define IP		0
#define IPV6		1
#define IPV6OP		2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
#define IPV6FR		3 /* Fragmentation IPv6 Extension Header */
#define MPLS		4
#define VLAN		5
#define MAX_PROG	6 /* One past the last identifier; sizes jmp_table */

/* Host-order masks; the IP program applies them to iphdr.frag_off after
 * converting them with bpf_htons().
 */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
/* Host-order masks for frag_hdr.frag_off. Only IP6_OFFSET is referenced by
 * the code visible in this file.
 */
#define IP6_MF		0x0001
#define IP6_OFFSET	0xFFF8
43 
/* VLAN tag as read by the VLAN program at the current header offset. */
struct vlan_hdr {
	/* Not read by the code in this file */
	__be16 h_vlan_TCI;
	/* Network-order protocol of what follows the tag; compared against
	 * ETH_P_8021Q / ETH_P_8021AD and handed to parse_eth_proto()
	 */
	__be16 h_vlan_encapsulated_proto;
};
48 
/* Leading four bytes of a GRE header as read by parse_ip_proto(). */
struct gre_hdr {
	/* Tested with GRE_VERSION, GRE_IS_CSUM, GRE_IS_KEY and GRE_IS_SEQ */
	__be16 flags;
	/* Network-order protocol of the payload; ETH_P_TEB gets an extra
	 * Ethernet header parse, anything else goes to parse_eth_proto()
	 */
	__be16 proto;
};
53 
/* IPv6 fragment extension header as read by the IPV6FR program. */
struct frag_hdr {
	/* Next header value; stored in keys->ip_proto and used for dispatch */
	__u8 nexthdr;
	__u8 reserved;
	/* Masked with bpf_htons(IP6_OFFSET) to tell first fragments apart */
	__be16 frag_off;
	/* Not read by the code in this file */
	__be32 identification;
};
60 
/* Program array with MAX_PROG slots, used as the target map of every
 * bpf_tail_call_static() in this file. The slots are indexed by the program
 * identifiers (IP, IPV6, ...).
 * NOTE(review): nothing in this file populates the slots; presumably the
 * loader does -- confirm.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
	__uint(max_entries, MAX_PROG);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
67 
/* Hash map (up to 1024 entries) in which export_flow_keys() stores a copy of
 * the flow keys on every exit path. The __u32 key is built from the dissected
 * ports: (sport << 16) | dport.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, __u32);
	__type(value, struct bpf_flow_keys);
} last_dissection SEC(".maps");
74 
75 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys,
76 					    int ret)
77 {
78 	__u32 key = (__u32)(keys->sport) << 16 | keys->dport;
79 	struct bpf_flow_keys val;
80 
81 	memcpy(&val, keys, sizeof(val));
82 	bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY);
83 	return ret;
84 }
85 
86 #define IPV6_FLOWLABEL_MASK		__bpf_constant_htonl(0x000FFFFF)
87 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr)
88 {
89 	return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK;
90 }
91 
/* Return a pointer to @hdr_size bytes of header located at offset
 * skb->flow_keys->thoff in the packet.
 *
 * @skb:      packet context
 * @hdr_size: number of header bytes the caller needs
 * @buffer:   caller-provided storage for @hdr_size bytes, used only when the
 *            bytes cannot be reached through the direct data pointer
 *
 * Returns a pointer into the packet when the requested range ends at or
 * before data_end; otherwise @buffer after it has been filled with
 * bpf_skb_load_bytes(). Returns NULL when thoff + hdr_size would exceed
 * USHRT_MAX or when bpf_skb_load_bytes() reports an error.
 */
static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
							 __u16 hdr_size,
							 void *buffer)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	__u16 thoff = skb->flow_keys->thoff;
	__u8 *hdr;

	/* Verifies this variable offset does not overflow */
	if (thoff > (USHRT_MAX - hdr_size))
		return NULL;

	/* Fast path: the whole header is reachable through direct access */
	hdr = data + thoff;
	if (hdr + hdr_size <= data_end)
		return hdr;

	/* Slow path: copy the bytes into the caller's buffer */
	if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size))
		return NULL;

	return buffer;
}
114 
115 /* Dispatches on ETHERTYPE */
116 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
117 {
118 	struct bpf_flow_keys *keys = skb->flow_keys;
119 
120 	switch (proto) {
121 	case bpf_htons(ETH_P_IP):
122 		bpf_tail_call_static(skb, &jmp_table, IP);
123 		break;
124 	case bpf_htons(ETH_P_IPV6):
125 		bpf_tail_call_static(skb, &jmp_table, IPV6);
126 		break;
127 	case bpf_htons(ETH_P_MPLS_MC):
128 	case bpf_htons(ETH_P_MPLS_UC):
129 		bpf_tail_call_static(skb, &jmp_table, MPLS);
130 		break;
131 	case bpf_htons(ETH_P_8021Q):
132 	case bpf_htons(ETH_P_8021AD):
133 		bpf_tail_call_static(skb, &jmp_table, VLAN);
134 		break;
135 	default:
136 		/* Protocol not supported */
137 		return export_flow_keys(keys, BPF_DROP);
138 	}
139 
140 	return export_flow_keys(keys, BPF_DROP);
141 }
142 
143 SEC("flow_dissector")
144 int _dissect(struct __sk_buff *skb)
145 {
146 	struct bpf_flow_keys *keys = skb->flow_keys;
147 
148 	if (keys->n_proto == bpf_htons(ETH_P_IP)) {
149 		/* IP traffic from FLOW_CONTINUE_SADDR falls-back to
150 		 * standard dissector
151 		 */
152 		struct iphdr *iph, _iph;
153 
154 		iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
155 		if (iph && iph->ihl == 5 &&
156 		    iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
157 			return BPF_FLOW_DISSECTOR_CONTINUE;
158 		}
159 	}
160 
161 	return parse_eth_proto(skb, keys->n_proto);
162 }
163 
/* Parses on IPPROTO_*
 *
 * @skb:   packet context; the header to parse starts at flow_keys->thoff
 * @proto: IP protocol / IPv6 next-header value
 *
 * Every return goes through export_flow_keys(), either directly or via
 * parse_eth_proto(), so the flow keys are recorded on all paths. Protocols
 * without a case below are dropped.
 */
static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
{
	struct bpf_flow_keys *keys = skb->flow_keys;
	void *data_end = (void *)(long)skb->data_end;
	struct icmphdr *icmp, _icmp;
	struct gre_hdr *gre, _gre;
	struct ethhdr *eth, _eth;
	struct tcphdr *tcp, _tcp;
	struct udphdr *udp, _udp;

	switch (proto) {
	case IPPROTO_ICMP:
		/* Only checks that an ICMP header can be read; no field of it
		 * is used.
		 */
		icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
		if (!icmp)
			return export_flow_keys(keys, BPF_DROP);
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_IPIP:
		/* IPv4 payload: mark encapsulation, then dissect as ETH_P_IP
		 * unless asked to stop at the encapsulation boundary.
		 */
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
	case IPPROTO_IPV6:
		/* Same as IPPROTO_IPIP, with an IPv6 payload */
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
	case IPPROTO_GRE:
		gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
		if (!gre)
			return export_flow_keys(keys, BPF_DROP);

		if (bpf_htons(gre->flags & GRE_VERSION))
			/* Only inspect standard GRE packets with version 0 */
			return export_flow_keys(keys, BPF_OK);

		keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */
		if (GRE_IS_CSUM(gre->flags))
			keys->thoff += 4; /* Step over chksum and Padding */
		if (GRE_IS_KEY(gre->flags))
			keys->thoff += 4; /* Step over key */
		if (GRE_IS_SEQ(gre->flags))
			keys->thoff += 4; /* Step over sequence number */

		/* thoff now points just past the GRE header */
		keys->is_encap = true;
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return export_flow_keys(keys, BPF_OK);

		if (gre->proto == bpf_htons(ETH_P_TEB)) {
			/* Payload is an Ethernet frame: step over its header
			 * and dispatch on the inner EtherType.
			 */
			eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
							  &_eth);
			if (!eth)
				return export_flow_keys(keys, BPF_DROP);

			keys->thoff += sizeof(*eth);

			return parse_eth_proto(skb, eth->h_proto);
		} else {
			return parse_eth_proto(skb, gre->proto);
		}
	case IPPROTO_TCP:
		tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
		if (!tcp)
			return export_flow_keys(keys, BPF_DROP);

		/* doff is scaled by 4 below; values under 5 are rejected */
		if (tcp->doff < 5)
			return export_flow_keys(keys, BPF_DROP);

		/* NOTE(review): tcp may be &_tcp (the stack copy) when the
		 * header was fetched with bpf_skb_load_bytes(); comparing
		 * that address against data_end looks questionable -- confirm.
		 */
		if ((__u8 *)tcp + (tcp->doff << 2) > data_end)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = tcp->source;
		keys->dport = tcp->dest;
		return export_flow_keys(keys, BPF_OK);
	case IPPROTO_UDP:
	case IPPROTO_UDPLITE:
		udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
		if (!udp)
			return export_flow_keys(keys, BPF_DROP);

		keys->sport = udp->source;
		keys->dport = udp->dest;
		return export_flow_keys(keys, BPF_OK);
	default:
		return export_flow_keys(keys, BPF_DROP);
	}

	/* Not reached: every case above returns */
	return export_flow_keys(keys, BPF_DROP);
}
255 
256 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
257 {
258 	struct bpf_flow_keys *keys = skb->flow_keys;
259 
260 	switch (nexthdr) {
261 	case IPPROTO_HOPOPTS:
262 	case IPPROTO_DSTOPTS:
263 		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
264 		break;
265 	case IPPROTO_FRAGMENT:
266 		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
267 		break;
268 	default:
269 		return parse_ip_proto(skb, nexthdr);
270 	}
271 
272 	return export_flow_keys(keys, BPF_DROP);
273 }
274 
275 PROG(IP)(struct __sk_buff *skb)
276 {
277 	void *data_end = (void *)(long)skb->data_end;
278 	struct bpf_flow_keys *keys = skb->flow_keys;
279 	void *data = (void *)(long)skb->data;
280 	struct iphdr *iph, _iph;
281 	bool done = false;
282 
283 	iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
284 	if (!iph)
285 		return export_flow_keys(keys, BPF_DROP);
286 
287 	/* IP header cannot be smaller than 20 bytes */
288 	if (iph->ihl < 5)
289 		return export_flow_keys(keys, BPF_DROP);
290 
291 	keys->addr_proto = ETH_P_IP;
292 	keys->ipv4_src = iph->saddr;
293 	keys->ipv4_dst = iph->daddr;
294 	keys->ip_proto = iph->protocol;
295 
296 	keys->thoff += iph->ihl << 2;
297 	if (data + keys->thoff > data_end)
298 		return export_flow_keys(keys, BPF_DROP);
299 
300 	if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
301 		keys->is_frag = true;
302 		if (iph->frag_off & bpf_htons(IP_OFFSET)) {
303 			/* From second fragment on, packets do not have headers
304 			 * we can parse.
305 			 */
306 			done = true;
307 		} else {
308 			keys->is_first_frag = true;
309 			/* No need to parse fragmented packet unless
310 			 * explicitly asked for.
311 			 */
312 			if (!(keys->flags &
313 			      BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
314 				done = true;
315 		}
316 	}
317 
318 	if (done)
319 		return export_flow_keys(keys, BPF_OK);
320 
321 	return parse_ip_proto(skb, iph->protocol);
322 }
323 
324 PROG(IPV6)(struct __sk_buff *skb)
325 {
326 	struct bpf_flow_keys *keys = skb->flow_keys;
327 	struct ipv6hdr *ip6h, _ip6h;
328 
329 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
330 	if (!ip6h)
331 		return export_flow_keys(keys, BPF_DROP);
332 
333 	keys->addr_proto = ETH_P_IPV6;
334 	memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr));
335 
336 	keys->thoff += sizeof(struct ipv6hdr);
337 	keys->ip_proto = ip6h->nexthdr;
338 	keys->flow_label = ip6_flowlabel(ip6h);
339 
340 	if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
341 		return export_flow_keys(keys, BPF_OK);
342 
343 	return parse_ipv6_proto(skb, ip6h->nexthdr);
344 }
345 
346 PROG(IPV6OP)(struct __sk_buff *skb)
347 {
348 	struct bpf_flow_keys *keys = skb->flow_keys;
349 	struct ipv6_opt_hdr *ip6h, _ip6h;
350 
351 	ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h);
352 	if (!ip6h)
353 		return export_flow_keys(keys, BPF_DROP);
354 
355 	/* hlen is in 8-octets and does not include the first 8 bytes
356 	 * of the header
357 	 */
358 	keys->thoff += (1 + ip6h->hdrlen) << 3;
359 	keys->ip_proto = ip6h->nexthdr;
360 
361 	return parse_ipv6_proto(skb, ip6h->nexthdr);
362 }
363 
364 PROG(IPV6FR)(struct __sk_buff *skb)
365 {
366 	struct bpf_flow_keys *keys = skb->flow_keys;
367 	struct frag_hdr *fragh, _fragh;
368 
369 	fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh);
370 	if (!fragh)
371 		return export_flow_keys(keys, BPF_DROP);
372 
373 	keys->thoff += sizeof(*fragh);
374 	keys->is_frag = true;
375 	keys->ip_proto = fragh->nexthdr;
376 
377 	if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) {
378 		keys->is_first_frag = true;
379 
380 		/* No need to parse fragmented packet unless
381 		 * explicitly asked for.
382 		 */
383 		if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
384 			return export_flow_keys(keys, BPF_OK);
385 	} else {
386 		return export_flow_keys(keys, BPF_OK);
387 	}
388 
389 	return parse_ipv6_proto(skb, fragh->nexthdr);
390 }
391 
392 PROG(MPLS)(struct __sk_buff *skb)
393 {
394 	struct bpf_flow_keys *keys = skb->flow_keys;
395 	struct mpls_label *mpls, _mpls;
396 
397 	mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls);
398 	if (!mpls)
399 		return export_flow_keys(keys, BPF_DROP);
400 
401 	return export_flow_keys(keys, BPF_OK);
402 }
403 
404 PROG(VLAN)(struct __sk_buff *skb)
405 {
406 	struct bpf_flow_keys *keys = skb->flow_keys;
407 	struct vlan_hdr *vlan, _vlan;
408 
409 	/* Account for double-tagging */
410 	if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
411 		vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
412 		if (!vlan)
413 			return export_flow_keys(keys, BPF_DROP);
414 
415 		if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
416 			return export_flow_keys(keys, BPF_DROP);
417 
418 		keys->nhoff += sizeof(*vlan);
419 		keys->thoff += sizeof(*vlan);
420 	}
421 
422 	vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
423 	if (!vlan)
424 		return export_flow_keys(keys, BPF_DROP);
425 
426 	keys->nhoff += sizeof(*vlan);
427 	keys->thoff += sizeof(*vlan);
428 	/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
429 	if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
430 	    vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
431 		return export_flow_keys(keys, BPF_DROP);
432 
433 	keys->n_proto = vlan->h_vlan_encapsulated_proto;
434 	return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
435 }
436 
/* License string emitted into the "license" section of the object file. */
char __license[] SEC("license") = "GPL";
438