xref: /linux/samples/bpf/sockex3_kern.c (revision b4db9f840283caca0d904436f187ef56a9126eaa)
1 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include <uapi/linux/in.h>
9 #include <uapi/linux/if.h>
10 #include <uapi/linux/if_ether.h>
11 #include <uapi/linux/ip.h>
12 #include <uapi/linux/ipv6.h>
13 #include <uapi/linux/if_tunnel.h>
14 #include <uapi/linux/mpls.h>
15 #include <bpf/bpf_helpers.h>
16 #include "bpf_legacy.h"
17 #define IP_MF		0x2000
18 #define IP_OFFSET	0x1FFF
19 
20 #define PARSE_VLAN 1
21 #define PARSE_MPLS 2
22 #define PARSE_IP 3
23 #define PARSE_IPV6 4
24 
25 struct vlan_hdr {
26 	__be16 h_vlan_TCI;
27 	__be16 h_vlan_encapsulated_proto;
28 };
29 
30 struct flow_key_record {
31 	__be32 src;
32 	__be32 dst;
33 	union {
34 		__be32 ports;
35 		__be16 port16[2];
36 	};
37 	__u32 ip_proto;
38 };
39 
40 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto);
41 
42 static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
43 {
44 	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
45 		& (IP_MF | IP_OFFSET);
46 }
47 
48 static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
49 {
50 	__u64 w0 = load_word(ctx, off);
51 	__u64 w1 = load_word(ctx, off + 4);
52 	__u64 w2 = load_word(ctx, off + 8);
53 	__u64 w3 = load_word(ctx, off + 12);
54 
55 	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
56 }
57 
58 struct globals {
59 	struct flow_key_record flow;
60 };
61 
62 struct {
63 	__uint(type, BPF_MAP_TYPE_ARRAY);
64 	__type(key, __u32);
65 	__type(value, struct globals);
66 	__uint(max_entries, 32);
67 } percpu_map SEC(".maps");
68 
69 /* user poor man's per_cpu until native support is ready */
70 static struct globals *this_cpu_globals(void)
71 {
72 	u32 key = bpf_get_smp_processor_id();
73 
74 	return bpf_map_lookup_elem(&percpu_map, &key);
75 }
76 
77 /* some simple stats for user space consumption */
78 struct pair {
79 	__u64 packets;
80 	__u64 bytes;
81 };
82 
83 struct {
84 	__uint(type, BPF_MAP_TYPE_HASH);
85 	__type(key, struct flow_key_record);
86 	__type(value, struct pair);
87 	__uint(max_entries, 1024);
88 } hash_map SEC(".maps");
89 
90 static void update_stats(struct __sk_buff *skb, struct globals *g)
91 {
92 	struct flow_key_record key = g->flow;
93 	struct pair *value;
94 
95 	value = bpf_map_lookup_elem(&hash_map, &key);
96 	if (value) {
97 		__sync_fetch_and_add(&value->packets, 1);
98 		__sync_fetch_and_add(&value->bytes, skb->len);
99 	} else {
100 		struct pair val = {1, skb->len};
101 
102 		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
103 	}
104 }
105 
106 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
107 					   struct globals *g, __u32 ip_proto)
108 {
109 	__u32 nhoff = skb->cb[0];
110 	int poff;
111 
112 	switch (ip_proto) {
113 	case IPPROTO_GRE: {
114 		struct gre_hdr {
115 			__be16 flags;
116 			__be16 proto;
117 		};
118 
119 		__u32 gre_flags = load_half(skb,
120 					    nhoff + offsetof(struct gre_hdr, flags));
121 		__u32 gre_proto = load_half(skb,
122 					    nhoff + offsetof(struct gre_hdr, proto));
123 
124 		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
125 			break;
126 
127 		nhoff += 4;
128 		if (gre_flags & GRE_CSUM)
129 			nhoff += 4;
130 		if (gre_flags & GRE_KEY)
131 			nhoff += 4;
132 		if (gre_flags & GRE_SEQ)
133 			nhoff += 4;
134 
135 		skb->cb[0] = nhoff;
136 		parse_eth_proto(skb, gre_proto);
137 		break;
138 	}
139 	case IPPROTO_IPIP:
140 		parse_eth_proto(skb, ETH_P_IP);
141 		break;
142 	case IPPROTO_IPV6:
143 		parse_eth_proto(skb, ETH_P_IPV6);
144 		break;
145 	case IPPROTO_TCP:
146 	case IPPROTO_UDP:
147 		g->flow.ports = load_word(skb, nhoff);
148 	case IPPROTO_ICMP:
149 		g->flow.ip_proto = ip_proto;
150 		update_stats(skb, g);
151 		break;
152 	default:
153 		break;
154 	}
155 }
156 
157 SEC("socket")
158 int bpf_func_ip(struct __sk_buff *skb)
159 {
160 	struct globals *g = this_cpu_globals();
161 	__u32 nhoff, verlen, ip_proto;
162 
163 	if (!g)
164 		return 0;
165 
166 	nhoff = skb->cb[0];
167 
168 	if (unlikely(ip_is_fragment(skb, nhoff)))
169 		return 0;
170 
171 	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
172 
173 	if (ip_proto != IPPROTO_GRE) {
174 		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
175 		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
176 	}
177 
178 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
179 	nhoff += (verlen & 0xF) << 2;
180 
181 	skb->cb[0] = nhoff;
182 	parse_ip_proto(skb, g, ip_proto);
183 	return 0;
184 }
185 
186 SEC("socket")
187 int bpf_func_ipv6(struct __sk_buff *skb)
188 {
189 	struct globals *g = this_cpu_globals();
190 	__u32 nhoff, ip_proto;
191 
192 	if (!g)
193 		return 0;
194 
195 	nhoff = skb->cb[0];
196 
197 	ip_proto = load_byte(skb,
198 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
199 	g->flow.src = ipv6_addr_hash(skb,
200 				     nhoff + offsetof(struct ipv6hdr, saddr));
201 	g->flow.dst = ipv6_addr_hash(skb,
202 				     nhoff + offsetof(struct ipv6hdr, daddr));
203 	nhoff += sizeof(struct ipv6hdr);
204 
205 	skb->cb[0] = nhoff;
206 	parse_ip_proto(skb, g, ip_proto);
207 	return 0;
208 }
209 
210 SEC("socket")
211 int bpf_func_vlan(struct __sk_buff *skb)
212 {
213 	__u32 nhoff, proto;
214 
215 	nhoff = skb->cb[0];
216 
217 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
218 						h_vlan_encapsulated_proto));
219 	nhoff += sizeof(struct vlan_hdr);
220 	skb->cb[0] = nhoff;
221 
222 	parse_eth_proto(skb, proto);
223 
224 	return 0;
225 }
226 
227 SEC("socket")
228 int bpf_func_mpls(struct __sk_buff *skb)
229 {
230 	__u32 nhoff, label;
231 
232 	nhoff = skb->cb[0];
233 
234 	label = load_word(skb, nhoff);
235 	nhoff += sizeof(struct mpls_label);
236 	skb->cb[0] = nhoff;
237 
238 	if (label & MPLS_LS_S_MASK) {
239 		__u8 verlen = load_byte(skb, nhoff);
240 		if ((verlen & 0xF0) == 4)
241 			parse_eth_proto(skb, ETH_P_IP);
242 		else
243 			parse_eth_proto(skb, ETH_P_IPV6);
244 	} else {
245 		parse_eth_proto(skb, ETH_P_MPLS_UC);
246 	}
247 
248 	return 0;
249 }
250 
251 struct {
252 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
253 	__uint(key_size, sizeof(u32));
254 	__uint(max_entries, 8);
255 	__array(values, u32 (void *));
256 } prog_array_init SEC(".maps") = {
257 	.values = {
258 		[PARSE_VLAN] = (void *)&bpf_func_vlan,
259 		[PARSE_IP]   = (void *)&bpf_func_ip,
260 		[PARSE_IPV6] = (void *)&bpf_func_ipv6,
261 		[PARSE_MPLS] = (void *)&bpf_func_mpls,
262 	},
263 };
264 
265 /* Protocol dispatch routine. It tail-calls next BPF program depending
266  * on eth proto. Note, we could have used ...
267  *
268  *   bpf_tail_call(skb, &prog_array_init, proto);
269  *
270  * ... but it would need large prog_array and cannot be optimised given
271  * the map key is not static.
272  */
273 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
274 {
275 	switch (proto) {
276 	case ETH_P_8021Q:
277 	case ETH_P_8021AD:
278 		bpf_tail_call(skb, &prog_array_init, PARSE_VLAN);
279 		break;
280 	case ETH_P_MPLS_UC:
281 	case ETH_P_MPLS_MC:
282 		bpf_tail_call(skb, &prog_array_init, PARSE_MPLS);
283 		break;
284 	case ETH_P_IP:
285 		bpf_tail_call(skb, &prog_array_init, PARSE_IP);
286 		break;
287 	case ETH_P_IPV6:
288 		bpf_tail_call(skb, &prog_array_init, PARSE_IPV6);
289 		break;
290 	}
291 }
292 
293 SEC("socket")
294 int main_prog(struct __sk_buff *skb)
295 {
296 	__u32 nhoff = ETH_HLEN;
297 	__u32 proto = load_half(skb, 12);
298 
299 	skb->cb[0] = nhoff;
300 	parse_eth_proto(skb, proto);
301 	return 0;
302 }
303 
304 char _license[] SEC("license") = "GPL";
305