xref: /linux/samples/bpf/sockex3_kern.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include "bpf_helpers.h"
9 #include <uapi/linux/in.h>
10 #include <uapi/linux/if.h>
11 #include <uapi/linux/if_ether.h>
12 #include <uapi/linux/ip.h>
13 #include <uapi/linux/ipv6.h>
14 #include <uapi/linux/if_tunnel.h>
15 #include <uapi/linux/mpls.h>
/* Masks for the IPv4 frag_off field as read by load_half():
 * IP_MF selects the "more fragments" flag, IP_OFFSET the fragment offset.
 */
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF
18 
/* Start the definition of a tail-call target: places function bpf_func_<F>
 * in section "socket/<F>". F goes through __stringify(), so a PARSE_* macro
 * presumably ends up as its numeric value in the section name, while the
 * function name keeps the macro's spelling (## does not expand it).
 */
#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
20 
21 struct bpf_map_def SEC("maps") jmp_table = {
22 	.type = BPF_MAP_TYPE_PROG_ARRAY,
23 	.key_size = sizeof(u32),
24 	.value_size = sizeof(u32),
25 	.max_entries = 8,
26 };
27 
/* jmp_table slot of each protocol parser. Keep these as plain #defines:
 * PROG() feeds them to __stringify() to build the ELF section name, which
 * an enum constant would not survive as a number.
 */
#define PARSE_VLAN	1
#define PARSE_MPLS	2
#define PARSE_IP	3
#define PARSE_IPV6	4
32 
33 /* protocol dispatch routine.
34  * It tail-calls next BPF program depending on eth proto
35  * Note, we could have used:
36  * bpf_tail_call(skb, &jmp_table, proto);
37  * but it would need large prog_array
38  */
39 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
40 {
41 	switch (proto) {
42 	case ETH_P_8021Q:
43 	case ETH_P_8021AD:
44 		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
45 		break;
46 	case ETH_P_MPLS_UC:
47 	case ETH_P_MPLS_MC:
48 		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
49 		break;
50 	case ETH_P_IP:
51 		bpf_tail_call(skb, &jmp_table, PARSE_IP);
52 		break;
53 	case ETH_P_IPV6:
54 		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
55 		break;
56 	}
57 }
58 
59 struct vlan_hdr {
60 	__be16 h_vlan_TCI;
61 	__be16 h_vlan_encapsulated_proto;
62 };
63 
64 struct flow_keys {
65 	__be32 src;
66 	__be32 dst;
67 	union {
68 		__be32 ports;
69 		__be16 port16[2];
70 	};
71 	__u32 ip_proto;
72 };
73 
74 static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
75 {
76 	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
77 		& (IP_MF | IP_OFFSET);
78 }
79 
80 static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
81 {
82 	__u64 w0 = load_word(ctx, off);
83 	__u64 w1 = load_word(ctx, off + 4);
84 	__u64 w2 = load_word(ctx, off + 8);
85 	__u64 w3 = load_word(ctx, off + 12);
86 
87 	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
88 }
89 
90 struct globals {
91 	struct flow_keys flow;
92 };
93 
94 struct bpf_map_def SEC("maps") percpu_map = {
95 	.type = BPF_MAP_TYPE_ARRAY,
96 	.key_size = sizeof(__u32),
97 	.value_size = sizeof(struct globals),
98 	.max_entries = 32,
99 };
100 
101 /* user poor man's per_cpu until native support is ready */
102 static struct globals *this_cpu_globals(void)
103 {
104 	u32 key = bpf_get_smp_processor_id();
105 
106 	return bpf_map_lookup_elem(&percpu_map, &key);
107 }
108 
109 /* some simple stats for user space consumption */
110 struct pair {
111 	__u64 packets;
112 	__u64 bytes;
113 };
114 
115 struct bpf_map_def SEC("maps") hash_map = {
116 	.type = BPF_MAP_TYPE_HASH,
117 	.key_size = sizeof(struct flow_keys),
118 	.value_size = sizeof(struct pair),
119 	.max_entries = 1024,
120 };
121 
122 static void update_stats(struct __sk_buff *skb, struct globals *g)
123 {
124 	struct flow_keys key = g->flow;
125 	struct pair *value;
126 
127 	value = bpf_map_lookup_elem(&hash_map, &key);
128 	if (value) {
129 		__sync_fetch_and_add(&value->packets, 1);
130 		__sync_fetch_and_add(&value->bytes, skb->len);
131 	} else {
132 		struct pair val = {1, skb->len};
133 
134 		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
135 	}
136 }
137 
138 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
139 					   struct globals *g, __u32 ip_proto)
140 {
141 	__u32 nhoff = skb->cb[0];
142 	int poff;
143 
144 	switch (ip_proto) {
145 	case IPPROTO_GRE: {
146 		struct gre_hdr {
147 			__be16 flags;
148 			__be16 proto;
149 		};
150 
151 		__u32 gre_flags = load_half(skb,
152 					    nhoff + offsetof(struct gre_hdr, flags));
153 		__u32 gre_proto = load_half(skb,
154 					    nhoff + offsetof(struct gre_hdr, proto));
155 
156 		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
157 			break;
158 
159 		nhoff += 4;
160 		if (gre_flags & GRE_CSUM)
161 			nhoff += 4;
162 		if (gre_flags & GRE_KEY)
163 			nhoff += 4;
164 		if (gre_flags & GRE_SEQ)
165 			nhoff += 4;
166 
167 		skb->cb[0] = nhoff;
168 		parse_eth_proto(skb, gre_proto);
169 		break;
170 	}
171 	case IPPROTO_IPIP:
172 		parse_eth_proto(skb, ETH_P_IP);
173 		break;
174 	case IPPROTO_IPV6:
175 		parse_eth_proto(skb, ETH_P_IPV6);
176 		break;
177 	case IPPROTO_TCP:
178 	case IPPROTO_UDP:
179 		g->flow.ports = load_word(skb, nhoff);
180 	case IPPROTO_ICMP:
181 		g->flow.ip_proto = ip_proto;
182 		update_stats(skb, g);
183 		break;
184 	default:
185 		break;
186 	}
187 }
188 
189 PROG(PARSE_IP)(struct __sk_buff *skb)
190 {
191 	struct globals *g = this_cpu_globals();
192 	__u32 nhoff, verlen, ip_proto;
193 
194 	if (!g)
195 		return 0;
196 
197 	nhoff = skb->cb[0];
198 
199 	if (unlikely(ip_is_fragment(skb, nhoff)))
200 		return 0;
201 
202 	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
203 
204 	if (ip_proto != IPPROTO_GRE) {
205 		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
206 		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
207 	}
208 
209 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
210 	nhoff += (verlen & 0xF) << 2;
211 
212 	skb->cb[0] = nhoff;
213 	parse_ip_proto(skb, g, ip_proto);
214 	return 0;
215 }
216 
217 PROG(PARSE_IPV6)(struct __sk_buff *skb)
218 {
219 	struct globals *g = this_cpu_globals();
220 	__u32 nhoff, ip_proto;
221 
222 	if (!g)
223 		return 0;
224 
225 	nhoff = skb->cb[0];
226 
227 	ip_proto = load_byte(skb,
228 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
229 	g->flow.src = ipv6_addr_hash(skb,
230 				     nhoff + offsetof(struct ipv6hdr, saddr));
231 	g->flow.dst = ipv6_addr_hash(skb,
232 				     nhoff + offsetof(struct ipv6hdr, daddr));
233 	nhoff += sizeof(struct ipv6hdr);
234 
235 	skb->cb[0] = nhoff;
236 	parse_ip_proto(skb, g, ip_proto);
237 	return 0;
238 }
239 
240 PROG(PARSE_VLAN)(struct __sk_buff *skb)
241 {
242 	__u32 nhoff, proto;
243 
244 	nhoff = skb->cb[0];
245 
246 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
247 						h_vlan_encapsulated_proto));
248 	nhoff += sizeof(struct vlan_hdr);
249 	skb->cb[0] = nhoff;
250 
251 	parse_eth_proto(skb, proto);
252 
253 	return 0;
254 }
255 
256 PROG(PARSE_MPLS)(struct __sk_buff *skb)
257 {
258 	__u32 nhoff, label;
259 
260 	nhoff = skb->cb[0];
261 
262 	label = load_word(skb, nhoff);
263 	nhoff += sizeof(struct mpls_label);
264 	skb->cb[0] = nhoff;
265 
266 	if (label & MPLS_LS_S_MASK) {
267 		__u8 verlen = load_byte(skb, nhoff);
268 		if ((verlen & 0xF0) == 4)
269 			parse_eth_proto(skb, ETH_P_IP);
270 		else
271 			parse_eth_proto(skb, ETH_P_IPV6);
272 	} else {
273 		parse_eth_proto(skb, ETH_P_MPLS_UC);
274 	}
275 
276 	return 0;
277 }
278 
279 SEC("socket/0")
280 int main_prog(struct __sk_buff *skb)
281 {
282 	__u32 nhoff = ETH_HLEN;
283 	__u32 proto = load_half(skb, 12);
284 
285 	skb->cb[0] = nhoff;
286 	parse_eth_proto(skb, proto);
287 	return 0;
288 }
289 
290 char _license[] SEC("license") = "GPL";
291