xref: /linux/samples/bpf/sockex3_kern.c (revision 662fa3d6099374c4615bf64d06895e3573b935b2)
1 /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
2  *
3  * This program is free software; you can redistribute it and/or
4  * modify it under the terms of version 2 of the GNU General Public
5  * License as published by the Free Software Foundation.
6  */
7 #include <uapi/linux/bpf.h>
8 #include <uapi/linux/in.h>
9 #include <uapi/linux/if.h>
10 #include <uapi/linux/if_ether.h>
11 #include <uapi/linux/ip.h>
12 #include <uapi/linux/ipv6.h>
13 #include <uapi/linux/if_tunnel.h>
14 #include <uapi/linux/mpls.h>
15 #include <bpf/bpf_helpers.h>
16 #include "bpf_legacy.h"
/* Bit masks applied to the IPv4 frag_off field by ip_is_fragment(). */
17 #define IP_MF		0x2000	/* "more fragments" flag bit */
18 #define IP_OFFSET	0x1FFF	/* fragment offset bits */
19 
/* Declare a parser program named bpf_func_<F> and place it in the section
 * "socket/<F>". NOTE(review): assuming __stringify() expands its argument
 * first, PROG(PARSE_IP) lands in "socket/3" while the function name keeps
 * the unexpanded token (bpf_func_PARSE_IP) - confirm against __stringify.
 */
20 #define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
21 
22 struct {
23 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
24 	__uint(key_size, sizeof(u32));
25 	__uint(value_size, sizeof(u32));
26 	__uint(max_entries, 8);
27 } jmp_table SEC(".maps");
28 
/* Slot indexes into jmp_table, one per parser program. They are passed to
 * PROG() to define the parsers and to bpf_tail_call() to reach them.
 */
29 #define PARSE_VLAN 1
30 #define PARSE_MPLS 2
31 #define PARSE_IP 3
32 #define PARSE_IPV6 4
33 
34 /* Protocol dispatch routine. It tail-calls next BPF program depending
35  * on eth proto. Note, we could have used ...
36  *
37  *   bpf_tail_call(skb, &jmp_table, proto);
38  *
39  * ... but it would need large prog_array and cannot be optimised given
40  * the map key is not static.
41  */
42 static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
43 {
44 	switch (proto) {
45 	case ETH_P_8021Q:
46 	case ETH_P_8021AD:
47 		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
48 		break;
49 	case ETH_P_MPLS_UC:
50 	case ETH_P_MPLS_MC:
51 		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
52 		break;
53 	case ETH_P_IP:
54 		bpf_tail_call(skb, &jmp_table, PARSE_IP);
55 		break;
56 	case ETH_P_IPV6:
57 		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
58 		break;
59 	}
60 }
61 
/* Layout of a VLAN header as consumed by the PARSE_VLAN program. The struct
 * is only used for its field offset and its size; no object of this type is
 * instantiated in this file.
 */
62 struct vlan_hdr {
63 	__be16 h_vlan_TCI;
64 	__be16 h_vlan_encapsulated_proto;	/* proto of the header that follows */
65 };
66 
/* Flow description that is built up while a packet is parsed and then used
 * as the key of hash_map.
 */
67 struct flow_key_record {
68 	__be32 src;	/* IPv4 saddr, or XOR-fold of the IPv6 source address */
69 	__be32 dst;	/* IPv4 daddr, or XOR-fold of the IPv6 destination address */
70 	union {
71 		__be32 ports;	/* 32-bit word loaded at the TCP/UDP header offset */
72 		__be16 port16[2];	/* the same word viewed as two 16-bit halves */
73 	};
74 	__u32 ip_proto;	/* protocol number stored by parse_ip_proto() */
75 };
76 
77 static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
78 {
79 	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
80 		& (IP_MF | IP_OFFSET);
81 }
82 
83 static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
84 {
85 	__u64 w0 = load_word(ctx, off);
86 	__u64 w1 = load_word(ctx, off + 4);
87 	__u64 w2 = load_word(ctx, off + 8);
88 	__u64 w3 = load_word(ctx, off + 12);
89 
90 	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
91 }
92 
/* Scratch state shared between the tail-called parser programs; it is the
 * value type of percpu_map and is fetched through this_cpu_globals().
 */
93 struct globals {
94 	struct flow_key_record flow;	/* flow being assembled for the current packet */
95 };
96 
97 struct {
98 	__uint(type, BPF_MAP_TYPE_ARRAY);
99 	__type(key, __u32);
100 	__type(value, struct globals);
101 	__uint(max_entries, 32);
102 } percpu_map SEC(".maps");
103 
104 /* user poor man's per_cpu until native support is ready */
105 static struct globals *this_cpu_globals(void)
106 {
107 	u32 key = bpf_get_smp_processor_id();
108 
109 	return bpf_map_lookup_elem(&percpu_map, &key);
110 }
111 
112 /* some simple stats for user space consumption */
/* Value type of hash_map: running totals for one flow. */
113 struct pair {
114 	__u64 packets;	/* incremented by one per accounted packet */
115 	__u64 bytes;	/* sum of skb->len over the accounted packets */
116 };
117 
118 struct {
119 	__uint(type, BPF_MAP_TYPE_HASH);
120 	__type(key, struct flow_key_record);
121 	__type(value, struct pair);
122 	__uint(max_entries, 1024);
123 } hash_map SEC(".maps");
124 
125 static void update_stats(struct __sk_buff *skb, struct globals *g)
126 {
127 	struct flow_key_record key = g->flow;
128 	struct pair *value;
129 
130 	value = bpf_map_lookup_elem(&hash_map, &key);
131 	if (value) {
132 		__sync_fetch_and_add(&value->packets, 1);
133 		__sync_fetch_and_add(&value->bytes, skb->len);
134 	} else {
135 		struct pair val = {1, skb->len};
136 
137 		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
138 	}
139 }
140 
141 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
142 					   struct globals *g, __u32 ip_proto)
143 {
144 	__u32 nhoff = skb->cb[0];
145 	int poff;
146 
147 	switch (ip_proto) {
148 	case IPPROTO_GRE: {
149 		struct gre_hdr {
150 			__be16 flags;
151 			__be16 proto;
152 		};
153 
154 		__u32 gre_flags = load_half(skb,
155 					    nhoff + offsetof(struct gre_hdr, flags));
156 		__u32 gre_proto = load_half(skb,
157 					    nhoff + offsetof(struct gre_hdr, proto));
158 
159 		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
160 			break;
161 
162 		nhoff += 4;
163 		if (gre_flags & GRE_CSUM)
164 			nhoff += 4;
165 		if (gre_flags & GRE_KEY)
166 			nhoff += 4;
167 		if (gre_flags & GRE_SEQ)
168 			nhoff += 4;
169 
170 		skb->cb[0] = nhoff;
171 		parse_eth_proto(skb, gre_proto);
172 		break;
173 	}
174 	case IPPROTO_IPIP:
175 		parse_eth_proto(skb, ETH_P_IP);
176 		break;
177 	case IPPROTO_IPV6:
178 		parse_eth_proto(skb, ETH_P_IPV6);
179 		break;
180 	case IPPROTO_TCP:
181 	case IPPROTO_UDP:
182 		g->flow.ports = load_word(skb, nhoff);
183 	case IPPROTO_ICMP:
184 		g->flow.ip_proto = ip_proto;
185 		update_stats(skb, g);
186 		break;
187 	default:
188 		break;
189 	}
190 }
191 
192 PROG(PARSE_IP)(struct __sk_buff *skb)
193 {
194 	struct globals *g = this_cpu_globals();
195 	__u32 nhoff, verlen, ip_proto;
196 
197 	if (!g)
198 		return 0;
199 
200 	nhoff = skb->cb[0];
201 
202 	if (unlikely(ip_is_fragment(skb, nhoff)))
203 		return 0;
204 
205 	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
206 
207 	if (ip_proto != IPPROTO_GRE) {
208 		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
209 		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
210 	}
211 
212 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
213 	nhoff += (verlen & 0xF) << 2;
214 
215 	skb->cb[0] = nhoff;
216 	parse_ip_proto(skb, g, ip_proto);
217 	return 0;
218 }
219 
220 PROG(PARSE_IPV6)(struct __sk_buff *skb)
221 {
222 	struct globals *g = this_cpu_globals();
223 	__u32 nhoff, ip_proto;
224 
225 	if (!g)
226 		return 0;
227 
228 	nhoff = skb->cb[0];
229 
230 	ip_proto = load_byte(skb,
231 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
232 	g->flow.src = ipv6_addr_hash(skb,
233 				     nhoff + offsetof(struct ipv6hdr, saddr));
234 	g->flow.dst = ipv6_addr_hash(skb,
235 				     nhoff + offsetof(struct ipv6hdr, daddr));
236 	nhoff += sizeof(struct ipv6hdr);
237 
238 	skb->cb[0] = nhoff;
239 	parse_ip_proto(skb, g, ip_proto);
240 	return 0;
241 }
242 
243 PROG(PARSE_VLAN)(struct __sk_buff *skb)
244 {
245 	__u32 nhoff, proto;
246 
247 	nhoff = skb->cb[0];
248 
249 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
250 						h_vlan_encapsulated_proto));
251 	nhoff += sizeof(struct vlan_hdr);
252 	skb->cb[0] = nhoff;
253 
254 	parse_eth_proto(skb, proto);
255 
256 	return 0;
257 }
258 
259 PROG(PARSE_MPLS)(struct __sk_buff *skb)
260 {
261 	__u32 nhoff, label;
262 
263 	nhoff = skb->cb[0];
264 
265 	label = load_word(skb, nhoff);
266 	nhoff += sizeof(struct mpls_label);
267 	skb->cb[0] = nhoff;
268 
269 	if (label & MPLS_LS_S_MASK) {
270 		__u8 verlen = load_byte(skb, nhoff);
271 		if ((verlen & 0xF0) == 4)
272 			parse_eth_proto(skb, ETH_P_IP);
273 		else
274 			parse_eth_proto(skb, ETH_P_IPV6);
275 	} else {
276 		parse_eth_proto(skb, ETH_P_MPLS_UC);
277 	}
278 
279 	return 0;
280 }
281 
282 SEC("socket/0")
283 int main_prog(struct __sk_buff *skb)
284 {
285 	__u32 nhoff = ETH_HLEN;
286 	__u32 proto = load_half(skb, 12);
287 
288 	skb->cb[0] = nhoff;
289 	parse_eth_proto(skb, proto);
290 	return 0;
291 }
292 
/* License string, emitted into the "license" section of the object file. */
293 char _license[] SEC("license") = "GPL";
294