/* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include <uapi/linux/in.h>
#include <uapi/linux/if.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/if_tunnel.h>
#include <uapi/linux/mpls.h>
#define IP_MF		0x2000
#define IP_OFFSET	0x1FFF

#define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F

struct bpf_map_def SEC("maps") jmp_table = {
	.type = BPF_MAP_TYPE_PROG_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 8,
};

#define PARSE_VLAN 1
#define PARSE_MPLS 2
#define PARSE_IP 3
#define PARSE_IPV6 4

/* Protocol dispatch routine: tail-calls the next BPF program based on
 * the Ethernet protocol. Note that we could have used
 *	bpf_tail_call(skb, &jmp_table, proto);
 * directly, but that would require a much larger prog_array.
 * skb->cb[0] carries the current header offset across tail calls.
 */
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
	switch (proto) {
	case ETH_P_8021Q:
	case ETH_P_8021AD:
		bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
		break;
	case ETH_P_MPLS_UC:
	case ETH_P_MPLS_MC:
		bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
		break;
	case ETH_P_IP:
		bpf_tail_call(skb, &jmp_table, PARSE_IP);
		break;
	case ETH_P_IPV6:
		bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
		break;
	}
}

struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

struct flow_key_record {
	__be32 src;
	__be32 dst;
	union {
		__be32 ports;
		__be16 port16[2];
	};
	__u32 ip_proto;
};

static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
{
	return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
		& (IP_MF | IP_OFFSET);
}

static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
{
	__u64 w0 = load_word(ctx, off);
	__u64 w1 = load_word(ctx, off + 4);
	__u64 w2 = load_word(ctx, off + 8);
	__u64 w3 = load_word(ctx, off + 12);

	return (__u32)(w0 ^ w1 ^ w2 ^ w3);
}

struct globals {
	struct flow_key_record flow;
};

struct bpf_map_def SEC("maps") percpu_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct globals),
	.max_entries = 32,
};

/* Poor man's per-CPU storage, keyed by CPU id, until native per-CPU
 * map support is ready.
 */
static struct globals *this_cpu_globals(void)
{
	u32 key = bpf_get_smp_processor_id();

	return bpf_map_lookup_elem(&percpu_map, &key);
}

/* Simple per-flow stats for user-space consumption. */
struct pair {
	__u64 packets;
	__u64 bytes;
};

struct bpf_map_def SEC("maps") hash_map = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(struct flow_key_record),
	.value_size = sizeof(struct pair),
	.max_entries = 1024,
};

static void update_stats(struct __sk_buff *skb, struct globals *g)
{
	struct flow_key_record key = g->flow;
	struct pair *value;

	value = bpf_map_lookup_elem(&hash_map, &key);
	if (value) {
		__sync_fetch_and_add(&value->packets, 1);
		__sync_fetch_and_add(&value->bytes, skb->len);
	} else {
		struct pair val = {1, skb->len};

		bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
	}
}
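
/* Reading the stats back from user space is a plain map walk with
 * bpf_map_get_next_key()/bpf_map_lookup_elem(). A minimal sketch,
 * assuming libbpf and a map_fd obtained by the loader (the fd name is
 * illustrative, not part of this sample):
 *
 *	struct flow_key_record key = {}, next_key;
 *	struct pair value;
 *
 *	while (bpf_map_get_next_key(map_fd, &key, &next_key) == 0) {
 *		if (bpf_map_lookup_elem(map_fd, &next_key, &value) == 0)
 *			printf("%llu pkts, %llu bytes\n",
 *			       value.packets, value.bytes);
 *		key = next_key;
 *	}
 */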

static __always_inline void parse_ip_proto(struct __sk_buff *skb,
					   struct globals *g, __u32 ip_proto)
{
	__u32 nhoff = skb->cb[0];

	switch (ip_proto) {
	case IPPROTO_GRE: {
		struct gre_hdr {
			__be16 flags;
			__be16 proto;
		};

		__u32 gre_flags = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, flags));
		__u32 gre_proto = load_half(skb,
					    nhoff + offsetof(struct gre_hdr, proto));

		if (gre_flags & (GRE_VERSION|GRE_ROUTING))
			break;

		/* Skip the base GRE header plus any optional fields. */
		nhoff += 4;
		if (gre_flags & GRE_CSUM)
			nhoff += 4;
		if (gre_flags & GRE_KEY)
			nhoff += 4;
		if (gre_flags & GRE_SEQ)
			nhoff += 4;

		skb->cb[0] = nhoff;
		parse_eth_proto(skb, gre_proto);
		break;
	}
	case IPPROTO_IPIP:
		parse_eth_proto(skb, ETH_P_IP);
		break;
	case IPPROTO_IPV6:
		parse_eth_proto(skb, ETH_P_IPV6);
		break;
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		g->flow.ports = load_word(skb, nhoff);
		/* fall through */
	case IPPROTO_ICMP:
		g->flow.ip_proto = ip_proto;
		update_stats(skb, g);
		break;
	default:
		break;
	}
}

PROG(PARSE_IP)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, verlen, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	if (unlikely(ip_is_fragment(skb, nhoff)))
		return 0;

	ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));

	/* For GRE, the inner IP header provides src/dst. */
	if (ip_proto != IPPROTO_GRE) {
		g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	/* version/ihl share the first byte; offsetof() cannot name a bitfield */
	verlen = load_byte(skb, nhoff + 0);
	nhoff += (verlen & 0xF) << 2;

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_IPV6)(struct __sk_buff *skb)
{
	struct globals *g = this_cpu_globals();
	__u32 nhoff, ip_proto;

	if (!g)
		return 0;

	nhoff = skb->cb[0];

	ip_proto = load_byte(skb,
			     nhoff + offsetof(struct ipv6hdr, nexthdr));
	g->flow.src = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, saddr));
	g->flow.dst = ipv6_addr_hash(skb,
				     nhoff + offsetof(struct ipv6hdr, daddr));
	nhoff += sizeof(struct ipv6hdr);

	skb->cb[0] = nhoff;
	parse_ip_proto(skb, g, ip_proto);
	return 0;
}

PROG(PARSE_VLAN)(struct __sk_buff *skb)
{
	__u32 nhoff, proto;

	nhoff = skb->cb[0];

	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
						h_vlan_encapsulated_proto));
	nhoff += sizeof(struct vlan_hdr);
	skb->cb[0] = nhoff;

	parse_eth_proto(skb, proto);

	return 0;
}

PROG(PARSE_MPLS)(struct __sk_buff *skb)
{
	__u32 nhoff, label;

	nhoff = skb->cb[0];

	label = load_word(skb, nhoff);
	nhoff += sizeof(struct mpls_label);
	skb->cb[0] = nhoff;

	if (label & MPLS_LS_S_MASK) {
		/* Bottom of stack: peek at the IP version nibble (the top
		 * four bits of the first payload byte) to pick v4 vs v6.
		 */
		__u8 verlen = load_byte(skb, nhoff);

		if ((verlen & 0xF0) == 0x40)
			parse_eth_proto(skb, ETH_P_IP);
		else
			parse_eth_proto(skb, ETH_P_IPV6);
	} else {
		parse_eth_proto(skb, ETH_P_MPLS_UC);
	}

	return 0;
}

SEC("socket/0")
int main_prog(struct __sk_buff *skb)
{
	__u32 nhoff = ETH_HLEN;
	__u32 proto = load_half(skb, offsetof(struct ethhdr, h_proto));

	skb->cb[0] = nhoff;
	parse_eth_proto(skb, proto);
	return 0;
}

char _license[] SEC("license") = "GPL";
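
/* The tail calls above only work once user space has populated jmp_table:
 * each slot key (PARSE_VLAN, PARSE_MPLS, ...) must hold the fd of the
 * matching "socket/N" program. A minimal sketch, assuming libbpf and an
 * object file named "sockex3_kern.o" (both illustrative, not part of this
 * file):
 *
 *	struct bpf_object *obj = bpf_object__open_file("sockex3_kern.o", NULL);
 *	struct bpf_program *prog;
 *	int jmp_fd, key, fd;
 *
 *	bpf_object__load(obj);
 *	jmp_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
 *	bpf_object__for_each_program(prog, obj) {
 *		const char *sec = bpf_program__section_name(prog);
 *
 *		if (sscanf(sec, "socket/%d", &key) != 1)
 *			continue;
 *		fd = bpf_program__fd(prog);
 *		bpf_map_update_elem(jmp_fd, &key, &fd, BPF_ANY);
 *	}
 *
 * The "socket/0" program (main_prog) is then attached to a packet socket
 * as an SO_ATTACH_BPF filter and kicks off the parse chain.
 */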