1 // SPDX-License-Identifier: GPL-2.0 2 #include <limits.h> 3 #include <stddef.h> 4 #include <stdbool.h> 5 #include <string.h> 6 #include <linux/pkt_cls.h> 7 #include <linux/bpf.h> 8 #include <linux/in.h> 9 #include <linux/if_ether.h> 10 #include <linux/icmp.h> 11 #include <linux/ip.h> 12 #include <linux/ipv6.h> 13 #include <linux/tcp.h> 14 #include <linux/udp.h> 15 #include <linux/if_packet.h> 16 #include <sys/socket.h> 17 #include <linux/if_tunnel.h> 18 #include <linux/mpls.h> 19 #include "bpf_helpers.h" 20 #include "bpf_endian.h" 21 22 int _version SEC("version") = 1; 23 #define PROG(F) SEC(#F) int bpf_func_##F 24 25 /* These are the identifiers of the BPF programs that will be used in tail 26 * calls. Name is limited to 16 characters, with the terminating character and 27 * bpf_func_ above, we have only 6 to work with, anything after will be cropped. 28 */ 29 enum { 30 IP, 31 IPV6, 32 IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ 33 IPV6FR, /* Fragmentation IPv6 Extension Header */ 34 MPLS, 35 VLAN, 36 }; 37 38 #define IP_MF 0x2000 39 #define IP_OFFSET 0x1FFF 40 #define IP6_MF 0x0001 41 #define IP6_OFFSET 0xFFF8 42 43 struct vlan_hdr { 44 __be16 h_vlan_TCI; 45 __be16 h_vlan_encapsulated_proto; 46 }; 47 48 struct gre_hdr { 49 __be16 flags; 50 __be16 proto; 51 }; 52 53 struct frag_hdr { 54 __u8 nexthdr; 55 __u8 reserved; 56 __be16 frag_off; 57 __be32 identification; 58 }; 59 60 struct { 61 __uint(type, BPF_MAP_TYPE_PROG_ARRAY); 62 __uint(max_entries, 8); 63 __uint(key_size, sizeof(__u32)); 64 __uint(value_size, sizeof(__u32)); 65 } jmp_table SEC(".maps"); 66 67 struct { 68 __uint(type, BPF_MAP_TYPE_ARRAY); 69 __uint(max_entries, 1); 70 __type(key, __u32); 71 __type(value, struct bpf_flow_keys); 72 } last_dissection SEC(".maps"); 73 74 static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, 75 int ret) 76 { 77 struct bpf_flow_keys *val; 78 __u32 key = 0; 79 80 val = bpf_map_lookup_elem(&last_dissection, &key); 81 if (val) 82 memcpy(val, keys, sizeof(*val)); 83 return ret; 84 } 85 86 #define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF) 87 static inline __be32 ip6_flowlabel(const struct ipv6hdr *hdr) 88 { 89 return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; 90 } 91 92 static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, 93 __u16 hdr_size, 94 void *buffer) 95 { 96 void *data_end = (void *)(long)skb->data_end; 97 void *data = (void *)(long)skb->data; 98 __u16 thoff = skb->flow_keys->thoff; 99 __u8 *hdr; 100 101 /* Verifies this variable offset does not overflow */ 102 if (thoff > (USHRT_MAX - hdr_size)) 103 return NULL; 104 105 hdr = data + thoff; 106 if (hdr + hdr_size <= data_end) 107 return hdr; 108 109 if (bpf_skb_load_bytes(skb, thoff, buffer, hdr_size)) 110 return NULL; 111 112 return buffer; 113 } 114 115 /* Dispatches on ETHERTYPE */ 116 static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) 117 { 118 struct bpf_flow_keys *keys = skb->flow_keys; 119 120 switch (proto) { 121 case bpf_htons(ETH_P_IP): 122 bpf_tail_call(skb, &jmp_table, IP); 123 break; 124 case bpf_htons(ETH_P_IPV6): 125 bpf_tail_call(skb, &jmp_table, IPV6); 126 break; 127 case bpf_htons(ETH_P_MPLS_MC): 128 case bpf_htons(ETH_P_MPLS_UC): 129 bpf_tail_call(skb, &jmp_table, MPLS); 130 break; 131 case bpf_htons(ETH_P_8021Q): 132 case bpf_htons(ETH_P_8021AD): 133 bpf_tail_call(skb, &jmp_table, VLAN); 134 break; 135 default: 136 /* Protocol not supported */ 137 return export_flow_keys(keys, BPF_DROP); 138 } 139 140 return export_flow_keys(keys, BPF_DROP); 141 } 142 143 SEC("flow_dissector") 144 int _dissect(struct __sk_buff *skb) 145 { 146 struct bpf_flow_keys *keys = skb->flow_keys; 147 148 return parse_eth_proto(skb, keys->n_proto); 149 } 150 151 /* Parses on IPPROTO_* */ 152 static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) 153 { 154 struct bpf_flow_keys *keys = skb->flow_keys; 155 void *data_end = (void *)(long)skb->data_end; 156 struct icmphdr *icmp, _icmp; 157 struct gre_hdr *gre, _gre; 158 struct ethhdr *eth, _eth; 159 struct tcphdr *tcp, _tcp; 160 struct udphdr *udp, _udp; 161 162 switch (proto) { 163 case IPPROTO_ICMP: 164 icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); 165 if (!icmp) 166 return export_flow_keys(keys, BPF_DROP); 167 return export_flow_keys(keys, BPF_OK); 168 case IPPROTO_IPIP: 169 keys->is_encap = true; 170 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 171 return export_flow_keys(keys, BPF_OK); 172 173 return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); 174 case IPPROTO_IPV6: 175 keys->is_encap = true; 176 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 177 return export_flow_keys(keys, BPF_OK); 178 179 return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); 180 case IPPROTO_GRE: 181 gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); 182 if (!gre) 183 return export_flow_keys(keys, BPF_DROP); 184 185 if (bpf_htons(gre->flags & GRE_VERSION)) 186 /* Only inspect standard GRE packets with version 0 */ 187 return export_flow_keys(keys, BPF_OK); 188 189 keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ 190 if (GRE_IS_CSUM(gre->flags)) 191 keys->thoff += 4; /* Step over chksum and Padding */ 192 if (GRE_IS_KEY(gre->flags)) 193 keys->thoff += 4; /* Step over key */ 194 if (GRE_IS_SEQ(gre->flags)) 195 keys->thoff += 4; /* Step over sequence number */ 196 197 keys->is_encap = true; 198 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) 199 return export_flow_keys(keys, BPF_OK); 200 201 if (gre->proto == bpf_htons(ETH_P_TEB)) { 202 eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), 203 &_eth); 204 if (!eth) 205 return export_flow_keys(keys, BPF_DROP); 206 207 keys->thoff += sizeof(*eth); 208 209 return parse_eth_proto(skb, eth->h_proto); 210 } else { 211 return parse_eth_proto(skb, gre->proto); 212 } 213 case IPPROTO_TCP: 214 tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); 215 if (!tcp) 216 return export_flow_keys(keys, BPF_DROP); 217 218 if (tcp->doff < 5) 219 return export_flow_keys(keys, BPF_DROP); 220 221 if ((__u8 *)tcp + (tcp->doff << 2) > data_end) 222 return export_flow_keys(keys, BPF_DROP); 223 224 keys->sport = tcp->source; 225 keys->dport = tcp->dest; 226 return export_flow_keys(keys, BPF_OK); 227 case IPPROTO_UDP: 228 case IPPROTO_UDPLITE: 229 udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); 230 if (!udp) 231 return export_flow_keys(keys, BPF_DROP); 232 233 keys->sport = udp->source; 234 keys->dport = udp->dest; 235 return export_flow_keys(keys, BPF_OK); 236 default: 237 return export_flow_keys(keys, BPF_DROP); 238 } 239 240 return export_flow_keys(keys, BPF_DROP); 241 } 242 243 static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) 244 { 245 struct bpf_flow_keys *keys = skb->flow_keys; 246 247 switch (nexthdr) { 248 case IPPROTO_HOPOPTS: 249 case IPPROTO_DSTOPTS: 250 bpf_tail_call(skb, &jmp_table, IPV6OP); 251 break; 252 case IPPROTO_FRAGMENT: 253 bpf_tail_call(skb, &jmp_table, IPV6FR); 254 break; 255 default: 256 return parse_ip_proto(skb, nexthdr); 257 } 258 259 return export_flow_keys(keys, BPF_DROP); 260 } 261 262 PROG(IP)(struct __sk_buff *skb) 263 { 264 void *data_end = (void *)(long)skb->data_end; 265 struct bpf_flow_keys *keys = skb->flow_keys; 266 void *data = (void *)(long)skb->data; 267 struct iphdr *iph, _iph; 268 bool done = false; 269 270 iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); 271 if (!iph) 272 return export_flow_keys(keys, BPF_DROP); 273 274 /* IP header cannot be smaller than 20 bytes */ 275 if (iph->ihl < 5) 276 return export_flow_keys(keys, BPF_DROP); 277 278 keys->addr_proto = ETH_P_IP; 279 keys->ipv4_src = iph->saddr; 280 keys->ipv4_dst = iph->daddr; 281 keys->ip_proto = iph->protocol; 282 283 keys->thoff += iph->ihl << 2; 284 if (data + keys->thoff > data_end) 285 return export_flow_keys(keys, BPF_DROP); 286 287 if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { 288 keys->is_frag = true; 289 if (iph->frag_off & bpf_htons(IP_OFFSET)) { 290 /* From second fragment on, packets do not have headers 291 * we can parse. 292 */ 293 done = true; 294 } else { 295 keys->is_first_frag = true; 296 /* No need to parse fragmented packet unless 297 * explicitly asked for. 298 */ 299 if (!(keys->flags & 300 BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) 301 done = true; 302 } 303 } 304 305 if (done) 306 return export_flow_keys(keys, BPF_OK); 307 308 return parse_ip_proto(skb, iph->protocol); 309 } 310 311 PROG(IPV6)(struct __sk_buff *skb) 312 { 313 struct bpf_flow_keys *keys = skb->flow_keys; 314 struct ipv6hdr *ip6h, _ip6h; 315 316 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 317 if (!ip6h) 318 return export_flow_keys(keys, BPF_DROP); 319 320 keys->addr_proto = ETH_P_IPV6; 321 memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); 322 323 keys->thoff += sizeof(struct ipv6hdr); 324 keys->ip_proto = ip6h->nexthdr; 325 keys->flow_label = ip6_flowlabel(ip6h); 326 327 if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) 328 return export_flow_keys(keys, BPF_OK); 329 330 return parse_ipv6_proto(skb, ip6h->nexthdr); 331 } 332 333 PROG(IPV6OP)(struct __sk_buff *skb) 334 { 335 struct bpf_flow_keys *keys = skb->flow_keys; 336 struct ipv6_opt_hdr *ip6h, _ip6h; 337 338 ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 339 if (!ip6h) 340 return export_flow_keys(keys, BPF_DROP); 341 342 /* hlen is in 8-octets and does not include the first 8 bytes 343 * of the header 344 */ 345 keys->thoff += (1 + ip6h->hdrlen) << 3; 346 keys->ip_proto = ip6h->nexthdr; 347 348 return parse_ipv6_proto(skb, ip6h->nexthdr); 349 } 350 351 PROG(IPV6FR)(struct __sk_buff *skb) 352 { 353 struct bpf_flow_keys *keys = skb->flow_keys; 354 struct frag_hdr *fragh, _fragh; 355 356 fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); 357 if (!fragh) 358 return export_flow_keys(keys, BPF_DROP); 359 360 keys->thoff += sizeof(*fragh); 361 keys->is_frag = true; 362 keys->ip_proto = fragh->nexthdr; 363 364 if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) { 365 keys->is_first_frag = true; 366 367 /* No need to parse fragmented packet unless 368 * explicitly asked for. 369 */ 370 if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) 371 return export_flow_keys(keys, BPF_OK); 372 } 373 374 return parse_ipv6_proto(skb, fragh->nexthdr); 375 } 376 377 PROG(MPLS)(struct __sk_buff *skb) 378 { 379 struct bpf_flow_keys *keys = skb->flow_keys; 380 struct mpls_label *mpls, _mpls; 381 382 mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); 383 if (!mpls) 384 return export_flow_keys(keys, BPF_DROP); 385 386 return export_flow_keys(keys, BPF_OK); 387 } 388 389 PROG(VLAN)(struct __sk_buff *skb) 390 { 391 struct bpf_flow_keys *keys = skb->flow_keys; 392 struct vlan_hdr *vlan, _vlan; 393 394 /* Account for double-tagging */ 395 if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { 396 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 397 if (!vlan) 398 return export_flow_keys(keys, BPF_DROP); 399 400 if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) 401 return export_flow_keys(keys, BPF_DROP); 402 403 keys->nhoff += sizeof(*vlan); 404 keys->thoff += sizeof(*vlan); 405 } 406 407 vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 408 if (!vlan) 409 return export_flow_keys(keys, BPF_DROP); 410 411 keys->nhoff += sizeof(*vlan); 412 keys->thoff += sizeof(*vlan); 413 /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ 414 if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || 415 vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) 416 return export_flow_keys(keys, BPF_DROP); 417 418 keys->n_proto = vlan->h_vlan_encapsulated_proto; 419 return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); 420 } 421 422 char __license[] SEC("license") = "GPL"; 423