1 // SPDX-License-Identifier: GPL-2.0 2 // Copyright (c) 2022 Meta 3 4 #include <stddef.h> 5 #include <stdint.h> 6 #include <stdbool.h> 7 #include <linux/bpf.h> 8 #include <linux/stddef.h> 9 #include <linux/pkt_cls.h> 10 #include <linux/if_ether.h> 11 #include <linux/in.h> 12 #include <linux/ip.h> 13 #include <linux/ipv6.h> 14 #include <linux/tcp.h> 15 #include <linux/udp.h> 16 #include <bpf/bpf_helpers.h> 17 #include <bpf/bpf_endian.h> 18 19 /* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst 20 * | | 21 * ns_src | ns_fwd | ns_dst 22 * 23 * ns_src and ns_dst: ENDHOST namespace 24 * ns_fwd: Fowarding namespace 25 */ 26 27 #define ctx_ptr(field) (void *)(long)(field) 28 29 #define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */ 30 #define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */ 31 32 #define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 33 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 34 #define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ 35 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } 36 37 #define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ 38 a.s6_addr32[1] == b.s6_addr32[1] && \ 39 a.s6_addr32[2] == b.s6_addr32[2] && \ 40 a.s6_addr32[3] == b.s6_addr32[3]) 41 42 volatile const __u32 IFINDEX_SRC; 43 volatile const __u32 IFINDEX_DST; 44 45 #define EGRESS_ENDHOST_MAGIC 0x0b9fbeef 46 #define INGRESS_FWDNS_MAGIC 0x1b9fbeef 47 #define EGRESS_FWDNS_MAGIC 0x2b9fbeef 48 49 enum { 50 INGRESS_FWDNS_P100, 51 INGRESS_FWDNS_P101, 52 EGRESS_FWDNS_P100, 53 EGRESS_FWDNS_P101, 54 INGRESS_ENDHOST, 55 EGRESS_ENDHOST, 56 SET_DTIME, 57 __MAX_CNT, 58 }; 59 60 enum { 61 TCP_IP6_CLEAR_DTIME, 62 TCP_IP4, 63 TCP_IP6, 64 UDP_IP4, 65 UDP_IP6, 66 TCP_IP4_RT_FWD, 67 TCP_IP6_RT_FWD, 68 UDP_IP4_RT_FWD, 69 UDP_IP6_RT_FWD, 70 UKN_TEST, 71 __NR_TESTS, 72 }; 73 74 enum { 75 SRC_NS = 1, 76 DST_NS, 77 }; 78 79 __u32 dtimes[__NR_TESTS][__MAX_CNT] = {}; 80 __u32 errs[__NR_TESTS][__MAX_CNT] = {}; 81 __u32 test = 0; 82 83 static void inc_dtimes(__u32 idx) 84 { 85 if (test < __NR_TESTS) 86 dtimes[test][idx]++; 87 else 88 dtimes[UKN_TEST][idx]++; 89 } 90 91 static void inc_errs(__u32 idx) 92 { 93 if (test < __NR_TESTS) 94 errs[test][idx]++; 95 else 96 errs[UKN_TEST][idx]++; 97 } 98 99 static int skb_proto(int type) 100 { 101 return type & 0xff; 102 } 103 104 static int skb_ns(int type) 105 { 106 return (type >> 8) & 0xff; 107 } 108 109 static bool fwdns_clear_dtime(void) 110 { 111 return test == TCP_IP6_CLEAR_DTIME; 112 } 113 114 static bool bpf_fwd(void) 115 { 116 return test < TCP_IP4_RT_FWD; 117 } 118 119 static __u8 get_proto(void) 120 { 121 switch (test) { 122 case UDP_IP4: 123 case UDP_IP6: 124 case UDP_IP4_RT_FWD: 125 case UDP_IP6_RT_FWD: 126 return IPPROTO_UDP; 127 default: 128 return IPPROTO_TCP; 129 } 130 } 131 132 /* -1: parse error: TC_ACT_SHOT 133 * 0: not testing traffic: TC_ACT_OK 134 * >0: first byte is the inet_proto, second byte has the netns 135 * of the sender 136 */ 137 static int skb_get_type(struct __sk_buff *skb) 138 { 139 __u16 dst_ns_port = __bpf_htons(50000 + test); 140 void *data_end = ctx_ptr(skb->data_end); 141 void *data = ctx_ptr(skb->data); 142 __u8 inet_proto = 0, ns = 0; 143 struct ipv6hdr *ip6h; 144 __u16 sport, dport; 145 struct iphdr *iph; 146 struct tcphdr *th; 147 struct udphdr *uh; 148 void *trans; 149 150 switch (skb->protocol) { 151 case __bpf_htons(ETH_P_IP): 152 iph = data + sizeof(struct ethhdr); 153 if (iph + 1 > data_end) 154 return -1; 155 if (iph->saddr == ip4_src) 156 ns = SRC_NS; 157 else if (iph->saddr == ip4_dst) 158 ns = DST_NS; 159 inet_proto = iph->protocol; 160 trans = iph + 1; 161 break; 162 case __bpf_htons(ETH_P_IPV6): 163 ip6h = data + sizeof(struct ethhdr); 164 if (ip6h + 1 > data_end) 165 return -1; 166 if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}})) 167 ns = SRC_NS; 168 else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}})) 169 ns = DST_NS; 170 inet_proto = ip6h->nexthdr; 171 trans = ip6h + 1; 172 break; 173 default: 174 return 0; 175 } 176 177 /* skb is not from src_ns or dst_ns. 178 * skb is not the testing IPPROTO. 179 */ 180 if (!ns || inet_proto != get_proto()) 181 return 0; 182 183 switch (inet_proto) { 184 case IPPROTO_TCP: 185 th = trans; 186 if (th + 1 > data_end) 187 return -1; 188 sport = th->source; 189 dport = th->dest; 190 break; 191 case IPPROTO_UDP: 192 uh = trans; 193 if (uh + 1 > data_end) 194 return -1; 195 sport = uh->source; 196 dport = uh->dest; 197 break; 198 default: 199 return 0; 200 } 201 202 /* The skb is the testing traffic */ 203 if ((ns == SRC_NS && dport == dst_ns_port) || 204 (ns == DST_NS && sport == dst_ns_port)) 205 return (ns << 8 | inet_proto); 206 207 return 0; 208 } 209 210 /* format: direction@iface@netns 211 * egress@veth_(src|dst)@ns_(src|dst) 212 */ 213 SEC("tc") 214 int egress_host(struct __sk_buff *skb) 215 { 216 int skb_type; 217 218 skb_type = skb_get_type(skb); 219 if (skb_type == -1) 220 return TC_ACT_SHOT; 221 if (!skb_type) 222 return TC_ACT_OK; 223 224 if (skb_proto(skb_type) == IPPROTO_TCP) { 225 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && 226 skb->tstamp) 227 inc_dtimes(EGRESS_ENDHOST); 228 else 229 inc_errs(EGRESS_ENDHOST); 230 } else if (skb_proto(skb_type) == IPPROTO_UDP) { 231 if (skb->tstamp_type == BPF_SKB_CLOCK_TAI && 232 skb->tstamp) 233 inc_dtimes(EGRESS_ENDHOST); 234 else 235 inc_errs(EGRESS_ENDHOST); 236 } else { 237 if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME && 238 skb->tstamp) 239 inc_errs(EGRESS_ENDHOST); 240 } 241 242 skb->tstamp = EGRESS_ENDHOST_MAGIC; 243 244 return TC_ACT_OK; 245 } 246 247 /* ingress@veth_(src|dst)@ns_(src|dst) */ 248 SEC("tc") 249 int ingress_host(struct __sk_buff *skb) 250 { 251 int skb_type; 252 253 skb_type = skb_get_type(skb); 254 if (skb_type == -1) 255 return TC_ACT_SHOT; 256 if (!skb_type) 257 return TC_ACT_OK; 258 259 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC && 260 skb->tstamp == EGRESS_FWDNS_MAGIC) 261 inc_dtimes(INGRESS_ENDHOST); 262 else 263 inc_errs(INGRESS_ENDHOST); 264 265 return TC_ACT_OK; 266 } 267 268 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 269 SEC("tc") 270 int ingress_fwdns_prio100(struct __sk_buff *skb) 271 { 272 int skb_type; 273 274 skb_type = skb_get_type(skb); 275 if (skb_type == -1) 276 return TC_ACT_SHOT; 277 if (!skb_type) 278 return TC_ACT_OK; 279 280 /* delivery_time is only available to the ingress 281 * if the tc-bpf checks the skb->tstamp_type. 282 */ 283 if (skb->tstamp == EGRESS_ENDHOST_MAGIC) 284 inc_errs(INGRESS_FWDNS_P100); 285 286 if (fwdns_clear_dtime()) 287 skb->tstamp = 0; 288 289 return TC_ACT_UNSPEC; 290 } 291 292 /* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */ 293 SEC("tc") 294 int egress_fwdns_prio100(struct __sk_buff *skb) 295 { 296 int skb_type; 297 298 skb_type = skb_get_type(skb); 299 if (skb_type == -1) 300 return TC_ACT_SHOT; 301 if (!skb_type) 302 return TC_ACT_OK; 303 304 /* delivery_time is always available to egress even 305 * the tc-bpf did not use the tstamp_type. 306 */ 307 if (skb->tstamp == INGRESS_FWDNS_MAGIC) 308 inc_dtimes(EGRESS_FWDNS_P100); 309 else 310 inc_errs(EGRESS_FWDNS_P100); 311 312 if (fwdns_clear_dtime()) 313 skb->tstamp = 0; 314 315 return TC_ACT_UNSPEC; 316 } 317 318 /* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 319 SEC("tc") 320 int ingress_fwdns_prio101(struct __sk_buff *skb) 321 { 322 int skb_type; 323 324 skb_type = skb_get_type(skb); 325 if (skb_type == -1 || !skb_type) 326 /* Should have handled in prio100 */ 327 return TC_ACT_SHOT; 328 329 if (skb->tstamp_type) { 330 if (fwdns_clear_dtime() || 331 (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC && 332 skb->tstamp_type != BPF_SKB_CLOCK_TAI) || 333 skb->tstamp != EGRESS_ENDHOST_MAGIC) 334 inc_errs(INGRESS_FWDNS_P101); 335 else 336 inc_dtimes(INGRESS_FWDNS_P101); 337 } else { 338 if (!fwdns_clear_dtime()) 339 inc_errs(INGRESS_FWDNS_P101); 340 } 341 342 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { 343 skb->tstamp = INGRESS_FWDNS_MAGIC; 344 } else { 345 if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC, 346 BPF_SKB_CLOCK_MONOTONIC)) 347 inc_errs(SET_DTIME); 348 } 349 350 if (skb_ns(skb_type) == SRC_NS) 351 return bpf_fwd() ? 352 bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK; 353 else 354 return bpf_fwd() ? 355 bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK; 356 } 357 358 /* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */ 359 SEC("tc") 360 int egress_fwdns_prio101(struct __sk_buff *skb) 361 { 362 int skb_type; 363 364 skb_type = skb_get_type(skb); 365 if (skb_type == -1 || !skb_type) 366 /* Should have handled in prio100 */ 367 return TC_ACT_SHOT; 368 369 if (skb->tstamp_type) { 370 if (fwdns_clear_dtime() || 371 skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC || 372 skb->tstamp != INGRESS_FWDNS_MAGIC) 373 inc_errs(EGRESS_FWDNS_P101); 374 else 375 inc_dtimes(EGRESS_FWDNS_P101); 376 } else { 377 if (!fwdns_clear_dtime()) 378 inc_errs(EGRESS_FWDNS_P101); 379 } 380 381 if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) { 382 skb->tstamp = EGRESS_FWDNS_MAGIC; 383 } else { 384 if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC, 385 BPF_SKB_CLOCK_MONOTONIC)) 386 inc_errs(SET_DTIME); 387 } 388 389 return TC_ACT_OK; 390 } 391 392 char __license[] SEC("license") = "GPL"; 393