1 // SPDX-License-Identifier: GPL-2.0 2 #include <test_progs.h> 3 #include <network_helpers.h> 4 #include "xdp_metadata.skel.h" 5 #include "xdp_metadata2.skel.h" 6 #include "xdp_metadata.h" 7 #include "xsk.h" 8 9 #include <bpf/btf.h> 10 #include <linux/errqueue.h> 11 #include <linux/if_link.h> 12 #include <linux/net_tstamp.h> 13 #include <linux/udp.h> 14 #include <sys/mman.h> 15 #include <net/if.h> 16 #include <poll.h> 17 18 #define TX_NAME "veTX" 19 #define RX_NAME "veRX" 20 21 #define UDP_PAYLOAD_BYTES 4 22 23 #define AF_XDP_SOURCE_PORT 1234 24 #define AF_XDP_CONSUMER_PORT 8080 25 26 #define UMEM_NUM 16 27 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 28 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 29 #define XDP_FLAGS XDP_FLAGS_DRV_MODE 30 #define QUEUE_ID 0 31 32 #define TX_ADDR "10.0.0.1" 33 #define RX_ADDR "10.0.0.2" 34 #define PREFIX_LEN "8" 35 #define FAMILY AF_INET 36 37 struct xsk { 38 void *umem_area; 39 struct xsk_umem *umem; 40 struct xsk_ring_prod fill; 41 struct xsk_ring_cons comp; 42 struct xsk_ring_prod tx; 43 struct xsk_ring_cons rx; 44 struct xsk_socket *socket; 45 }; 46 47 static int open_xsk(int ifindex, struct xsk *xsk) 48 { 49 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 50 const struct xsk_socket_config socket_config = { 51 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 52 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 53 .bind_flags = XDP_COPY, 54 }; 55 const struct xsk_umem_config umem_config = { 56 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 57 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 58 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 59 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM, 60 .tx_metadata_len = sizeof(struct xsk_tx_metadata), 61 }; 62 __u32 idx; 63 u64 addr; 64 int ret; 65 int i; 66 67 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 68 if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap")) 69 return -1; 70 71 ret = xsk_umem__create(&xsk->umem, 72 xsk->umem_area, UMEM_SIZE, 73 &xsk->fill, 74 &xsk->comp, 75 &umem_config); 76 if (!ASSERT_OK(ret, "xsk_umem__create")) 77 return ret; 78 79 ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID, 80 xsk->umem, 81 &xsk->rx, 82 &xsk->tx, 83 &socket_config); 84 if (!ASSERT_OK(ret, "xsk_socket__create")) 85 return ret; 86 87 /* First half of umem is for TX. This way address matches 1-to-1 88 * to the completion queue index. 89 */ 90 91 for (i = 0; i < UMEM_NUM / 2; i++) { 92 addr = i * UMEM_FRAME_SIZE; 93 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 94 } 95 96 /* Second half of umem is for RX. */ 97 98 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 99 if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve")) 100 return ret; 101 if (!ASSERT_EQ(idx, 0, "fill idx != 0")) 102 return -1; 103 104 for (i = 0; i < UMEM_NUM / 2; i++) { 105 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 106 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 107 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 108 } 109 xsk_ring_prod__submit(&xsk->fill, ret); 110 111 return 0; 112 } 113 114 static void close_xsk(struct xsk *xsk) 115 { 116 if (xsk->umem) 117 xsk_umem__delete(xsk->umem); 118 if (xsk->socket) 119 xsk_socket__delete(xsk->socket); 120 munmap(xsk->umem_area, UMEM_SIZE); 121 } 122 123 static void ip_csum(struct iphdr *iph) 124 { 125 __u32 sum = 0; 126 __u16 *p; 127 int i; 128 129 iph->check = 0; 130 p = (void *)iph; 131 for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) 132 sum += p[i]; 133 134 while (sum >> 16) 135 sum = (sum & 0xffff) + (sum >> 16); 136 137 iph->check = ~sum; 138 } 139 140 static int generate_packet(struct xsk *xsk, __u16 dst_port) 141 { 142 struct xsk_tx_metadata *meta; 143 struct xdp_desc *tx_desc; 144 struct udphdr *udph; 145 struct ethhdr *eth; 146 struct iphdr *iph; 147 void *data; 148 __u32 idx; 149 int ret; 150 151 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); 152 if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve")) 153 return -1; 154 155 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); 156 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); 157 printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); 158 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); 159 160 meta = data - sizeof(struct xsk_tx_metadata); 161 memset(meta, 0, sizeof(*meta)); 162 meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; 163 164 eth = data; 165 iph = (void *)(eth + 1); 166 udph = (void *)(iph + 1); 167 168 memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN); 169 memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN); 170 eth->h_proto = htons(ETH_P_IP); 171 172 iph->version = 0x4; 173 iph->ihl = 0x5; 174 iph->tos = 0x9; 175 iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES); 176 iph->id = 0; 177 iph->frag_off = 0; 178 iph->ttl = 0; 179 iph->protocol = IPPROTO_UDP; 180 ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); 181 ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); 182 ip_csum(iph); 183 184 udph->source = htons(AF_XDP_SOURCE_PORT); 185 udph->dest = htons(dst_port); 186 udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); 187 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 188 ntohs(udph->len), IPPROTO_UDP, 0); 189 190 memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); 191 192 meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; 193 meta->request.csum_start = sizeof(*eth) + sizeof(*iph); 194 meta->request.csum_offset = offsetof(struct udphdr, check); 195 196 tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; 197 tx_desc->options |= XDP_TX_METADATA; 198 xsk_ring_prod__submit(&xsk->tx, 1); 199 200 ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); 201 if (!ASSERT_GE(ret, 0, "sendto")) 202 return ret; 203 204 return 0; 205 } 206 207 static void complete_tx(struct xsk *xsk) 208 { 209 struct xsk_tx_metadata *meta; 210 __u64 addr; 211 void *data; 212 __u32 idx; 213 214 if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { 215 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); 216 217 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); 218 219 data = xsk_umem__get_data(xsk->umem_area, addr); 220 meta = data - sizeof(struct xsk_tx_metadata); 221 222 ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp"); 223 224 xsk_ring_cons__release(&xsk->comp, 1); 225 } 226 } 227 228 static void refill_rx(struct xsk *xsk, __u64 addr) 229 { 230 __u32 idx; 231 232 if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) { 233 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 234 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 235 xsk_ring_prod__submit(&xsk->fill, 1); 236 } 237 } 238 239 static int verify_xsk_metadata(struct xsk *xsk) 240 { 241 const struct xdp_desc *rx_desc; 242 struct pollfd fds = {}; 243 struct xdp_meta *meta; 244 struct udphdr *udph; 245 struct ethhdr *eth; 246 struct iphdr *iph; 247 __u64 comp_addr; 248 void *data; 249 __u64 addr; 250 __u32 idx = 0; 251 int ret; 252 253 ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); 254 if (!ASSERT_EQ(ret, 0, "recvfrom")) 255 return -1; 256 257 fds.fd = xsk_socket__fd(xsk->socket); 258 fds.events = POLLIN; 259 260 ret = poll(&fds, 1, 1000); 261 if (!ASSERT_GT(ret, 0, "poll")) 262 return -1; 263 264 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 265 if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek")) 266 return -2; 267 268 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 269 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 270 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 271 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", 272 xsk, idx, rx_desc->addr, addr, comp_addr); 273 data = xsk_umem__get_data(xsk->umem_area, addr); 274 275 /* Make sure we got the packet offset correctly. */ 276 277 eth = data; 278 ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); 279 iph = (void *)(eth + 1); 280 ASSERT_EQ((int)iph->version, 4, "iph->version"); 281 udph = (void *)(iph + 1); 282 283 /* custom metadata */ 284 285 meta = data - sizeof(struct xdp_meta); 286 287 if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp")) 288 return -1; 289 290 if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) 291 return -1; 292 293 ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); 294 295 /* checksum offload */ 296 ASSERT_EQ(udph->check, htons(0x721c), "csum"); 297 298 xsk_ring_cons__release(&xsk->rx, 1); 299 refill_rx(xsk, comp_addr); 300 301 return 0; 302 } 303 304 void test_xdp_metadata(void) 305 { 306 struct xdp_metadata2 *bpf_obj2 = NULL; 307 struct xdp_metadata *bpf_obj = NULL; 308 struct bpf_program *new_prog, *prog; 309 struct nstoken *tok = NULL; 310 __u32 queue_id = QUEUE_ID; 311 struct bpf_map *prog_arr; 312 struct xsk tx_xsk = {}; 313 struct xsk rx_xsk = {}; 314 __u32 val, key = 0; 315 int retries = 10; 316 int rx_ifindex; 317 int tx_ifindex; 318 int sock_fd; 319 int ret; 320 321 /* Setup new networking namespace, with a veth pair. */ 322 323 SYS(out, "ip netns add xdp_metadata"); 324 tok = open_netns("xdp_metadata"); 325 SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME 326 " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); 327 SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); 328 SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); 329 SYS(out, "ip link set dev " TX_NAME " up"); 330 SYS(out, "ip link set dev " RX_NAME " up"); 331 SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); 332 SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); 333 334 rx_ifindex = if_nametoindex(RX_NAME); 335 tx_ifindex = if_nametoindex(TX_NAME); 336 337 /* Setup separate AF_XDP for TX and RX interfaces. */ 338 339 ret = open_xsk(tx_ifindex, &tx_xsk); 340 if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) 341 goto out; 342 343 ret = open_xsk(rx_ifindex, &rx_xsk); 344 if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) 345 goto out; 346 347 bpf_obj = xdp_metadata__open(); 348 if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) 349 goto out; 350 351 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 352 bpf_program__set_ifindex(prog, rx_ifindex); 353 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 354 355 if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) 356 goto out; 357 358 /* Make sure we can't add dev-bound programs to prog maps. */ 359 prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr"); 360 if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map")) 361 goto out; 362 363 val = bpf_program__fd(prog); 364 if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key), 365 &val, sizeof(val), BPF_ANY), 366 "update prog_arr")) 367 goto out; 368 369 /* Attach BPF program to RX interface. */ 370 371 ret = bpf_xdp_attach(rx_ifindex, 372 bpf_program__fd(bpf_obj->progs.rx), 373 XDP_FLAGS, NULL); 374 if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) 375 goto out; 376 377 sock_fd = xsk_socket__fd(rx_xsk.socket); 378 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 379 if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) 380 goto out; 381 382 /* Send packet destined to RX AF_XDP socket. */ 383 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 384 "generate AF_XDP_CONSUMER_PORT")) 385 goto out; 386 387 /* Verify AF_XDP RX packet has proper metadata. */ 388 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, 389 "verify_xsk_metadata")) 390 goto out; 391 392 complete_tx(&tx_xsk); 393 394 /* Make sure freplace correctly picks up original bound device 395 * and doesn't crash. 396 */ 397 398 bpf_obj2 = xdp_metadata2__open(); 399 if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton")) 400 goto out; 401 402 new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx"); 403 bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx"); 404 405 if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton")) 406 goto out; 407 408 if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) 409 goto out; 410 411 /* Send packet to trigger . */ 412 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 413 "generate freplace packet")) 414 goto out; 415 416 while (!retries--) { 417 if (bpf_obj2->bss->called) 418 break; 419 usleep(10); 420 } 421 ASSERT_GT(bpf_obj2->bss->called, 0, "not called"); 422 423 out: 424 close_xsk(&rx_xsk); 425 close_xsk(&tx_xsk); 426 xdp_metadata2__destroy(bpf_obj2); 427 xdp_metadata__destroy(bpf_obj); 428 if (tok) 429 close_netns(tok); 430 SYS_NOFAIL("ip netns del xdp_metadata"); 431 } 432