1 // SPDX-License-Identifier: GPL-2.0 2 #include <test_progs.h> 3 #include <network_helpers.h> 4 #include "xdp_metadata.skel.h" 5 #include "xdp_metadata2.skel.h" 6 #include "xdp_metadata.h" 7 #include "xsk.h" 8 9 #include <bpf/btf.h> 10 #include <linux/errqueue.h> 11 #include <linux/if_link.h> 12 #include <linux/net_tstamp.h> 13 #include <netinet/udp.h> 14 #include <sys/mman.h> 15 #include <net/if.h> 16 #include <poll.h> 17 18 #define TX_NAME "veTX" 19 #define RX_NAME "veRX" 20 21 #define UDP_PAYLOAD_BYTES 4 22 23 #define UDP_SOURCE_PORT 1234 24 #define AF_XDP_CONSUMER_PORT 8080 25 26 #define UMEM_NUM 16 27 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 28 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 29 #define XDP_FLAGS XDP_FLAGS_DRV_MODE 30 #define QUEUE_ID 0 31 32 #define TX_ADDR "10.0.0.1" 33 #define RX_ADDR "10.0.0.2" 34 #define PREFIX_LEN "8" 35 #define FAMILY AF_INET 36 #define TX_NETNS_NAME "xdp_metadata_tx" 37 #define RX_NETNS_NAME "xdp_metadata_rx" 38 #define TX_MAC "00:00:00:00:00:01" 39 #define RX_MAC "00:00:00:00:00:02" 40 41 #define VLAN_ID 59 42 #define VLAN_PROTO "802.1Q" 43 #define VLAN_PID htons(ETH_P_8021Q) 44 #define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID) 45 46 #define XDP_RSS_TYPE_L4 BIT(3) 47 #define VLAN_VID_MASK 0xfff 48 49 struct xsk { 50 void *umem_area; 51 struct xsk_umem *umem; 52 struct xsk_ring_prod fill; 53 struct xsk_ring_cons comp; 54 struct xsk_ring_prod tx; 55 struct xsk_ring_cons rx; 56 struct xsk_socket *socket; 57 }; 58 59 static int open_xsk(int ifindex, struct xsk *xsk) 60 { 61 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 62 const struct xsk_socket_config socket_config = { 63 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 64 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 65 .bind_flags = XDP_COPY, 66 }; 67 const struct xsk_umem_config umem_config = { 68 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 69 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 70 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 71 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM | 72 XDP_UMEM_TX_METADATA_LEN, 73 .tx_metadata_len = sizeof(struct xsk_tx_metadata), 74 }; 75 __u32 idx; 76 u64 addr; 77 int ret; 78 int i; 79 80 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 81 if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap")) 82 return -1; 83 84 ret = xsk_umem__create(&xsk->umem, 85 xsk->umem_area, UMEM_SIZE, 86 &xsk->fill, 87 &xsk->comp, 88 &umem_config); 89 if (!ASSERT_OK(ret, "xsk_umem__create")) 90 return ret; 91 92 ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID, 93 xsk->umem, 94 &xsk->rx, 95 &xsk->tx, 96 &socket_config); 97 if (!ASSERT_OK(ret, "xsk_socket__create")) 98 return ret; 99 100 /* First half of umem is for TX. This way address matches 1-to-1 101 * to the completion queue index. 102 */ 103 104 for (i = 0; i < UMEM_NUM / 2; i++) { 105 addr = i * UMEM_FRAME_SIZE; 106 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 107 } 108 109 /* Second half of umem is for RX. */ 110 111 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 112 if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve")) 113 return ret; 114 if (!ASSERT_EQ(idx, 0, "fill idx != 0")) 115 return -1; 116 117 for (i = 0; i < UMEM_NUM / 2; i++) { 118 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 119 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 120 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 121 } 122 xsk_ring_prod__submit(&xsk->fill, ret); 123 124 return 0; 125 } 126 127 static void close_xsk(struct xsk *xsk) 128 { 129 if (xsk->umem) 130 xsk_umem__delete(xsk->umem); 131 if (xsk->socket) 132 xsk_socket__delete(xsk->socket); 133 munmap(xsk->umem_area, UMEM_SIZE); 134 } 135 136 static int generate_packet(struct xsk *xsk, __u16 dst_port) 137 { 138 struct xsk_tx_metadata *meta; 139 struct xdp_desc *tx_desc; 140 struct udphdr *udph; 141 struct ethhdr *eth; 142 struct iphdr *iph; 143 void *data; 144 __u32 idx; 145 int ret; 146 147 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); 148 if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve")) 149 return -1; 150 151 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); 152 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); 153 printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); 154 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); 155 156 meta = data - sizeof(struct xsk_tx_metadata); 157 memset(meta, 0, sizeof(*meta)); 158 meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; 159 160 eth = data; 161 iph = (void *)(eth + 1); 162 udph = (void *)(iph + 1); 163 164 memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN); 165 memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN); 166 eth->h_proto = htons(ETH_P_IP); 167 168 iph->version = 0x4; 169 iph->ihl = 0x5; 170 iph->tos = 0x9; 171 iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES); 172 iph->id = 0; 173 iph->frag_off = 0; 174 iph->ttl = 0; 175 iph->protocol = IPPROTO_UDP; 176 ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); 177 ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); 178 iph->check = build_ip_csum(iph); 179 180 udph->source = htons(UDP_SOURCE_PORT); 181 udph->dest = htons(dst_port); 182 udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); 183 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 184 ntohs(udph->len), IPPROTO_UDP, 0); 185 186 memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); 187 188 meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; 189 meta->request.csum_start = sizeof(*eth) + sizeof(*iph); 190 meta->request.csum_offset = offsetof(struct udphdr, check); 191 192 tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; 193 tx_desc->options |= XDP_TX_METADATA; 194 xsk_ring_prod__submit(&xsk->tx, 1); 195 196 ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); 197 if (!ASSERT_GE(ret, 0, "sendto")) 198 return ret; 199 200 return 0; 201 } 202 203 static int generate_packet_inet(void) 204 { 205 char udp_payload[UDP_PAYLOAD_BYTES]; 206 struct sockaddr_in rx_addr; 207 int sock_fd, err = 0; 208 209 /* Build a packet */ 210 memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); 211 rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); 212 rx_addr.sin_family = AF_INET; 213 rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT); 214 215 sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); 216 if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) 217 return sock_fd; 218 219 err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, 220 (void *)&rx_addr, sizeof(rx_addr)); 221 ASSERT_GE(err, 0, "sendto"); 222 223 close(sock_fd); 224 return err; 225 } 226 227 static void complete_tx(struct xsk *xsk) 228 { 229 struct xsk_tx_metadata *meta; 230 __u64 addr; 231 void *data; 232 __u32 idx; 233 234 if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { 235 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); 236 237 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); 238 239 data = xsk_umem__get_data(xsk->umem_area, addr); 240 meta = data - sizeof(struct xsk_tx_metadata); 241 242 ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp"); 243 244 xsk_ring_cons__release(&xsk->comp, 1); 245 } 246 } 247 248 static void refill_rx(struct xsk *xsk, __u64 addr) 249 { 250 __u32 idx; 251 252 if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) { 253 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 254 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 255 xsk_ring_prod__submit(&xsk->fill, 1); 256 } 257 } 258 259 static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) 260 { 261 const struct xdp_desc *rx_desc; 262 struct pollfd fds = {}; 263 struct xdp_meta *meta; 264 struct udphdr *udph; 265 struct ethhdr *eth; 266 struct iphdr *iph; 267 __u64 comp_addr; 268 void *data; 269 __u64 addr; 270 __u32 idx = 0; 271 int ret; 272 273 ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); 274 if (!ASSERT_EQ(ret, 0, "recvfrom")) 275 return -1; 276 277 fds.fd = xsk_socket__fd(xsk->socket); 278 fds.events = POLLIN; 279 280 ret = poll(&fds, 1, 1000); 281 if (!ASSERT_GT(ret, 0, "poll")) 282 return -1; 283 284 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 285 if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek")) 286 return -2; 287 288 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 289 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 290 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 291 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", 292 xsk, idx, rx_desc->addr, addr, comp_addr); 293 data = xsk_umem__get_data(xsk->umem_area, addr); 294 295 /* Make sure we got the packet offset correctly. */ 296 297 eth = data; 298 ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); 299 iph = (void *)(eth + 1); 300 ASSERT_EQ((int)iph->version, 4, "iph->version"); 301 udph = (void *)(iph + 1); 302 303 /* custom metadata */ 304 305 meta = data - sizeof(struct xdp_meta); 306 307 if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp")) 308 return -1; 309 310 if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) 311 return -1; 312 313 if (!sent_from_af_xdp) { 314 if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) 315 return -1; 316 317 if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci")) 318 return -1; 319 320 if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) 321 return -1; 322 goto done; 323 } 324 325 ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); 326 327 /* checksum offload */ 328 ASSERT_EQ(udph->check, htons(0x721c), "csum"); 329 330 done: 331 xsk_ring_cons__release(&xsk->rx, 1); 332 refill_rx(xsk, comp_addr); 333 334 return 0; 335 } 336 337 static void switch_ns_to_rx(struct nstoken **tok) 338 { 339 close_netns(*tok); 340 *tok = open_netns(RX_NETNS_NAME); 341 } 342 343 static void switch_ns_to_tx(struct nstoken **tok) 344 { 345 close_netns(*tok); 346 *tok = open_netns(TX_NETNS_NAME); 347 } 348 349 void test_xdp_metadata(void) 350 { 351 struct xdp_metadata2 *bpf_obj2 = NULL; 352 struct xdp_metadata *bpf_obj = NULL; 353 struct bpf_program *new_prog, *prog; 354 struct bpf_devmap_val devmap_e = {}; 355 struct bpf_map *prog_arr, *devmap; 356 struct nstoken *tok = NULL; 357 __u32 queue_id = QUEUE_ID; 358 struct xsk tx_xsk = {}; 359 struct xsk rx_xsk = {}; 360 __u32 val, key = 0; 361 int retries = 10; 362 int rx_ifindex; 363 int tx_ifindex; 364 int sock_fd; 365 int ret; 366 367 /* Setup new networking namespaces, with a veth pair. */ 368 SYS(out, "ip netns add " TX_NETNS_NAME); 369 SYS(out, "ip netns add " RX_NETNS_NAME); 370 371 tok = open_netns(TX_NETNS_NAME); 372 if (!ASSERT_OK_PTR(tok, "setns")) 373 goto out; 374 SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME 375 " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); 376 SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); 377 378 SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); 379 SYS(out, "ip link set dev " TX_NAME " up"); 380 381 SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN 382 " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID)); 383 SYS(out, "ip link set dev " TX_NAME_VLAN " up"); 384 SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); 385 386 /* Avoid ARP calls */ 387 SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); 388 389 switch_ns_to_rx(&tok); 390 if (!ASSERT_OK_PTR(tok, "setns rx")) 391 goto out; 392 393 SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); 394 SYS(out, "ip link set dev " RX_NAME " up"); 395 SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); 396 397 rx_ifindex = if_nametoindex(RX_NAME); 398 399 /* Setup separate AF_XDP for RX interface. */ 400 401 ret = open_xsk(rx_ifindex, &rx_xsk); 402 if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) 403 goto out; 404 405 bpf_obj = xdp_metadata__open(); 406 if (!ASSERT_OK_PTR(bpf_obj, "open skeleton")) 407 goto out; 408 409 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 410 bpf_program__set_ifindex(prog, rx_ifindex); 411 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 412 413 /* Make sure we can load a dev-bound program that performs 414 * XDP_REDIRECT into a devmap. 415 */ 416 new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect"); 417 bpf_program__set_ifindex(new_prog, rx_ifindex); 418 bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY); 419 420 if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton")) 421 goto out; 422 423 /* Make sure we can't add dev-bound programs to prog maps. */ 424 prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr"); 425 if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map")) 426 goto out; 427 428 val = bpf_program__fd(prog); 429 if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key), 430 &val, sizeof(val), BPF_ANY), 431 "update prog_arr")) 432 goto out; 433 434 /* Make sure we can't add dev-bound programs to devmaps. */ 435 devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map"); 436 if (!ASSERT_OK_PTR(devmap, "no dev_map found")) 437 goto out; 438 439 devmap_e.bpf_prog.fd = val; 440 if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key), 441 &devmap_e, sizeof(devmap_e), 442 BPF_ANY), 443 "update dev_map")) 444 goto out; 445 446 /* Attach BPF program to RX interface. */ 447 448 ret = bpf_xdp_attach(rx_ifindex, 449 bpf_program__fd(bpf_obj->progs.rx), 450 XDP_FLAGS, NULL); 451 if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) 452 goto out; 453 454 sock_fd = xsk_socket__fd(rx_xsk.socket); 455 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 456 if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) 457 goto out; 458 459 switch_ns_to_tx(&tok); 460 if (!ASSERT_OK_PTR(tok, "setns tx")) 461 goto out; 462 463 /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */ 464 tx_ifindex = if_nametoindex(TX_NAME); 465 ret = open_xsk(tx_ifindex, &tx_xsk); 466 if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) 467 goto out; 468 469 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 470 "generate AF_XDP_CONSUMER_PORT")) 471 goto out; 472 473 switch_ns_to_rx(&tok); 474 if (!ASSERT_OK_PTR(tok, "setns rx")) 475 goto out; 476 477 /* Verify packet sent from AF_XDP has proper metadata. */ 478 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, 479 "verify_xsk_metadata")) 480 goto out; 481 482 switch_ns_to_tx(&tok); 483 if (!ASSERT_OK_PTR(tok, "setns tx")) 484 goto out; 485 complete_tx(&tx_xsk); 486 487 /* Now check metadata of packet, generated with network stack */ 488 if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet")) 489 goto out; 490 491 switch_ns_to_rx(&tok); 492 if (!ASSERT_OK_PTR(tok, "setns rx")) 493 goto out; 494 495 if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, 496 "verify_xsk_metadata")) 497 goto out; 498 499 /* Make sure freplace correctly picks up original bound device 500 * and doesn't crash. 501 */ 502 503 bpf_obj2 = xdp_metadata2__open(); 504 if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton")) 505 goto out; 506 507 new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx"); 508 bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx"); 509 510 if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton")) 511 goto out; 512 513 if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) 514 goto out; 515 516 switch_ns_to_tx(&tok); 517 if (!ASSERT_OK_PTR(tok, "setns tx")) 518 goto out; 519 520 /* Send packet to trigger . */ 521 if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, 522 "generate freplace packet")) 523 goto out; 524 525 switch_ns_to_rx(&tok); 526 if (!ASSERT_OK_PTR(tok, "setns rx")) 527 goto out; 528 529 while (!retries--) { 530 if (bpf_obj2->bss->called) 531 break; 532 usleep(10); 533 } 534 ASSERT_GT(bpf_obj2->bss->called, 0, "not called"); 535 536 out: 537 close_xsk(&rx_xsk); 538 close_xsk(&tx_xsk); 539 xdp_metadata2__destroy(bpf_obj2); 540 xdp_metadata__destroy(bpf_obj); 541 if (tok) 542 close_netns(tok); 543 SYS_NOFAIL("ip netns del " RX_NETNS_NAME); 544 SYS_NOFAIL("ip netns del " TX_NETNS_NAME); 545 } 546