1 // SPDX-License-Identifier: GPL-2.0 2 3 /* Reference program for verifying XDP metadata on real HW. Functional test 4 * only, doesn't test the performance. 5 * 6 * RX: 7 * - UDP 9091 packets are diverted into AF_XDP 8 * - Metadata verified: 9 * - rx_timestamp 10 * - rx_hash 11 * 12 * TX: 13 * - UDP 9091 packets trigger TX reply 14 * - TX HW timestamp is requested and reported back upon completion 15 * - TX checksum is requested 16 * - TX launch time HW offload is requested for transmission 17 */ 18 19 #include <test_progs.h> 20 #include <network_helpers.h> 21 #include "xdp_hw_metadata.skel.h" 22 #include "xsk.h" 23 24 #include <error.h> 25 #include <linux/kernel.h> 26 #include <linux/bits.h> 27 #include <linux/bitfield.h> 28 #include <linux/errqueue.h> 29 #include <linux/if_link.h> 30 #include <linux/net_tstamp.h> 31 #include <netinet/udp.h> 32 #include <linux/sockios.h> 33 #include <linux/if_xdp.h> 34 #include <sys/mman.h> 35 #include <net/if.h> 36 #include <ctype.h> 37 #include <poll.h> 38 #include <time.h> 39 #include <unistd.h> 40 #include <libgen.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <sys/ioctl.h> 45 #include <linux/pkt_sched.h> 46 #include <linux/pkt_cls.h> 47 #include <linux/ethtool.h> 48 #include <sys/socket.h> 49 #include <arpa/inet.h> 50 51 #include "xdp_metadata.h" 52 53 #define UMEM_NUM 256 54 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 55 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 56 #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) 57 58 struct xsk { 59 void *umem_area; 60 struct xsk_umem *umem; 61 struct xsk_ring_prod fill; 62 struct xsk_ring_cons comp; 63 struct xsk_ring_prod tx; 64 struct xsk_ring_cons rx; 65 struct xsk_socket *socket; 66 }; 67 68 struct xdp_hw_metadata *bpf_obj; 69 __u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; 70 struct xsk *rx_xsk; 71 const char *ifname; 72 int ifindex; 73 int rxq; 74 bool skip_tx; 75 __u64 last_hw_rx_timestamp; 76 __u64 last_xdp_rx_timestamp; 77 __u64 last_launch_time; 78 __u64 launch_time_delta_to_hw_rx_timestamp; 79 int launch_time_queue; 80 81 #define run_command(cmd, ...) \ 82 ({ \ 83 char command[1024]; \ 84 memset(command, 0, sizeof(command)); \ 85 snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ 86 fprintf(stderr, "Running: %s\n", command); \ 87 system(command); \ 88 }) 89 90 void test__fail(void) { /* for network_helpers.c */ } 91 92 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) 93 { 94 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 95 const struct xsk_socket_config socket_config = { 96 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 97 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 98 .bind_flags = bind_flags, 99 }; 100 const struct xsk_umem_config umem_config = { 101 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 102 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 103 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 104 .flags = XDP_UMEM_TX_METADATA_LEN, 105 .tx_metadata_len = sizeof(struct xsk_tx_metadata), 106 }; 107 __u32 idx = 0; 108 u64 addr; 109 int ret; 110 int i; 111 112 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 113 if (xsk->umem_area == MAP_FAILED) 114 return -ENOMEM; 115 116 ret = xsk_umem__create(&xsk->umem, 117 xsk->umem_area, UMEM_SIZE, 118 &xsk->fill, 119 &xsk->comp, 120 &umem_config); 121 if (ret) 122 return ret; 123 124 ret = xsk_socket__create(&xsk->socket, ifindex, queue_id, 125 xsk->umem, 126 &xsk->rx, 127 &xsk->tx, 128 &socket_config); 129 if (ret) 130 return ret; 131 132 /* First half of umem is for TX. This way address matches 1-to-1 133 * to the completion queue index. 134 */ 135 136 for (i = 0; i < UMEM_NUM / 2; i++) { 137 addr = i * UMEM_FRAME_SIZE; 138 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 139 } 140 141 /* Second half of umem is for RX. */ 142 143 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 144 for (i = 0; i < UMEM_NUM / 2; i++) { 145 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 146 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 147 *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr; 148 } 149 xsk_ring_prod__submit(&xsk->fill, ret); 150 151 return 0; 152 } 153 154 static void close_xsk(struct xsk *xsk) 155 { 156 if (xsk->umem) 157 xsk_umem__delete(xsk->umem); 158 if (xsk->socket) 159 xsk_socket__delete(xsk->socket); 160 munmap(xsk->umem_area, UMEM_SIZE); 161 } 162 163 static void refill_rx(struct xsk *xsk, __u64 addr) 164 { 165 __u32 idx; 166 167 if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { 168 printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr); 169 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 170 xsk_ring_prod__submit(&xsk->fill, 1); 171 } 172 } 173 174 static int kick_tx(struct xsk *xsk) 175 { 176 return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); 177 } 178 179 static int kick_rx(struct xsk *xsk) 180 { 181 return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); 182 } 183 184 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ 185 static __u64 gettime(clockid_t clock_id) 186 { 187 struct timespec t; 188 int res; 189 190 /* See man clock_gettime(2) for type of clock_id's */ 191 res = clock_gettime(clock_id, &t); 192 193 if (res < 0) 194 error(res, errno, "Error with clock_gettime()"); 195 196 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; 197 } 198 199 static void print_tstamp_delta(const char *name, const char *refname, 200 __u64 tstamp, __u64 reference) 201 { 202 __s64 delta = (__s64)reference - (__s64)tstamp; 203 204 printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n", 205 name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname, 206 (double)delta / NANOSEC_PER_SEC, 207 (double)delta / 1000); 208 } 209 210 #define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */ 211 #define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */ 212 #define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ 213 static void print_vlan_tci(__u16 tag) 214 { 215 __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); 216 __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); 217 bool dei = FIELD_GET(VLAN_DEI_MASK, tag); 218 219 printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id); 220 } 221 222 static void verify_xdp_metadata(void *data, clockid_t clock_id) 223 { 224 struct xdp_meta *meta; 225 226 meta = data - sizeof(*meta); 227 228 if (meta->hint_valid & XDP_META_FIELD_RSS) 229 printf("rx_hash: 0x%X with RSS type:0x%X\n", 230 meta->rx_hash, meta->rx_hash_type); 231 else 232 printf("No rx_hash, err=%d\n", meta->rx_hash_err); 233 234 if (meta->hint_valid & XDP_META_FIELD_TS) { 235 __u64 ref_tstamp = gettime(clock_id); 236 237 /* store received timestamps to calculate a delta at tx */ 238 last_hw_rx_timestamp = meta->rx_timestamp; 239 last_xdp_rx_timestamp = meta->xdp_timestamp; 240 241 print_tstamp_delta("HW RX-time", "User RX-time", 242 meta->rx_timestamp, ref_tstamp); 243 print_tstamp_delta("XDP RX-time", "User RX-time", 244 meta->xdp_timestamp, ref_tstamp); 245 } else { 246 printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); 247 } 248 249 if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { 250 printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); 251 printf("rx_vlan_tci: "); 252 print_vlan_tci(meta->rx_vlan_tci); 253 } else { 254 printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", 255 meta->rx_vlan_tag_err); 256 } 257 } 258 259 static void verify_skb_metadata(int fd) 260 { 261 char cmsg_buf[1024]; 262 char packet_buf[128]; 263 264 struct scm_timestamping *ts; 265 struct iovec packet_iov; 266 struct cmsghdr *cmsg; 267 struct msghdr hdr; 268 269 memset(&hdr, 0, sizeof(hdr)); 270 hdr.msg_iov = &packet_iov; 271 hdr.msg_iovlen = 1; 272 packet_iov.iov_base = packet_buf; 273 packet_iov.iov_len = sizeof(packet_buf); 274 275 hdr.msg_control = cmsg_buf; 276 hdr.msg_controllen = sizeof(cmsg_buf); 277 278 if (recvmsg(fd, &hdr, 0) < 0) 279 error(1, errno, "recvmsg"); 280 281 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; 282 cmsg = CMSG_NXTHDR(&hdr, cmsg)) { 283 284 if (cmsg->cmsg_level != SOL_SOCKET) 285 continue; 286 287 switch (cmsg->cmsg_type) { 288 case SCM_TIMESTAMPING: 289 ts = (struct scm_timestamping *)CMSG_DATA(cmsg); 290 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) { 291 printf("found skb hwtstamp = %lu.%lu\n", 292 ts->ts[2].tv_sec, ts->ts[2].tv_nsec); 293 return; 294 } 295 break; 296 default: 297 break; 298 } 299 } 300 301 printf("skb hwtstamp is not found!\n"); 302 } 303 304 static bool complete_tx(struct xsk *xsk, clockid_t clock_id) 305 { 306 struct xsk_tx_metadata *meta; 307 __u64 addr; 308 void *data; 309 __u32 idx; 310 311 if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx)) 312 return false; 313 314 addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); 315 data = xsk_umem__get_data(xsk->umem_area, addr); 316 meta = data - sizeof(struct xsk_tx_metadata); 317 318 printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); 319 320 if (meta->completion.tx_timestamp) { 321 __u64 ref_tstamp = gettime(clock_id); 322 323 if (launch_time_delta_to_hw_rx_timestamp) { 324 print_tstamp_delta("HW Launch-time", 325 "HW TX-complete-time", 326 last_launch_time, 327 meta->completion.tx_timestamp); 328 } 329 print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", 330 meta->completion.tx_timestamp, ref_tstamp); 331 print_tstamp_delta("XDP RX-time", "User TX-complete-time", 332 last_xdp_rx_timestamp, ref_tstamp); 333 print_tstamp_delta("HW RX-time", "HW TX-complete-time", 334 last_hw_rx_timestamp, meta->completion.tx_timestamp); 335 } else { 336 printf("No tx_timestamp\n"); 337 } 338 339 xsk_ring_cons__release(&xsk->comp, 1); 340 341 return true; 342 } 343 344 #define swap(a, b, len) do { \ 345 for (int i = 0; i < len; i++) { \ 346 __u8 tmp = ((__u8 *)a)[i]; \ 347 ((__u8 *)a)[i] = ((__u8 *)b)[i]; \ 348 ((__u8 *)b)[i] = tmp; \ 349 } \ 350 } while (0) 351 352 static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) 353 { 354 struct xsk_tx_metadata *meta; 355 struct ipv6hdr *ip6h = NULL; 356 struct iphdr *iph = NULL; 357 struct xdp_desc *tx_desc; 358 struct udphdr *udph; 359 struct ethhdr *eth; 360 __sum16 want_csum; 361 void *data; 362 __u32 idx; 363 int ret; 364 int len; 365 366 ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); 367 if (ret != 1) { 368 printf("%p: failed to reserve tx slot\n", xsk); 369 return; 370 } 371 372 tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); 373 tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); 374 data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); 375 376 meta = data - sizeof(struct xsk_tx_metadata); 377 memset(meta, 0, sizeof(*meta)); 378 meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; 379 380 eth = rx_packet; 381 382 if (eth->h_proto == htons(ETH_P_IP)) { 383 iph = (void *)(eth + 1); 384 udph = (void *)(iph + 1); 385 } else if (eth->h_proto == htons(ETH_P_IPV6)) { 386 ip6h = (void *)(eth + 1); 387 udph = (void *)(ip6h + 1); 388 } else { 389 printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto); 390 xsk_ring_prod__cancel(&xsk->tx, 1); 391 return; 392 } 393 394 len = ETH_HLEN; 395 if (ip6h) 396 len += sizeof(*ip6h) + ntohs(ip6h->payload_len); 397 if (iph) 398 len += ntohs(iph->tot_len); 399 400 swap(eth->h_dest, eth->h_source, ETH_ALEN); 401 if (iph) 402 swap(&iph->saddr, &iph->daddr, 4); 403 else 404 swap(&ip6h->saddr, &ip6h->daddr, 16); 405 swap(&udph->source, &udph->dest, 2); 406 407 want_csum = udph->check; 408 if (ip6h) 409 udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 410 ntohs(udph->len), IPPROTO_UDP, 0); 411 else 412 udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 413 ntohs(udph->len), IPPROTO_UDP, 0); 414 415 meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; 416 if (iph) 417 meta->request.csum_start = sizeof(*eth) + sizeof(*iph); 418 else 419 meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h); 420 meta->request.csum_offset = offsetof(struct udphdr, check); 421 422 printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n", 423 xsk, ntohs(udph->check), ntohs(want_csum), 424 meta->request.csum_start, meta->request.csum_offset); 425 426 /* Set the value of launch time */ 427 if (launch_time_delta_to_hw_rx_timestamp) { 428 meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME; 429 meta->request.launch_time = last_hw_rx_timestamp + 430 launch_time_delta_to_hw_rx_timestamp; 431 last_launch_time = meta->request.launch_time; 432 print_tstamp_delta("HW RX-time", "HW Launch-time", 433 last_hw_rx_timestamp, 434 meta->request.launch_time); 435 } 436 437 memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ 438 tx_desc->options |= XDP_TX_METADATA; 439 tx_desc->len = len; 440 441 xsk_ring_prod__submit(&xsk->tx, 1); 442 } 443 444 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) 445 { 446 const struct xdp_desc *rx_desc; 447 struct pollfd fds[rxq + 1]; 448 __u64 comp_addr; 449 __u64 deadline; 450 __u64 addr; 451 __u32 idx = 0; 452 int ret; 453 int i; 454 455 for (i = 0; i < rxq; i++) { 456 fds[i].fd = xsk_socket__fd(rx_xsk[i].socket); 457 fds[i].events = POLLIN; 458 fds[i].revents = 0; 459 } 460 461 fds[rxq].fd = server_fd; 462 fds[rxq].events = POLLIN; 463 fds[rxq].revents = 0; 464 465 while (true) { 466 errno = 0; 467 468 for (i = 0; i < rxq; i++) { 469 ret = kick_rx(&rx_xsk[i]); 470 if (ret) 471 printf("kick_rx ret=%d\n", ret); 472 } 473 474 ret = poll(fds, rxq + 1, 1000); 475 printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", 476 ret, errno, bpf_obj->bss->pkts_skip, 477 bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir); 478 if (ret < 0) 479 break; 480 if (ret == 0) 481 continue; 482 483 if (fds[rxq].revents) 484 verify_skb_metadata(server_fd); 485 486 for (i = 0; i < rxq; i++) { 487 bool first_seg = true; 488 bool is_eop = true; 489 490 if (fds[i].revents == 0) 491 continue; 492 493 struct xsk *xsk = &rx_xsk[i]; 494 peek: 495 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 496 printf("xsk_ring_cons__peek: %d\n", ret); 497 if (ret != 1) 498 continue; 499 500 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 501 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 502 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 503 is_eop = !(rx_desc->options & XDP_PKT_CONTD); 504 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n", 505 xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : ""); 506 if (first_seg) { 507 verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), 508 clock_id); 509 first_seg = false; 510 511 if (!skip_tx) { 512 /* mirror first chunk back */ 513 ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr), 514 clock_id); 515 516 ret = kick_tx(xsk); 517 if (ret) 518 printf("kick_tx ret=%d\n", ret); 519 520 /* wait 1 second + cover launch time */ 521 deadline = gettime(clock_id) + 522 NANOSEC_PER_SEC + 523 launch_time_delta_to_hw_rx_timestamp; 524 while (true) { 525 if (complete_tx(xsk, clock_id)) 526 break; 527 if (gettime(clock_id) >= deadline) 528 break; 529 usleep(10); 530 } 531 } 532 } 533 534 xsk_ring_cons__release(&xsk->rx, 1); 535 refill_rx(xsk, comp_addr); 536 if (!is_eop) 537 goto peek; 538 } 539 } 540 541 return 0; 542 } 543 544 static int rxq_num(const char *ifname) 545 { 546 struct ethtool_channels ch = { 547 .cmd = ETHTOOL_GCHANNELS, 548 }; 549 550 struct ifreq ifr = { 551 .ifr_data = (void *)&ch, 552 }; 553 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 554 int fd, ret; 555 556 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 557 if (fd < 0) 558 error(1, errno, "socket"); 559 560 ret = ioctl(fd, SIOCETHTOOL, &ifr); 561 if (ret < 0) 562 error(1, errno, "ioctl(SIOCETHTOOL)"); 563 564 close(fd); 565 566 return ch.rx_count + ch.combined_count; 567 } 568 569 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg) 570 { 571 struct ifreq ifr = { 572 .ifr_data = (void *)cfg, 573 }; 574 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 575 int fd, ret; 576 577 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 578 if (fd < 0) 579 error(1, errno, "socket"); 580 581 ret = ioctl(fd, op, &ifr); 582 if (ret < 0) 583 error(1, errno, "ioctl(%d)", op); 584 585 close(fd); 586 } 587 588 static struct hwtstamp_config saved_hwtstamp_cfg; 589 static const char *saved_hwtstamp_ifname; 590 591 static void hwtstamp_restore(void) 592 { 593 hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg); 594 } 595 596 static void hwtstamp_enable(const char *ifname) 597 { 598 struct hwtstamp_config cfg = { 599 .rx_filter = HWTSTAMP_FILTER_ALL, 600 .tx_type = HWTSTAMP_TX_ON, 601 }; 602 603 hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); 604 saved_hwtstamp_ifname = strdup(ifname); 605 atexit(hwtstamp_restore); 606 607 hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg); 608 } 609 610 static void cleanup(void) 611 { 612 LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 613 int ret; 614 int i; 615 616 if (bpf_obj) { 617 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx); 618 if (opts.old_prog_fd >= 0) { 619 printf("detaching bpf program....\n"); 620 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts); 621 if (ret) 622 printf("failed to detach XDP program: %d\n", ret); 623 } 624 } 625 626 for (i = 0; i < rxq; i++) 627 close_xsk(&rx_xsk[i]); 628 629 if (bpf_obj) 630 xdp_hw_metadata__destroy(bpf_obj); 631 632 free((void *)saved_hwtstamp_ifname); 633 } 634 635 static void handle_signal(int sig) 636 { 637 /* interrupting poll() is all we need */ 638 } 639 640 static void timestamping_enable(int fd, int val) 641 { 642 int ret; 643 644 ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)); 645 if (ret < 0) 646 error(1, errno, "setsockopt(SO_TIMESTAMPING)"); 647 } 648 649 static void print_usage(void) 650 { 651 const char *usage = 652 "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" 653 " -c Run in copy mode (zerocopy is default)\n" 654 " -h Display this help and exit\n\n" 655 " -m Enable multi-buffer XDP for larger MTU\n" 656 " -r Don't generate AF_XDP reply (rx metadata only)\n" 657 " -l Delta of launch time relative to HW RX-time in ns\n" 658 " default: 0 ns (launch time request is disabled)\n" 659 " -L Tx Queue to be enabled with launch time offload\n" 660 " default: 0 (Tx Queue 0)\n" 661 "Generate test packets on the other machine with:\n" 662 " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; 663 664 printf("%s", usage); 665 } 666 667 static void read_args(int argc, char *argv[]) 668 { 669 int opt; 670 671 while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) { 672 switch (opt) { 673 case 'c': 674 bind_flags &= ~XDP_USE_NEED_WAKEUP; 675 bind_flags &= ~XDP_ZEROCOPY; 676 bind_flags |= XDP_COPY; 677 break; 678 case 'h': 679 print_usage(); 680 exit(0); 681 case 'm': 682 bind_flags |= XDP_USE_SG; 683 break; 684 case 'r': 685 skip_tx = true; 686 break; 687 case 'l': 688 launch_time_delta_to_hw_rx_timestamp = atoll(optarg); 689 break; 690 case 'L': 691 launch_time_queue = atoll(optarg); 692 break; 693 case '?': 694 if (isprint(optopt)) 695 fprintf(stderr, "Unknown option: -%c\n", optopt); 696 fallthrough; 697 default: 698 print_usage(); 699 error(-1, opterr, "Command line options error"); 700 } 701 } 702 703 if (optind >= argc) { 704 fprintf(stderr, "No device name provided\n"); 705 print_usage(); 706 exit(-1); 707 } 708 709 ifname = argv[optind]; 710 ifindex = if_nametoindex(ifname); 711 712 if (!ifname) 713 error(-1, errno, "Invalid interface name"); 714 } 715 716 void clean_existing_configurations(void) 717 { 718 /* Check and delete root qdisc if exists */ 719 if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0) 720 run_command("sudo tc qdisc del dev %s root", ifname); 721 722 /* Check and delete ingress qdisc if exists */ 723 if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0) 724 run_command("sudo tc qdisc del dev %s ingress", ifname); 725 726 /* Check and delete ethtool filters if any exist */ 727 if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) { 728 run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2", 729 ifname, ifname); 730 } 731 } 732 733 #define MAX_TC 16 734 735 int main(int argc, char *argv[]) 736 { 737 clockid_t clock_id = CLOCK_TAI; 738 struct bpf_program *prog; 739 int server_fd = -1; 740 size_t map_len = 0; 741 size_t que_len = 0; 742 char *buf = NULL; 743 char *map = NULL; 744 char *que = NULL; 745 char *tmp = NULL; 746 int tc = 0; 747 int ret; 748 int i; 749 750 read_args(argc, argv); 751 752 rxq = rxq_num(ifname); 753 printf("rxq: %d\n", rxq); 754 755 if (launch_time_queue >= rxq || launch_time_queue < 0) 756 error(1, 0, "Invalid launch_time_queue."); 757 758 clean_existing_configurations(); 759 sleep(1); 760 761 /* Enable tx and rx hardware timestamping */ 762 hwtstamp_enable(ifname); 763 764 /* Prepare priority to traffic class map for tc-mqprio */ 765 for (i = 0; i < MAX_TC; i++) { 766 if (i < rxq) 767 tc = i; 768 769 if (asprintf(&buf, "%d ", tc) == -1) { 770 printf("Failed to malloc buf for tc map.\n"); 771 goto free_mem; 772 } 773 774 map_len += strlen(buf); 775 tmp = realloc(map, map_len + 1); 776 if (!tmp) { 777 printf("Failed to realloc tc map.\n"); 778 goto free_mem; 779 } 780 map = tmp; 781 strcat(map, buf); 782 free(buf); 783 buf = NULL; 784 } 785 786 /* Prepare traffic class to hardware queue map for tc-mqprio */ 787 for (i = 0; i <= tc; i++) { 788 if (asprintf(&buf, "1@%d ", i) == -1) { 789 printf("Failed to malloc buf for tc queues.\n"); 790 goto free_mem; 791 } 792 793 que_len += strlen(buf); 794 tmp = realloc(que, que_len + 1); 795 if (!tmp) { 796 printf("Failed to realloc tc queues.\n"); 797 goto free_mem; 798 } 799 que = tmp; 800 strcat(que, buf); 801 free(buf); 802 buf = NULL; 803 } 804 805 /* Add mqprio qdisc */ 806 run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0", 807 ifname, tc + 1, map, que); 808 809 /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */ 810 if (launch_time_delta_to_hw_rx_timestamp) { 811 /* Enable launch time hardware offload on launch_time_queue */ 812 run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000", 813 ifname, launch_time_queue + 1); 814 sleep(1); 815 816 /* Route incoming packet with VLAN priority 1 into launch_time_queue */ 817 if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d", 818 ifname, launch_time_queue)) { 819 run_command("sudo tc qdisc add dev %s ingress", ifname); 820 run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d", 821 ifname, launch_time_queue); 822 } 823 824 /* Enable VLAN tag stripping offload */ 825 run_command("sudo ethtool -K %s rxvlan on", ifname); 826 } 827 828 rx_xsk = malloc(sizeof(struct xsk) * rxq); 829 if (!rx_xsk) 830 error(1, ENOMEM, "malloc"); 831 832 for (i = 0; i < rxq; i++) { 833 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i); 834 ret = open_xsk(ifindex, &rx_xsk[i], i); 835 if (ret) 836 error(1, -ret, "open_xsk"); 837 838 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket)); 839 } 840 841 printf("open bpf program...\n"); 842 bpf_obj = xdp_hw_metadata__open(); 843 if (libbpf_get_error(bpf_obj)) 844 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open"); 845 846 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 847 bpf_program__set_ifindex(prog, ifindex); 848 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 849 850 printf("load bpf program...\n"); 851 ret = xdp_hw_metadata__load(bpf_obj); 852 if (ret) 853 error(1, -ret, "xdp_hw_metadata__load"); 854 855 printf("prepare skb endpoint...\n"); 856 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000); 857 if (server_fd < 0) 858 error(1, errno, "start_server"); 859 timestamping_enable(server_fd, 860 SOF_TIMESTAMPING_SOFTWARE | 861 SOF_TIMESTAMPING_RAW_HARDWARE); 862 863 printf("prepare xsk map...\n"); 864 for (i = 0; i < rxq; i++) { 865 int sock_fd = xsk_socket__fd(rx_xsk[i].socket); 866 __u32 queue_id = i; 867 868 printf("map[%d] = %d\n", queue_id, sock_fd); 869 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 870 if (ret) 871 error(1, -ret, "bpf_map_update_elem"); 872 } 873 874 printf("attach bpf program...\n"); 875 ret = bpf_xdp_attach(ifindex, 876 bpf_program__fd(bpf_obj->progs.rx), 877 XDP_FLAGS, NULL); 878 if (ret) 879 error(1, -ret, "bpf_xdp_attach"); 880 881 signal(SIGINT, handle_signal); 882 ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id); 883 close(server_fd); 884 cleanup(); 885 if (ret) 886 error(1, -ret, "verify_metadata"); 887 888 clean_existing_configurations(); 889 890 free_mem: 891 free(buf); 892 free(map); 893 free(que); 894 } 895