1 // SPDX-License-Identifier: GPL-2.0 2 3 /* Reference program for verifying XDP metadata on real HW. Functional test 4 * only, doesn't test the performance. 5 * 6 * RX: 7 * - UDP 9091 packets are diverted into AF_XDP 8 * - Metadata verified: 9 * - rx_timestamp 10 * - rx_hash 11 * 12 * TX: 13 * - TBD 14 */ 15 16 #include <test_progs.h> 17 #include <network_helpers.h> 18 #include "xdp_hw_metadata.skel.h" 19 #include "xsk.h" 20 21 #include <error.h> 22 #include <linux/errqueue.h> 23 #include <linux/if_link.h> 24 #include <linux/net_tstamp.h> 25 #include <linux/udp.h> 26 #include <linux/sockios.h> 27 #include <sys/mman.h> 28 #include <net/if.h> 29 #include <ctype.h> 30 #include <poll.h> 31 #include <time.h> 32 33 #include "xdp_metadata.h" 34 35 #define UMEM_NUM 16 36 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE 37 #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) 38 #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) 39 40 struct xsk { 41 void *umem_area; 42 struct xsk_umem *umem; 43 struct xsk_ring_prod fill; 44 struct xsk_ring_cons comp; 45 struct xsk_ring_prod tx; 46 struct xsk_ring_cons rx; 47 struct xsk_socket *socket; 48 }; 49 50 struct xdp_hw_metadata *bpf_obj; 51 __u16 bind_flags = XDP_COPY; 52 struct xsk *rx_xsk; 53 const char *ifname; 54 int ifindex; 55 int rxq; 56 57 void test__fail(void) { /* for network_helpers.c */ } 58 59 static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) 60 { 61 int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; 62 const struct xsk_socket_config socket_config = { 63 .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 64 .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 65 .bind_flags = bind_flags, 66 }; 67 const struct xsk_umem_config umem_config = { 68 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 69 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 70 .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, 71 .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, 72 }; 73 __u32 idx; 74 u64 addr; 75 int ret; 76 int i; 77 78 xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); 79 if (xsk->umem_area == MAP_FAILED) 80 return -ENOMEM; 81 82 ret = xsk_umem__create(&xsk->umem, 83 xsk->umem_area, UMEM_SIZE, 84 &xsk->fill, 85 &xsk->comp, 86 &umem_config); 87 if (ret) 88 return ret; 89 90 ret = xsk_socket__create(&xsk->socket, ifindex, queue_id, 91 xsk->umem, 92 &xsk->rx, 93 &xsk->tx, 94 &socket_config); 95 if (ret) 96 return ret; 97 98 /* First half of umem is for TX. This way address matches 1-to-1 99 * to the completion queue index. 100 */ 101 102 for (i = 0; i < UMEM_NUM / 2; i++) { 103 addr = i * UMEM_FRAME_SIZE; 104 printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr); 105 } 106 107 /* Second half of umem is for RX. */ 108 109 ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx); 110 for (i = 0; i < UMEM_NUM / 2; i++) { 111 addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; 112 printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); 113 *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; 114 } 115 xsk_ring_prod__submit(&xsk->fill, ret); 116 117 return 0; 118 } 119 120 static void close_xsk(struct xsk *xsk) 121 { 122 if (xsk->umem) 123 xsk_umem__delete(xsk->umem); 124 if (xsk->socket) 125 xsk_socket__delete(xsk->socket); 126 munmap(xsk->umem_area, UMEM_SIZE); 127 } 128 129 static void refill_rx(struct xsk *xsk, __u64 addr) 130 { 131 __u32 idx; 132 133 if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { 134 printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); 135 *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; 136 xsk_ring_prod__submit(&xsk->fill, 1); 137 } 138 } 139 140 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ 141 static __u64 gettime(clockid_t clock_id) 142 { 143 struct timespec t; 144 int res; 145 146 /* See man clock_gettime(2) for type of clock_id's */ 147 res = clock_gettime(clock_id, &t); 148 149 if (res < 0) 150 error(res, errno, "Error with clock_gettime()"); 151 152 return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; 153 } 154 155 static void verify_xdp_metadata(void *data, clockid_t clock_id) 156 { 157 struct xdp_meta *meta; 158 159 meta = data - sizeof(*meta); 160 161 if (meta->rx_hash_err < 0) 162 printf("No rx_hash err=%d\n", meta->rx_hash_err); 163 else 164 printf("rx_hash: 0x%X with RSS type:0x%X\n", 165 meta->rx_hash, meta->rx_hash_type); 166 167 printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, 168 (double)meta->rx_timestamp / NANOSEC_PER_SEC); 169 if (meta->rx_timestamp) { 170 __u64 usr_clock = gettime(clock_id); 171 __u64 xdp_clock = meta->xdp_timestamp; 172 __s64 delta_X = xdp_clock - meta->rx_timestamp; 173 __s64 delta_X2U = usr_clock - xdp_clock; 174 175 printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", 176 xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, 177 (double)delta_X / NANOSEC_PER_SEC, 178 (double)delta_X / 1000); 179 180 printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", 181 usr_clock, (double)usr_clock / NANOSEC_PER_SEC, 182 (double)delta_X2U / NANOSEC_PER_SEC, 183 (double)delta_X2U / 1000); 184 } 185 186 } 187 188 static void verify_skb_metadata(int fd) 189 { 190 char cmsg_buf[1024]; 191 char packet_buf[128]; 192 193 struct scm_timestamping *ts; 194 struct iovec packet_iov; 195 struct cmsghdr *cmsg; 196 struct msghdr hdr; 197 198 memset(&hdr, 0, sizeof(hdr)); 199 hdr.msg_iov = &packet_iov; 200 hdr.msg_iovlen = 1; 201 packet_iov.iov_base = packet_buf; 202 packet_iov.iov_len = sizeof(packet_buf); 203 204 hdr.msg_control = cmsg_buf; 205 hdr.msg_controllen = sizeof(cmsg_buf); 206 207 if (recvmsg(fd, &hdr, 0) < 0) 208 error(1, errno, "recvmsg"); 209 210 for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; 211 cmsg = CMSG_NXTHDR(&hdr, cmsg)) { 212 213 if (cmsg->cmsg_level != SOL_SOCKET) 214 continue; 215 216 switch (cmsg->cmsg_type) { 217 case SCM_TIMESTAMPING: 218 ts = (struct scm_timestamping *)CMSG_DATA(cmsg); 219 if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) { 220 printf("found skb hwtstamp = %lu.%lu\n", 221 ts->ts[2].tv_sec, ts->ts[2].tv_nsec); 222 return; 223 } 224 break; 225 default: 226 break; 227 } 228 } 229 230 printf("skb hwtstamp is not found!\n"); 231 } 232 233 static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) 234 { 235 const struct xdp_desc *rx_desc; 236 struct pollfd fds[rxq + 1]; 237 __u64 comp_addr; 238 __u64 addr; 239 __u32 idx = 0; 240 int ret; 241 int i; 242 243 for (i = 0; i < rxq; i++) { 244 fds[i].fd = xsk_socket__fd(rx_xsk[i].socket); 245 fds[i].events = POLLIN; 246 fds[i].revents = 0; 247 } 248 249 fds[rxq].fd = server_fd; 250 fds[rxq].events = POLLIN; 251 fds[rxq].revents = 0; 252 253 while (true) { 254 errno = 0; 255 ret = poll(fds, rxq + 1, 1000); 256 printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", 257 ret, errno, bpf_obj->bss->pkts_skip, 258 bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir); 259 if (ret < 0) 260 break; 261 if (ret == 0) 262 continue; 263 264 if (fds[rxq].revents) 265 verify_skb_metadata(server_fd); 266 267 for (i = 0; i < rxq; i++) { 268 bool first_seg = true; 269 bool is_eop = true; 270 271 if (fds[i].revents == 0) 272 continue; 273 274 struct xsk *xsk = &rx_xsk[i]; 275 peek: 276 ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx); 277 printf("xsk_ring_cons__peek: %d\n", ret); 278 if (ret != 1) 279 continue; 280 281 rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx); 282 comp_addr = xsk_umem__extract_addr(rx_desc->addr); 283 addr = xsk_umem__add_offset_to_addr(rx_desc->addr); 284 is_eop = !(rx_desc->options & XDP_PKT_CONTD); 285 printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n", 286 xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : ""); 287 if (first_seg) { 288 verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), 289 clock_id); 290 first_seg = false; 291 } 292 293 xsk_ring_cons__release(&xsk->rx, 1); 294 refill_rx(xsk, comp_addr); 295 if (!is_eop) 296 goto peek; 297 } 298 } 299 300 return 0; 301 } 302 303 struct ethtool_channels { 304 __u32 cmd; 305 __u32 max_rx; 306 __u32 max_tx; 307 __u32 max_other; 308 __u32 max_combined; 309 __u32 rx_count; 310 __u32 tx_count; 311 __u32 other_count; 312 __u32 combined_count; 313 }; 314 315 #define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ 316 317 static int rxq_num(const char *ifname) 318 { 319 struct ethtool_channels ch = { 320 .cmd = ETHTOOL_GCHANNELS, 321 }; 322 323 struct ifreq ifr = { 324 .ifr_data = (void *)&ch, 325 }; 326 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 327 int fd, ret; 328 329 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 330 if (fd < 0) 331 error(1, errno, "socket"); 332 333 ret = ioctl(fd, SIOCETHTOOL, &ifr); 334 if (ret < 0) 335 error(1, errno, "ioctl(SIOCETHTOOL)"); 336 337 close(fd); 338 339 return ch.rx_count + ch.combined_count; 340 } 341 342 static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg) 343 { 344 struct ifreq ifr = { 345 .ifr_data = (void *)cfg, 346 }; 347 strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1); 348 int fd, ret; 349 350 fd = socket(AF_UNIX, SOCK_DGRAM, 0); 351 if (fd < 0) 352 error(1, errno, "socket"); 353 354 ret = ioctl(fd, op, &ifr); 355 if (ret < 0) 356 error(1, errno, "ioctl(%d)", op); 357 358 close(fd); 359 } 360 361 static struct hwtstamp_config saved_hwtstamp_cfg; 362 static const char *saved_hwtstamp_ifname; 363 364 static void hwtstamp_restore(void) 365 { 366 hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg); 367 } 368 369 static void hwtstamp_enable(const char *ifname) 370 { 371 struct hwtstamp_config cfg = { 372 .rx_filter = HWTSTAMP_FILTER_ALL, 373 }; 374 375 hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); 376 saved_hwtstamp_ifname = strdup(ifname); 377 atexit(hwtstamp_restore); 378 379 hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg); 380 } 381 382 static void cleanup(void) 383 { 384 LIBBPF_OPTS(bpf_xdp_attach_opts, opts); 385 int ret; 386 int i; 387 388 if (bpf_obj) { 389 opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx); 390 if (opts.old_prog_fd >= 0) { 391 printf("detaching bpf program....\n"); 392 ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts); 393 if (ret) 394 printf("failed to detach XDP program: %d\n", ret); 395 } 396 } 397 398 for (i = 0; i < rxq; i++) 399 close_xsk(&rx_xsk[i]); 400 401 if (bpf_obj) 402 xdp_hw_metadata__destroy(bpf_obj); 403 } 404 405 static void handle_signal(int sig) 406 { 407 /* interrupting poll() is all we need */ 408 } 409 410 static void timestamping_enable(int fd, int val) 411 { 412 int ret; 413 414 ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val)); 415 if (ret < 0) 416 error(1, errno, "setsockopt(SO_TIMESTAMPING)"); 417 } 418 419 static void print_usage(void) 420 { 421 const char *usage = 422 "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" 423 " -m Enable multi-buffer XDP for larger MTU\n" 424 " -h Display this help and exit\n\n" 425 "Generate test packets on the other machine with:\n" 426 " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; 427 428 printf("%s", usage); 429 } 430 431 static void read_args(int argc, char *argv[]) 432 { 433 int opt; 434 435 while ((opt = getopt(argc, argv, "mh")) != -1) { 436 switch (opt) { 437 case 'm': 438 bind_flags |= XDP_USE_SG; 439 break; 440 case 'h': 441 print_usage(); 442 exit(0); 443 case '?': 444 if (isprint(optopt)) 445 fprintf(stderr, "Unknown option: -%c\n", optopt); 446 fallthrough; 447 default: 448 print_usage(); 449 error(-1, opterr, "Command line options error"); 450 } 451 } 452 453 if (optind >= argc) { 454 fprintf(stderr, "No device name provided\n"); 455 print_usage(); 456 exit(-1); 457 } 458 459 ifname = argv[optind]; 460 ifindex = if_nametoindex(ifname); 461 462 if (!ifname) 463 error(-1, errno, "Invalid interface name"); 464 } 465 466 int main(int argc, char *argv[]) 467 { 468 clockid_t clock_id = CLOCK_TAI; 469 int server_fd = -1; 470 int ret; 471 int i; 472 473 struct bpf_program *prog; 474 475 read_args(argc, argv); 476 477 rxq = rxq_num(ifname); 478 479 printf("rxq: %d\n", rxq); 480 481 hwtstamp_enable(ifname); 482 483 rx_xsk = malloc(sizeof(struct xsk) * rxq); 484 if (!rx_xsk) 485 error(1, ENOMEM, "malloc"); 486 487 for (i = 0; i < rxq; i++) { 488 printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i); 489 ret = open_xsk(ifindex, &rx_xsk[i], i); 490 if (ret) 491 error(1, -ret, "open_xsk"); 492 493 printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket)); 494 } 495 496 printf("open bpf program...\n"); 497 bpf_obj = xdp_hw_metadata__open(); 498 if (libbpf_get_error(bpf_obj)) 499 error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open"); 500 501 prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx"); 502 bpf_program__set_ifindex(prog, ifindex); 503 bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY); 504 505 printf("load bpf program...\n"); 506 ret = xdp_hw_metadata__load(bpf_obj); 507 if (ret) 508 error(1, -ret, "xdp_hw_metadata__load"); 509 510 printf("prepare skb endpoint...\n"); 511 server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000); 512 if (server_fd < 0) 513 error(1, errno, "start_server"); 514 timestamping_enable(server_fd, 515 SOF_TIMESTAMPING_SOFTWARE | 516 SOF_TIMESTAMPING_RAW_HARDWARE); 517 518 printf("prepare xsk map...\n"); 519 for (i = 0; i < rxq; i++) { 520 int sock_fd = xsk_socket__fd(rx_xsk[i].socket); 521 __u32 queue_id = i; 522 523 printf("map[%d] = %d\n", queue_id, sock_fd); 524 ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0); 525 if (ret) 526 error(1, -ret, "bpf_map_update_elem"); 527 } 528 529 printf("attach bpf program...\n"); 530 ret = bpf_xdp_attach(ifindex, 531 bpf_program__fd(bpf_obj->progs.rx), 532 XDP_FLAGS, NULL); 533 if (ret) 534 error(1, -ret, "bpf_xdp_attach"); 535 536 signal(SIGINT, handle_signal); 537 ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id); 538 close(server_fd); 539 cleanup(); 540 if (ret) 541 error(1, -ret, "verify_metadata"); 542 } 543