// SPDX-License-Identifier: GPL-2.0
/*
 * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
 * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
 *
 * Usage:
 *
 *	On server:
 *	ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
 *
 *	On client:
 *	echo -n "hello\nworld" | \
 *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
 *
 * Note this is compatible with regular netcat, i.e. the sender or receiver can
 * be replaced with regular netcat to test the RX or TX path in isolation.
 *
 * Test data validation (devmem TCP on RX only):
 *
 *	On server:
 *	ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
 *
 *	On client:
 *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
 *		head -c 1G | \
 *		nc <server IP> 5201 -p 5201
 *
 * Test data validation (devmem TCP on RX and TX, validation happens on RX):
 *
 *	On server:
 *	ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
 *
 *	On client:
 *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
 *		head -c 1M | \
 *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
 */
#define _GNU_SOURCE
#define __EXPORTED_HEADERS__

#include <linux/uio.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <errno.h>
#define __iovec_defined
#include <fcntl.h>
#include <malloc.h>
#include <error.h>
#include <poll.h>

#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/time.h>

#include <linux/memfd.h>
#include <linux/dma-buf.h>
#include <linux/errqueue.h>
#include <linux/udmabuf.h>
#include <linux/types.h>
#include <linux/netlink.h>
#include <linux/genetlink.h>
#include <linux/netdev.h>
#include <linux/ethtool_netlink.h>
#include <time.h>
#include <net/if.h>

#include "netdev-user.h"
#include "ethtool-user.h"
#include <ynl.h>

#define PAGE_SHIFT 12
#define TEST_PREFIX "ncdevmem"
#define NUM_PAGES 16000

#ifndef MSG_SOCK_DEVMEM
#define MSG_SOCK_DEVMEM 0x2000000
#endif

#define MAX_IOV 1024

static size_t max_chunk;
static char *server_ip;
static char *client_ip;
static char *port;
static size_t do_validation;
static int start_queue = -1;
static int num_queues = -1;
static char *ifname;
static unsigned int ifindex;
static unsigned int dmabuf_id;
static uint32_t tx_dmabuf_id;
static int waittime_ms = 500;

struct memory_buffer {
	int fd;
	size_t size;

	int devfd;
	int memfd;
	char *buf_mem;
};

struct memory_provider {
	struct memory_buffer *(*alloc)(size_t size);
	void (*free)(struct memory_buffer *ctx);
	void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
				 void *src, int n);
	void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
				   size_t off, int n);
};

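/* udmabuf-backed implementation of the memory provider above: the buffer is
 * a sealed memfd wrapped in a udmabuf and mmap()ed, so the test can copy data
 * in and out of the mock "device" memory from userspace.
 */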
static struct memory_buffer *udmabuf_alloc(size_t size)
{
	struct udmabuf_create create;
	struct memory_buffer *ctx;
	int ret;

	ctx = malloc(sizeof(*ctx));
	if (!ctx)
		error(1, ENOMEM, "malloc failed");

	ctx->size = size;

	ctx->devfd = open("/dev/udmabuf", O_RDWR);
	if (ctx->devfd < 0)
		error(1, errno,
		      "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
		      TEST_PREFIX);

	ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
	if (ctx->memfd < 0)
		error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX);

	ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
	if (ret < 0)
		error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);

	ret = ftruncate(ctx->memfd, size);
	if (ret == -1)
		error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);

	memset(&create, 0, sizeof(create));

	create.memfd = ctx->memfd;
	create.offset = 0;
	create.size = size;
	ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
	if (ctx->fd < 0)
		error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);

	ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			    ctx->fd, 0);
	if (ctx->buf_mem == MAP_FAILED)
		error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX);

	return ctx;
}

static void udmabuf_free(struct memory_buffer *ctx)
{
	munmap(ctx->buf_mem, ctx->size);
	close(ctx->fd);
	close(ctx->memfd);
	close(ctx->devfd);
	free(ctx);
}

static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
				     void *src, int n)
{
	struct dma_buf_sync sync = {};

	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);

	memcpy(dst->buf_mem + off, src, n);

	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
}

static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
				       size_t off, int n)
{
	struct dma_buf_sync sync = {};

	sync.flags = DMA_BUF_SYNC_START;
	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);

	memcpy(dst, src->buf_mem + off, n);

	sync.flags = DMA_BUF_SYNC_END;
	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
}

static struct memory_provider udmabuf_memory_provider = {
	.alloc = udmabuf_alloc,
	.free = udmabuf_free,
	.memcpy_to_device = udmabuf_memcpy_to_device,
	.memcpy_from_device = udmabuf_memcpy_from_device,
};

static struct memory_provider *provider = &udmabuf_memory_provider;

static void print_nonzero_bytes(void *ptr, size_t size)
{
	unsigned char *p = ptr;
	unsigned int i;

	for (i = 0; i < size; i++)
		putchar(p[i]);
}

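/* Check that the received payload follows the repeating pattern produced by
 * the senders in the usage examples above: bytes 1, 2, ..., do_validation - 1
 * followed by '\n'. The seed wraps to zero every do_validation bytes, and a
 * zero seed is matched against '\n'.
 */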
void validate_buffer(void *line, size_t size)
{
	static unsigned char seed = 1;
	unsigned char *ptr = line;
	unsigned char expected;
	static int errors;
	size_t i;

	for (i = 0; i < size; i++) {
		expected = seed ? seed : '\n';
		if (ptr[i] != expected) {
			fprintf(stderr,
				"Failed validation: expected=%u, actual=%u, index=%lu\n",
				expected, ptr[i], i);
			errors++;
			if (errors > 20)
				error(1, 0, "validation failed.");
		}
		seed++;
		if (seed == do_validation)
			seed = 0;
	}

	fprintf(stdout, "Validated buffer\n");
}

static int rxq_num(int ifindex)
{
	struct ethtool_channels_get_req *req;
	struct ethtool_channels_get_rsp *rsp;
	struct ynl_error yerr;
	struct ynl_sock *ys;
	int num = -1;

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return -1;
	}

	req = ethtool_channels_get_req_alloc();
	ethtool_channels_get_req_set_header_dev_index(req, ifindex);
	rsp = ethtool_channels_get(ys, req);
	if (rsp)
		num = rsp->rx_count + rsp->combined_count;
	ethtool_channels_get_req_free(req);
	ethtool_channels_get_rsp_free(rsp);

	ynl_sock_destroy(ys);

	return num;
}

#define run_command(cmd, ...)                                           \
	({                                                              \
		char command[256];                                      \
		memset(command, 0, sizeof(command));                    \
		snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
		fprintf(stderr, "Running: %s\n", command);              \
		system(command);                                        \
	})

static int reset_flow_steering(void)
{
	/* Depending on the NIC, toggling ntuple off and on might not
	 * be allowed. Additionally, attempting to delete existing filters
	 * will fail if no filters are present. Therefore, do not enforce
	 * the exit status.
	 */

	run_command("ethtool -K %s ntuple off >&2", ifname);
	run_command("ethtool -K %s ntuple on >&2", ifname);
	run_command(
		"ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
		ifname, ifname);
	return 0;
}

static const char *tcp_data_split_str(int val)
{
	switch (val) {
	case 0:
		return "off";
	case 1:
		return "auto";
	case 2:
		return "on";
	default:
		return "?";
	}
}

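/* Toggle tcp-data-split (header split) via the ethtool netlink API and report
 * the resulting setting. Devmem RX relies on header split so that protocol
 * headers land in host memory while the payload goes to the bound dmabuf;
 * run_devmem_tests() below expects the RX binding to fail when it is off.
 */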
static int configure_headersplit(bool on)
{
	struct ethtool_rings_get_req *get_req;
	struct ethtool_rings_get_rsp *get_rsp;
	struct ethtool_rings_set_req *req;
	struct ynl_error yerr;
	struct ynl_sock *ys;
	int ret;

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return -1;
	}

	req = ethtool_rings_set_req_alloc();
	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
	/* 0 - off, 1 - auto, 2 - on */
	ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0);
	ret = ethtool_rings_set(ys, req);
	if (ret < 0)
		fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
	ethtool_rings_set_req_free(req);

	if (ret == 0) {
		get_req = ethtool_rings_get_req_alloc();
		ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
		get_rsp = ethtool_rings_get(ys, get_req);
		ethtool_rings_get_req_free(get_req);
		if (get_rsp)
			fprintf(stderr, "TCP header split: %s\n",
				tcp_data_split_str(get_rsp->tcp_data_split));
		ethtool_rings_get_rsp_free(get_rsp);
	}

	ynl_sock_destroy(ys);

	return ret;
}

static int configure_rss(void)
{
	return run_command("ethtool -X %s equal %d >&2", ifname, start_queue);
}

static int configure_channels(unsigned int rx, unsigned int tx)
{
	struct ethtool_channels_get_req *gchan;
	struct ethtool_channels_set_req *schan;
	struct ethtool_channels_get_rsp *chan;
	struct ynl_error yerr;
	struct ynl_sock *ys;
	int ret;

	fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx);

	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
	if (!ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return -1;
	}

	gchan = ethtool_channels_get_req_alloc();
	if (!gchan) {
		ret = -1;
		goto exit_close_sock;
	}

	ethtool_channels_get_req_set_header_dev_index(gchan, ifindex);
	chan = ethtool_channels_get(ys, gchan);
	ethtool_channels_get_req_free(gchan);
	if (!chan) {
		fprintf(stderr, "YNL get channels: %s\n", ys->err.msg);
		ret = -1;
		goto exit_close_sock;
	}

	schan = ethtool_channels_set_req_alloc();
	if (!schan) {
		ret = -1;
		goto exit_free_chan;
	}

	ethtool_channels_set_req_set_header_dev_index(schan, ifindex);

	if (chan->_present.combined_count) {
		if (chan->_present.rx_count || chan->_present.tx_count) {
			ethtool_channels_set_req_set_rx_count(schan, 0);
			ethtool_channels_set_req_set_tx_count(schan, 0);
		}

		if (rx == tx) {
			ethtool_channels_set_req_set_combined_count(schan, rx);
		} else if (rx > tx) {
			ethtool_channels_set_req_set_combined_count(schan, tx);
			ethtool_channels_set_req_set_rx_count(schan, rx - tx);
		} else {
			ethtool_channels_set_req_set_combined_count(schan, rx);
			ethtool_channels_set_req_set_tx_count(schan, tx - rx);
		}

		ret = ethtool_channels_set(ys, schan);
		if (ret)
			fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
	} else if (chan->_present.rx_count) {
		ethtool_channels_set_req_set_rx_count(schan, rx);
		ethtool_channels_set_req_set_tx_count(schan, tx);

		ret = ethtool_channels_set(ys, schan);
		if (ret)
			fprintf(stderr, "YNL set channels: %s\n", ys->err.msg);
	} else {
		fprintf(stderr, "Error: device has neither combined nor rx channels\n");
		ret = -1;
	}
	ethtool_channels_set_req_free(schan);
exit_free_chan:
	ethtool_channels_get_rsp_free(chan);
exit_close_sock:
	ynl_sock_destroy(ys);

	return ret;
}

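/* Install an ntuple rule steering the test flow to start_queue. A 5-tuple
 * match (including the client address and port when -c was given) is tried
 * first; if the NIC rejects it, fall back to matching only on the destination
 * address and port.
 */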
static int configure_flow_steering(struct sockaddr_in6 *server_sin)
{
	const char *type = "tcp6";
	const char *server_addr;
	char buf[40];

	inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
	server_addr = buf;

	if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
		type = "tcp4";
		server_addr = strrchr(server_addr, ':') + 1;
	}

	/* Try to configure a 5-tuple rule */
	if (run_command("ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
			ifname,
			type,
			client_ip ? "src-ip" : "",
			client_ip ?: "",
			server_addr,
			client_ip ? "src-port" : "",
			client_ip ? port : "",
			port, start_queue))
		/* If that fails, try a 3-tuple rule */
		if (run_command("ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
				ifname,
				type,
				server_addr,
				port, start_queue))
			/* If that fails, return an error */
			return -1;

	return 0;
}

static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
			 struct netdev_queue_id *queues,
			 unsigned int n_queue_index, struct ynl_sock **ys)
{
	struct netdev_bind_rx_req *req = NULL;
	struct netdev_bind_rx_rsp *rsp = NULL;
	struct ynl_error yerr;

	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
	if (!*ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return -1;
	}

	req = netdev_bind_rx_req_alloc();
	netdev_bind_rx_req_set_ifindex(req, ifindex);
	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);

	rsp = netdev_bind_rx(*ys, req);
	if (!rsp) {
		perror("netdev_bind_rx");
		goto err_close;
	}

	if (!rsp->_present.id) {
		perror("id not present");
		goto err_close;
	}

	fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
	dmabuf_id = rsp->id;

	netdev_bind_rx_req_free(req);
	netdev_bind_rx_rsp_free(rsp);

	return 0;

err_close:
	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
	netdev_bind_rx_req_free(req);
	ynl_sock_destroy(*ys);
	return -1;
}

static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
			 struct ynl_sock **ys)
{
	struct netdev_bind_tx_req *req = NULL;
	struct netdev_bind_tx_rsp *rsp = NULL;
	struct ynl_error yerr;

	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
	if (!*ys) {
		fprintf(stderr, "YNL: %s\n", yerr.msg);
		return -1;
	}

	req = netdev_bind_tx_req_alloc();
	netdev_bind_tx_req_set_ifindex(req, ifindex);
	netdev_bind_tx_req_set_fd(req, dmabuf_fd);

	rsp = netdev_bind_tx(*ys, req);
	if (!rsp) {
		perror("netdev_bind_tx");
		goto err_close;
	}

	if (!rsp->_present.id) {
		perror("id not present");
		goto err_close;
	}

	fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
	tx_dmabuf_id = rsp->id;

	netdev_bind_tx_req_free(req);
	netdev_bind_tx_rsp_free(rsp);

	return 0;

err_close:
	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
	netdev_bind_tx_req_free(req);
	ynl_sock_destroy(*ys);
	return -1;
}

static void enable_reuseaddr(int fd)
{
	int opt = 1;
	int ret;

	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
	if (ret)
		error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX);

	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
	if (ret)
		error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX);
}

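/* Parse an address string into a sockaddr_in6. IPv6 is tried first; plain
 * IPv4 addresses are converted to the IPv4-mapped form (::ffff:a.b.c.d) so
 * the rest of the code can use AF_INET6 sockets for both families.
 */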
static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
{
	int ret;

	sin6->sin6_family = AF_INET6;
	sin6->sin6_port = htons(port);

	ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
	if (ret != 1) {
		/* fallback to plain IPv4 */
		ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
		if (ret != 1)
			return -1;

		/* add ::ffff prefix */
		sin6->sin6_addr.s6_addr32[0] = 0;
		sin6->sin6_addr.s6_addr32[1] = 0;
		sin6->sin6_addr.s6_addr16[4] = 0;
		sin6->sin6_addr.s6_addr16[5] = 0xffff;
	}

	return 0;
}

static struct netdev_queue_id *create_queues(void)
{
	struct netdev_queue_id *queues;
	size_t i = 0;

	queues = netdev_queue_id_alloc(num_queues);
	for (i = 0; i < num_queues; i++) {
		netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX);
		netdev_queue_id_set_id(&queues[i], start_queue + i);
	}

	return queues;
}

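/* RX side: steer the test flow to the devmem queues, bind those queues to the
 * dmabuf, then accept one connection and receive with MSG_SOCK_DEVMEM. Devmem
 * payload is described by SCM_DEVMEM_DMABUF cmsgs (frag offset, size and
 * token within the dmabuf); each frag is copied out through the provider for
 * validation or printing and then released back to the kernel with
 * SO_DEVMEM_DONTNEED. Data that landed in the skb's linear buffer is reported
 * via SCM_DEVMEM_LINEAR instead.
 */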
static int do_server(struct memory_buffer *mem)
{
	char ctrl_data[sizeof(int) * 20000];
	size_t non_page_aligned_frags = 0;
	struct sockaddr_in6 client_addr;
	struct sockaddr_in6 server_sin;
	size_t page_aligned_frags = 0;
	size_t total_received = 0;
	socklen_t client_addr_len;
	bool is_devmem = false;
	char *tmp_mem = NULL;
	struct ynl_sock *ys;
	char iobuf[819200];
	char buffer[256];
	int socket_fd;
	int client_fd;
	int ret;

	ret = parse_address(server_ip, atoi(port), &server_sin);
	if (ret < 0)
		error(1, 0, "parse server address");

	if (reset_flow_steering())
		error(1, 0, "Failed to reset flow steering\n");

	if (configure_headersplit(1))
		error(1, 0, "Failed to enable TCP header split\n");

	/* Configure RSS to divert all traffic from our devmem queues */
	if (configure_rss())
		error(1, 0, "Failed to configure rss\n");

	/* Flow steer our devmem flows to start_queue */
	if (configure_flow_steering(&server_sin))
		error(1, 0, "Failed to configure flow steering\n");

	sleep(1);

	if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
		error(1, 0, "Failed to bind\n");

	tmp_mem = malloc(mem->size);
	if (!tmp_mem)
		error(1, ENOMEM, "malloc failed");

	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (socket_fd < 0)
		error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);

	enable_reuseaddr(socket_fd);

	fprintf(stderr, "binding to address %s:%d\n", server_ip,
		ntohs(server_sin.sin6_port));

	ret = bind(socket_fd, &server_sin, sizeof(server_sin));
	if (ret)
		error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);

	ret = listen(socket_fd, 1);
	if (ret)
		error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);

	client_addr_len = sizeof(client_addr);

	inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
		  sizeof(buffer));
	fprintf(stderr, "Waiting for connection on %s:%d\n", buffer,
		ntohs(server_sin.sin6_port));
	client_fd = accept(socket_fd, &client_addr, &client_addr_len);

	inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
		  sizeof(buffer));
	fprintf(stderr, "Got connection from %s:%d\n", buffer,
		ntohs(client_addr.sin6_port));

	while (1) {
		struct iovec iov = { .iov_base = iobuf,
				     .iov_len = sizeof(iobuf) };
		struct dmabuf_cmsg *dmabuf_cmsg = NULL;
		struct cmsghdr *cm = NULL;
		struct msghdr msg = { 0 };
		struct dmabuf_token token;
		ssize_t ret;

		is_devmem = false;

		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;
		msg.msg_control = ctrl_data;
		msg.msg_controllen = sizeof(ctrl_data);
		ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
		fprintf(stderr, "recvmsg ret=%ld\n", ret);
		if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
			continue;
		if (ret < 0) {
			perror("recvmsg");
			continue;
		}
		if (ret == 0) {
			fprintf(stderr, "client exited\n");
			goto cleanup;
		}

		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
			if (cm->cmsg_level != SOL_SOCKET ||
			    (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
			     cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
				fprintf(stderr, "skipping non-devmem cmsg\n");
				continue;
			}

			dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
			is_devmem = true;

			if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
				/* TODO: process data copied from skb's linear
				 * buffer.
				 */
				fprintf(stderr,
					"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
					dmabuf_cmsg->frag_size);

				continue;
			}

			token.token_start = dmabuf_cmsg->frag_token;
			token.token_count = 1;

			total_received += dmabuf_cmsg->frag_size;
			fprintf(stderr,
				"received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
				dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
				dmabuf_cmsg->frag_offset % getpagesize(),
				dmabuf_cmsg->frag_offset,
				dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
				total_received, dmabuf_cmsg->dmabuf_id);

			if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
				error(1, 0,
				      "received on wrong dmabuf_id: flow steering error\n");

			if (dmabuf_cmsg->frag_size % getpagesize())
				non_page_aligned_frags++;
			else
				page_aligned_frags++;

			provider->memcpy_from_device(tmp_mem, mem,
						     dmabuf_cmsg->frag_offset,
						     dmabuf_cmsg->frag_size);

			if (do_validation)
				validate_buffer(tmp_mem,
						dmabuf_cmsg->frag_size);
			else
				print_nonzero_bytes(tmp_mem,
						    dmabuf_cmsg->frag_size);

			ret = setsockopt(client_fd, SOL_SOCKET,
					 SO_DEVMEM_DONTNEED, &token,
					 sizeof(token));
			if (ret != 1)
				error(1, 0,
				      "SO_DEVMEM_DONTNEED not enough tokens");
		}
		if (!is_devmem)
			error(1, 0, "flow steering error\n");

		fprintf(stderr, "total_received=%lu\n", total_received);
	}

	fprintf(stderr, "%s: ok\n", TEST_PREFIX);

	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
		page_aligned_frags, non_page_aligned_frags);

cleanup:

	free(tmp_mem);
	close(client_fd);
	close(socket_fd);
	ynl_sock_destroy(ys);

	return 0;
}

void run_devmem_tests(void)
{
	struct memory_buffer *mem;
	struct ynl_sock *ys;

	mem = provider->alloc(getpagesize() * NUM_PAGES);

	/* Configure RSS to divert all traffic from our devmem queues */
	if (configure_rss())
		error(1, 0, "rss error\n");

	if (configure_headersplit(1))
		error(1, 0, "Failed to configure header split\n");

	if (!bind_rx_queue(ifindex, mem->fd,
			   calloc(num_queues, sizeof(struct netdev_queue_id)),
			   num_queues, &ys))
		error(1, 0, "Binding empty queues array should have failed\n");

	if (configure_headersplit(0))
		error(1, 0, "Failed to configure header split\n");

	if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
		error(1, 0, "Configure dmabuf with header split off should have failed\n");

	if (configure_headersplit(1))
		error(1, 0, "Failed to configure header split\n");

	if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
		error(1, 0, "Failed to bind\n");

	/* Deactivating a bound queue should not be legal */
	if (!configure_channels(num_queues, num_queues))
		error(1, 0, "Deactivating a bound queue should be illegal.\n");

	/* Closing the netlink socket does an implicit unbind */
	ynl_sock_destroy(ys);

	provider->free(mem);
}

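/* TX completion helpers: devmem TX is zerocopy, so after each sendmsg() the
 * client polls the socket error queue for the SO_EE_ORIGIN_ZEROCOPY
 * notification before reusing the dmabuf region.
 */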
static uint64_t gettimeofday_ms(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
}

static int do_poll(int fd)
{
	struct pollfd pfd;
	int ret;

	pfd.revents = 0;
	pfd.fd = fd;

	ret = poll(&pfd, 1, waittime_ms);
	if (ret == -1)
		error(1, errno, "poll");

	return ret && (pfd.revents & POLLERR);
}

static void wait_compl(int fd)
{
	int64_t tstop = gettimeofday_ms() + waittime_ms;
	char control[CMSG_SPACE(100)] = {};
	struct sock_extended_err *serr;
	struct msghdr msg = {};
	struct cmsghdr *cm;
	__u32 hi, lo;
	int ret;

	msg.msg_control = control;
	msg.msg_controllen = sizeof(control);

	while (gettimeofday_ms() < tstop) {
		if (!do_poll(fd))
			continue;

		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
		if (ret < 0) {
			if (errno == EAGAIN)
				continue;
			error(1, errno, "recvmsg(MSG_ERRQUEUE)");
			return;
		}
		if (msg.msg_flags & MSG_CTRUNC)
			error(1, 0, "MSG_CTRUNC\n");

		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
			if (cm->cmsg_level != SOL_IP &&
			    cm->cmsg_level != SOL_IPV6)
				continue;
			if (cm->cmsg_level == SOL_IP &&
			    cm->cmsg_type != IP_RECVERR)
				continue;
			if (cm->cmsg_level == SOL_IPV6 &&
			    cm->cmsg_type != IPV6_RECVERR)
				continue;

			serr = (void *)CMSG_DATA(cm);
			if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
				error(1, 0, "wrong origin %u", serr->ee_origin);
			if (serr->ee_errno != 0)
				error(1, 0, "wrong errno %d", serr->ee_errno);

			hi = serr->ee_data;
			lo = serr->ee_info;

			fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
			return;
		}
	}

	error(1, 0, "did not receive tx completion");
}

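/* TX side: bind the socket to the interface, bind the dmabuf to the TX path
 * and enable SO_ZEROCOPY. Each line read from stdin is copied into the dmabuf
 * and sent with MSG_ZEROCOPY plus an SCM_DEVMEM_DMABUF cmsg carrying the
 * dmabuf id; iov_base holds offsets into the dmabuf rather than pointers,
 * optionally split into -z sized chunks. Every send waits for its zerocopy
 * completion before the buffer is reused.
 */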
static int do_client(struct memory_buffer *mem)
{
	char ctrl_data[CMSG_SPACE(sizeof(__u32))];
	struct sockaddr_in6 server_sin;
	struct sockaddr_in6 client_sin;
	struct ynl_sock *ys = NULL;
	struct iovec iov[MAX_IOV];
	struct msghdr msg = {};
	ssize_t line_size = 0;
	struct cmsghdr *cmsg;
	char *line = NULL;
	size_t len = 0;
	int socket_fd;
	__u32 ddmabuf;
	int opt = 1;
	int ret;

	ret = parse_address(server_ip, atoi(port), &server_sin);
	if (ret < 0)
		error(1, 0, "parse server address");

	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (socket_fd < 0)
		error(1, errno, "create socket");

	enable_reuseaddr(socket_fd);

	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
			 strlen(ifname) + 1);
	if (ret)
		error(1, errno, "bindtodevice");

	if (bind_tx_queue(ifindex, mem->fd, &ys))
		error(1, 0, "Failed to bind\n");

	if (client_ip) {
		ret = parse_address(client_ip, atoi(port), &client_sin);
		if (ret < 0)
			error(1, 0, "parse client address");

		ret = bind(socket_fd, &client_sin, sizeof(client_sin));
		if (ret)
			error(1, errno, "bind");
	}

	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
	if (ret)
		error(1, errno, "set sock opt");

	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
		ntohs(server_sin.sin6_port), ifname);

	ret = connect(socket_fd, &server_sin, sizeof(server_sin));
	if (ret)
		error(1, errno, "connect");

	while (1) {
		free(line);
		line = NULL;
		line_size = getline(&line, &len, stdin);

		if (line_size < 0)
			break;

		if (max_chunk) {
			msg.msg_iovlen =
				(line_size + max_chunk - 1) / max_chunk;
			if (msg.msg_iovlen > MAX_IOV)
				error(1, 0,
				      "can't partition %zd bytes into maximum of %d chunks",
				      line_size, MAX_IOV);

			for (int i = 0; i < msg.msg_iovlen; i++) {
				iov[i].iov_base = (void *)(i * max_chunk);
				iov[i].iov_len = max_chunk;
			}

			iov[msg.msg_iovlen - 1].iov_len =
				line_size - (msg.msg_iovlen - 1) * max_chunk;
		} else {
			iov[0].iov_base = 0;
			iov[0].iov_len = line_size;
			msg.msg_iovlen = 1;
		}

		msg.msg_iov = iov;
		provider->memcpy_to_device(mem, 0, line, line_size);

		msg.msg_control = ctrl_data;
		msg.msg_controllen = sizeof(ctrl_data);

		cmsg = CMSG_FIRSTHDR(&msg);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));

		ddmabuf = tx_dmabuf_id;

		*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;

		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
		if (ret < 0)
			error(1, errno, "Failed sendmsg");

		fprintf(stderr, "sendmsg_ret=%d\n", ret);

		if (ret != line_size)
			error(1, errno, "Did not send all bytes %d vs %zd", ret,
			      line_size);

		wait_compl(socket_fd);
	}

	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);

	free(line);
	close(socket_fd);

	if (ys)
		ynl_sock_destroy(ys);

	return 0;
}

int main(int argc, char *argv[])
{
	struct memory_buffer *mem;
	int is_server = 0, opt;
	int ret;

	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
		switch (opt) {
		case 'l':
			is_server = 1;
			break;
		case 's':
			server_ip = optarg;
			break;
		case 'c':
			client_ip = optarg;
			break;
		case 'p':
			port = optarg;
			break;
		case 'v':
			do_validation = atoll(optarg);
			break;
		case 'q':
			num_queues = atoi(optarg);
			break;
		case 't':
			start_queue = atoi(optarg);
			break;
		case 'f':
			ifname = optarg;
			break;
		case 'z':
			max_chunk = atoi(optarg);
			break;
		case '?':
			fprintf(stderr, "unknown option: %c\n", optopt);
			break;
		}
	}

	if (!ifname)
		error(1, 0, "Missing -f argument\n");

	ifindex = if_nametoindex(ifname);

	fprintf(stderr, "using ifindex=%u\n", ifindex);

	if (!server_ip && !client_ip) {
		if (start_queue < 0 && num_queues < 0) {
			num_queues = rxq_num(ifindex);
			if (num_queues < 0)
				error(1, 0, "couldn't detect number of queues\n");
			if (num_queues < 2)
				error(1, 0,
				      "number of device queues is too low\n");
			/* make sure we can bind to multiple queues */
			start_queue = num_queues / 2;
			num_queues /= 2;
		}

		if (start_queue < 0 || num_queues < 0)
			error(1, 0, "Both -t and -q are required\n");

		run_devmem_tests();
		return 0;
	}

	if (start_queue < 0 && num_queues < 0) {
		num_queues = rxq_num(ifindex);
		if (num_queues < 2)
			error(1, 0, "number of device queues is too low\n");

		num_queues = 1;
		start_queue = rxq_num(ifindex) - num_queues;

		if (start_queue < 0)
			error(1, 0, "couldn't detect number of queues\n");

		fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
	}

	for (; optind < argc; optind++)
		fprintf(stderr, "extra arguments: %s\n", argv[optind]);

	if (start_queue < 0)
		error(1, 0, "Missing -t argument\n");

	if (num_queues < 0)
		error(1, 0, "Missing -q argument\n");

	if (!server_ip)
		error(1, 0, "Missing -s argument\n");

	if (!port)
		error(1, 0, "Missing -p argument\n");

	mem = provider->alloc(getpagesize() * NUM_PAGES);
	ret = is_server ? do_server(mem) : do_client(mem);
	provider->free(mem);

	return ret;
}