xref: /linux/tools/testing/selftests/drivers/net/hw/ncdevmem.c (revision 73d952840d9f84d0ba94d21a35b3e8149f5a28ed)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
4  * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
5  *
6  * Usage:
7  *
8  *     On server:
9  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
10  *
11  *     On client:
12  *     echo -n "hello\nworld" | \
13  *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
14  *
15  * Note this is compatible with regular netcat. i.e. the sender or receiver can
16  * be replaced with regular netcat to test the RX or TX path in isolation.
17  *
18  * Test data validation (devmem TCP on RX only):
19  *
20  *     On server:
21  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
22  *
23  *     On client:
24  *     yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
25  *             head -c 1G | \
26  *             nc <server IP> 5201 -p 5201
27  *
28  * Test data validation (devmem TCP on RX and TX, validation happens on RX):
29  *
30  *	On server:
31  *	ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
32  *
33  *	On client:
34  *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
35  *		head -c 1M | \
36  *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
37  */
38 #define _GNU_SOURCE
39 #define __EXPORTED_HEADERS__
40 
41 #include <linux/uio.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <stdbool.h>
46 #include <string.h>
47 #include <errno.h>
48 #define __iovec_defined
49 #include <fcntl.h>
50 #include <malloc.h>
51 #include <error.h>
52 #include <poll.h>
53 
54 #include <arpa/inet.h>
55 #include <sys/socket.h>
56 #include <sys/mman.h>
57 #include <sys/ioctl.h>
58 #include <sys/syscall.h>
59 #include <sys/time.h>
60 
61 #include <linux/memfd.h>
62 #include <linux/dma-buf.h>
63 #include <linux/errqueue.h>
64 #include <linux/udmabuf.h>
65 #include <linux/types.h>
66 #include <linux/netlink.h>
67 #include <linux/genetlink.h>
68 #include <linux/netdev.h>
69 #include <linux/ethtool_netlink.h>
70 #include <time.h>
71 #include <net/if.h>
72 
73 #include "netdev-user.h"
74 #include "ethtool-user.h"
75 #include <ynl.h>
76 
/* Shift used when reporting which page of the dmabuf a fragment landed in */
#define PAGE_SHIFT 12
/* Tag prepended to the "[FAIL, ...]" / "[skip, ...]" status lines */
#define TEST_PREFIX "ncdevmem"
/* Size of the test buffer in pages; multiplied by getpagesize() at alloc */
#define NUM_PAGES 16000

/* Fallback definition for headers that do not provide MSG_SOCK_DEVMEM */
#if !defined(MSG_SOCK_DEVMEM)
#define MSG_SOCK_DEVMEM 0x2000000
#endif
84 
/* Command line configuration, filled in by main() from getopt() */
static char *server_ip;			/* -s */
static char *client_ip;			/* -c */
static char *port;			/* -p, kept as a string */
static char *ifname;			/* -f */
static size_t do_validation;		/* -v, 0 disables payload validation */
static int start_queue = -1;		/* -t, -1 until set or auto-detected */
static int num_queues = -1;		/* -q, -1 until set or auto-detected */

/* Derived at run time */
static unsigned int ifindex;		/* if_nametoindex(ifname) */
static unsigned int dmabuf_id;		/* id returned by the RX bind */
static uint32_t tx_dmabuf_id;		/* id returned by the TX bind */

/* Timeout used when polling for TX completions, in milliseconds */
static int waittime_ms = 500;
96 
/*
 * A device memory buffer plus the descriptors that keep it alive.
 *
 * For the udmabuf provider in this file: @devfd is /dev/udmabuf, @memfd is
 * the memfd the buffer is created from, @fd is the descriptor returned by
 * the UDMABUF_CREATE ioctl and @buf_mem is the mmap() of @fd.
 */
struct memory_buffer {
	int fd;		/* dmabuf descriptor handed to the bind requests */
	size_t size;	/* length of the buffer and of the mapping, in bytes */

	int devfd;	/* provider control device */
	int memfd;	/* backing memfd */
	char *buf_mem;	/* CPU mapping of @fd */
};
105 
/*
 * Operations a device memory backend has to supply.
 *
 * @alloc:              create a buffer of @size bytes
 * @free:               release a buffer obtained from @alloc
 * @memcpy_to_device:   copy @n bytes from @src into @dst at byte offset @off
 * @memcpy_from_device: copy @n bytes out of @src at byte offset @off to @dst
 */
struct memory_provider {
	struct memory_buffer *(*alloc)(size_t size);
	void (*free)(struct memory_buffer *ctx);
	void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
				 void *src, int n);
	void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
				   size_t off, int n);
};
114 
115 static struct memory_buffer *udmabuf_alloc(size_t size)
116 {
117 	struct udmabuf_create create;
118 	struct memory_buffer *ctx;
119 	int ret;
120 
121 	ctx = malloc(sizeof(*ctx));
122 	if (!ctx)
123 		error(1, ENOMEM, "malloc failed");
124 
125 	ctx->size = size;
126 
127 	ctx->devfd = open("/dev/udmabuf", O_RDWR);
128 	if (ctx->devfd < 0)
129 		error(1, errno,
130 		      "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
131 		      TEST_PREFIX);
132 
133 	ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
134 	if (ctx->memfd < 0)
135 		error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX);
136 
137 	ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
138 	if (ret < 0)
139 		error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
140 
141 	ret = ftruncate(ctx->memfd, size);
142 	if (ret == -1)
143 		error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
144 
145 	memset(&create, 0, sizeof(create));
146 
147 	create.memfd = ctx->memfd;
148 	create.offset = 0;
149 	create.size = size;
150 	ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
151 	if (ctx->fd < 0)
152 		error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
153 
154 	ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
155 			    ctx->fd, 0);
156 	if (ctx->buf_mem == MAP_FAILED)
157 		error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX);
158 
159 	return ctx;
160 }
161 
162 static void udmabuf_free(struct memory_buffer *ctx)
163 {
164 	munmap(ctx->buf_mem, ctx->size);
165 	close(ctx->fd);
166 	close(ctx->memfd);
167 	close(ctx->devfd);
168 	free(ctx);
169 }
170 
171 static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
172 				     void *src, int n)
173 {
174 	struct dma_buf_sync sync = {};
175 
176 	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
177 	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
178 
179 	memcpy(dst->buf_mem + off, src, n);
180 
181 	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
182 	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
183 }
184 
185 static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
186 				       size_t off, int n)
187 {
188 	struct dma_buf_sync sync = {};
189 
190 	sync.flags = DMA_BUF_SYNC_START;
191 	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
192 
193 	memcpy(dst, src->buf_mem + off, n);
194 
195 	sync.flags = DMA_BUF_SYNC_END;
196 	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
197 }
198 
199 static struct memory_provider udmabuf_memory_provider = {
200 	.alloc = udmabuf_alloc,
201 	.free = udmabuf_free,
202 	.memcpy_to_device = udmabuf_memcpy_to_device,
203 	.memcpy_from_device = udmabuf_memcpy_from_device,
204 };
205 
206 static struct memory_provider *provider = &udmabuf_memory_provider;
207 
/*
 * Write @size bytes starting at @ptr to stdout, one putchar() per byte.
 * Despite the name no byte is filtered out; zero bytes are emitted too.
 */
static void print_nonzero_bytes(void *ptr, size_t size)
{
	const unsigned char *bytes = ptr;
	unsigned int idx = 0;

	while (idx < size) {
		putchar(bytes[idx]);
		idx++;
	}
}
216 
217 void validate_buffer(void *line, size_t size)
218 {
219 	static unsigned char seed = 1;
220 	unsigned char *ptr = line;
221 	unsigned char expected;
222 	static int errors;
223 	size_t i;
224 
225 	for (i = 0; i < size; i++) {
226 		expected = seed ? seed : '\n';
227 		if (ptr[i] != expected) {
228 			fprintf(stderr,
229 				"Failed validation: expected=%u, actual=%u, index=%lu\n",
230 				expected, ptr[i], i);
231 			errors++;
232 			if (errors > 20)
233 				error(1, 0, "validation failed.");
234 		}
235 		seed++;
236 		if (seed == do_validation)
237 			seed = 0;
238 	}
239 
240 	fprintf(stdout, "Validated buffer\n");
241 }
242 
243 static int rxq_num(int ifindex)
244 {
245 	struct ethtool_channels_get_req *req;
246 	struct ethtool_channels_get_rsp *rsp;
247 	struct ynl_error yerr;
248 	struct ynl_sock *ys;
249 	int num = -1;
250 
251 	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
252 	if (!ys) {
253 		fprintf(stderr, "YNL: %s\n", yerr.msg);
254 		return -1;
255 	}
256 
257 	req = ethtool_channels_get_req_alloc();
258 	ethtool_channels_get_req_set_header_dev_index(req, ifindex);
259 	rsp = ethtool_channels_get(ys, req);
260 	if (rsp)
261 		num = rsp->rx_count + rsp->combined_count;
262 	ethtool_channels_get_req_free(req);
263 	ethtool_channels_get_rsp_free(rsp);
264 
265 	ynl_sock_destroy(ys);
266 
267 	return num;
268 }
269 
/*
 * Format a shell command printf-style, log it to stderr and run it with
 * system().
 *
 * Evaluates to the system() status, or to -1 without running anything when
 * the formatted command does not fit the 256 byte buffer (executing a
 * truncated command line could do something quite different from what was
 * intended).
 */
#define run_command(cmd, ...)                                                \
	({                                                                   \
		char command[256];                                           \
		int run_command_len_;                                        \
		int run_command_ret_;                                        \
		memset(command, 0, sizeof(command));                         \
		run_command_len_ = snprintf(command, sizeof(command), cmd,   \
					    ##__VA_ARGS__);                  \
		if (run_command_len_ < 0 ||                                  \
		    (size_t)run_command_len_ >= sizeof(command)) {           \
			fprintf(stderr, "Command too long, not run: %s\n",   \
				command);                                    \
			run_command_ret_ = -1;                               \
		} else {                                                     \
			fprintf(stderr, "Running: %s\n", command);           \
			run_command_ret_ = system(command);                  \
		}                                                            \
		run_command_ret_;                                            \
	})
278 
279 static int reset_flow_steering(void)
280 {
281 	/* Depending on the NIC, toggling ntuple off and on might not
282 	 * be allowed. Additionally, attempting to delete existing filters
283 	 * will fail if no filters are present. Therefore, do not enforce
284 	 * the exit status.
285 	 */
286 
287 	run_command("sudo ethtool -K %s ntuple off >&2", ifname);
288 	run_command("sudo ethtool -K %s ntuple on >&2", ifname);
289 	run_command(
290 		"sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
291 		ifname, ifname);
292 	return 0;
293 }
294 
/*
 * Map a tcp-data-split setting to a printable name:
 * 0 -> "off", 1 -> "auto", 2 -> "on", anything else -> "?".
 */
static const char *tcp_data_split_str(int val)
{
	static const char *const names[] = { "off", "auto", "on" };
	const int count = sizeof(names) / sizeof(names[0]);

	if (val < 0 || val >= count)
		return "?";

	return names[val];
}
308 
309 static int configure_headersplit(bool on)
310 {
311 	struct ethtool_rings_get_req *get_req;
312 	struct ethtool_rings_get_rsp *get_rsp;
313 	struct ethtool_rings_set_req *req;
314 	struct ynl_error yerr;
315 	struct ynl_sock *ys;
316 	int ret;
317 
318 	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
319 	if (!ys) {
320 		fprintf(stderr, "YNL: %s\n", yerr.msg);
321 		return -1;
322 	}
323 
324 	req = ethtool_rings_set_req_alloc();
325 	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
326 	/* 0 - off, 1 - auto, 2 - on */
327 	ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0);
328 	ret = ethtool_rings_set(ys, req);
329 	if (ret < 0)
330 		fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
331 	ethtool_rings_set_req_free(req);
332 
333 	if (ret == 0) {
334 		get_req = ethtool_rings_get_req_alloc();
335 		ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
336 		get_rsp = ethtool_rings_get(ys, get_req);
337 		ethtool_rings_get_req_free(get_req);
338 		if (get_rsp)
339 			fprintf(stderr, "TCP header split: %s\n",
340 				tcp_data_split_str(get_rsp->tcp_data_split));
341 		ethtool_rings_get_rsp_free(get_rsp);
342 	}
343 
344 	ynl_sock_destroy(ys);
345 
346 	return ret;
347 }
348 
349 static int configure_rss(void)
350 {
351 	return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue);
352 }
353 
354 static int configure_channels(unsigned int rx, unsigned int tx)
355 {
356 	return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
357 }
358 
359 static int configure_flow_steering(struct sockaddr_in6 *server_sin)
360 {
361 	const char *type = "tcp6";
362 	const char *server_addr;
363 	char buf[40];
364 
365 	inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
366 	server_addr = buf;
367 
368 	if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
369 		type = "tcp4";
370 		server_addr = strrchr(server_addr, ':') + 1;
371 	}
372 
373 	return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
374 			   ifname,
375 			   type,
376 			   client_ip ? "src-ip" : "",
377 			   client_ip ?: "",
378 			   server_addr,
379 			   client_ip ? "src-port" : "",
380 			   client_ip ? port : "",
381 			   port, start_queue);
382 }
383 
384 static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
385 			 struct netdev_queue_id *queues,
386 			 unsigned int n_queue_index, struct ynl_sock **ys)
387 {
388 	struct netdev_bind_rx_req *req = NULL;
389 	struct netdev_bind_rx_rsp *rsp = NULL;
390 	struct ynl_error yerr;
391 
392 	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
393 	if (!*ys) {
394 		fprintf(stderr, "YNL: %s\n", yerr.msg);
395 		return -1;
396 	}
397 
398 	req = netdev_bind_rx_req_alloc();
399 	netdev_bind_rx_req_set_ifindex(req, ifindex);
400 	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
401 	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
402 
403 	rsp = netdev_bind_rx(*ys, req);
404 	if (!rsp) {
405 		perror("netdev_bind_rx");
406 		goto err_close;
407 	}
408 
409 	if (!rsp->_present.id) {
410 		perror("id not present");
411 		goto err_close;
412 	}
413 
414 	fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
415 	dmabuf_id = rsp->id;
416 
417 	netdev_bind_rx_req_free(req);
418 	netdev_bind_rx_rsp_free(rsp);
419 
420 	return 0;
421 
422 err_close:
423 	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
424 	netdev_bind_rx_req_free(req);
425 	ynl_sock_destroy(*ys);
426 	return -1;
427 }
428 
429 static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
430 			 struct ynl_sock **ys)
431 {
432 	struct netdev_bind_tx_req *req = NULL;
433 	struct netdev_bind_tx_rsp *rsp = NULL;
434 	struct ynl_error yerr;
435 
436 	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
437 	if (!*ys) {
438 		fprintf(stderr, "YNL: %s\n", yerr.msg);
439 		return -1;
440 	}
441 
442 	req = netdev_bind_tx_req_alloc();
443 	netdev_bind_tx_req_set_ifindex(req, ifindex);
444 	netdev_bind_tx_req_set_fd(req, dmabuf_fd);
445 
446 	rsp = netdev_bind_tx(*ys, req);
447 	if (!rsp) {
448 		perror("netdev_bind_tx");
449 		goto err_close;
450 	}
451 
452 	if (!rsp->_present.id) {
453 		perror("id not present");
454 		goto err_close;
455 	}
456 
457 	fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
458 	tx_dmabuf_id = rsp->id;
459 
460 	netdev_bind_tx_req_free(req);
461 	netdev_bind_tx_rsp_free(rsp);
462 
463 	return 0;
464 
465 err_close:
466 	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
467 	netdev_bind_tx_req_free(req);
468 	ynl_sock_destroy(*ys);
469 	return -1;
470 }
471 
472 static void enable_reuseaddr(int fd)
473 {
474 	int opt = 1;
475 	int ret;
476 
477 	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
478 	if (ret)
479 		error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX);
480 
481 	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
482 	if (ret)
483 		error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX);
484 }
485 
/*
 * Fill @sin6 from the textual address @str and the host-order @port.
 *
 * @str may be an IPv6 address or a dotted-quad IPv4 address; the latter is
 * stored in v4-mapped form (::ffff:a.b.c.d). The whole structure is zeroed
 * first so that sin6_flowinfo and sin6_scope_id never carry stack garbage
 * into bind()/connect().
 *
 * Returns 0 on success, -1 if @port is outside 0..65535 or @str is neither
 * a valid IPv6 nor a valid IPv4 address.
 */
static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
{
	struct in_addr v4;

	if (port < 0 || port > 65535)
		return -1;

	memset(sin6, 0, sizeof(*sin6));
	sin6->sin6_family = AF_INET6;
	sin6->sin6_port = htons(port);

	if (inet_pton(AF_INET6, str, &sin6->sin6_addr) == 1)
		return 0;

	/* fallback to plain IPv4 */
	if (inet_pton(AF_INET, str, &v4) != 1)
		return -1;

	/* ::ffff prefix: 80 zero bits, 16 one bits, then the IPv4 address */
	memset(sin6->sin6_addr.s6_addr, 0, 10);
	sin6->sin6_addr.s6_addr[10] = 0xff;
	sin6->sin6_addr.s6_addr[11] = 0xff;
	memcpy(&sin6->sin6_addr.s6_addr[12], &v4, sizeof(v4));

	return 0;
}
509 
510 static int do_server(struct memory_buffer *mem)
511 {
512 	char ctrl_data[sizeof(int) * 20000];
513 	struct netdev_queue_id *queues;
514 	size_t non_page_aligned_frags = 0;
515 	struct sockaddr_in6 client_addr;
516 	struct sockaddr_in6 server_sin;
517 	size_t page_aligned_frags = 0;
518 	size_t total_received = 0;
519 	socklen_t client_addr_len;
520 	bool is_devmem = false;
521 	char *tmp_mem = NULL;
522 	struct ynl_sock *ys;
523 	char iobuf[819200];
524 	char buffer[256];
525 	int socket_fd;
526 	int client_fd;
527 	size_t i = 0;
528 	int ret;
529 
530 	ret = parse_address(server_ip, atoi(port), &server_sin);
531 	if (ret < 0)
532 		error(1, 0, "parse server address");
533 
534 	if (reset_flow_steering())
535 		error(1, 0, "Failed to reset flow steering\n");
536 
537 	if (configure_headersplit(1))
538 		error(1, 0, "Failed to enable TCP header split\n");
539 
540 	/* Configure RSS to divert all traffic from our devmem queues */
541 	if (configure_rss())
542 		error(1, 0, "Failed to configure rss\n");
543 
544 	/* Flow steer our devmem flows to start_queue */
545 	if (configure_flow_steering(&server_sin))
546 		error(1, 0, "Failed to configure flow steering\n");
547 
548 	sleep(1);
549 
550 	queues = malloc(sizeof(*queues) * num_queues);
551 
552 	for (i = 0; i < num_queues; i++) {
553 		queues[i]._present.type = 1;
554 		queues[i]._present.id = 1;
555 		queues[i].type = NETDEV_QUEUE_TYPE_RX;
556 		queues[i].id = start_queue + i;
557 	}
558 
559 	if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
560 		error(1, 0, "Failed to bind\n");
561 
562 	tmp_mem = malloc(mem->size);
563 	if (!tmp_mem)
564 		error(1, ENOMEM, "malloc failed");
565 
566 	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
567 	if (socket_fd < 0)
568 		error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
569 
570 	enable_reuseaddr(socket_fd);
571 
572 	fprintf(stderr, "binding to address %s:%d\n", server_ip,
573 		ntohs(server_sin.sin6_port));
574 
575 	ret = bind(socket_fd, &server_sin, sizeof(server_sin));
576 	if (ret)
577 		error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
578 
579 	ret = listen(socket_fd, 1);
580 	if (ret)
581 		error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
582 
583 	client_addr_len = sizeof(client_addr);
584 
585 	inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
586 		  sizeof(buffer));
587 	fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
588 		ntohs(server_sin.sin6_port));
589 	client_fd = accept(socket_fd, &client_addr, &client_addr_len);
590 
591 	inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
592 		  sizeof(buffer));
593 	fprintf(stderr, "Got connection from %s:%d\n", buffer,
594 		ntohs(client_addr.sin6_port));
595 
596 	while (1) {
597 		struct iovec iov = { .iov_base = iobuf,
598 				     .iov_len = sizeof(iobuf) };
599 		struct dmabuf_cmsg *dmabuf_cmsg = NULL;
600 		struct cmsghdr *cm = NULL;
601 		struct msghdr msg = { 0 };
602 		struct dmabuf_token token;
603 		ssize_t ret;
604 
605 		is_devmem = false;
606 
607 		msg.msg_iov = &iov;
608 		msg.msg_iovlen = 1;
609 		msg.msg_control = ctrl_data;
610 		msg.msg_controllen = sizeof(ctrl_data);
611 		ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
612 		fprintf(stderr, "recvmsg ret=%ld\n", ret);
613 		if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
614 			continue;
615 		if (ret < 0) {
616 			perror("recvmsg");
617 			continue;
618 		}
619 		if (ret == 0) {
620 			fprintf(stderr, "client exited\n");
621 			goto cleanup;
622 		}
623 
624 		i++;
625 		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
626 			if (cm->cmsg_level != SOL_SOCKET ||
627 			    (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
628 			     cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
629 				fprintf(stderr, "skipping non-devmem cmsg\n");
630 				continue;
631 			}
632 
633 			dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
634 			is_devmem = true;
635 
636 			if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
637 				/* TODO: process data copied from skb's linear
638 				 * buffer.
639 				 */
640 				fprintf(stderr,
641 					"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
642 					dmabuf_cmsg->frag_size);
643 
644 				continue;
645 			}
646 
647 			token.token_start = dmabuf_cmsg->frag_token;
648 			token.token_count = 1;
649 
650 			total_received += dmabuf_cmsg->frag_size;
651 			fprintf(stderr,
652 				"received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
653 				dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
654 				dmabuf_cmsg->frag_offset % getpagesize(),
655 				dmabuf_cmsg->frag_offset,
656 				dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
657 				total_received, dmabuf_cmsg->dmabuf_id);
658 
659 			if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
660 				error(1, 0,
661 				      "received on wrong dmabuf_id: flow steering error\n");
662 
663 			if (dmabuf_cmsg->frag_size % getpagesize())
664 				non_page_aligned_frags++;
665 			else
666 				page_aligned_frags++;
667 
668 			provider->memcpy_from_device(tmp_mem, mem,
669 						     dmabuf_cmsg->frag_offset,
670 						     dmabuf_cmsg->frag_size);
671 
672 			if (do_validation)
673 				validate_buffer(tmp_mem,
674 						dmabuf_cmsg->frag_size);
675 			else
676 				print_nonzero_bytes(tmp_mem,
677 						    dmabuf_cmsg->frag_size);
678 
679 			ret = setsockopt(client_fd, SOL_SOCKET,
680 					 SO_DEVMEM_DONTNEED, &token,
681 					 sizeof(token));
682 			if (ret != 1)
683 				error(1, 0,
684 				      "SO_DEVMEM_DONTNEED not enough tokens");
685 		}
686 		if (!is_devmem)
687 			error(1, 0, "flow steering error\n");
688 
689 		fprintf(stderr, "total_received=%lu\n", total_received);
690 	}
691 
692 	fprintf(stderr, "%s: ok\n", TEST_PREFIX);
693 
694 	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
695 		page_aligned_frags, non_page_aligned_frags);
696 
697 cleanup:
698 
699 	free(tmp_mem);
700 	close(client_fd);
701 	close(socket_fd);
702 	ynl_sock_destroy(ys);
703 
704 	return 0;
705 }
706 
707 void run_devmem_tests(void)
708 {
709 	struct netdev_queue_id *queues;
710 	struct memory_buffer *mem;
711 	struct ynl_sock *ys;
712 	size_t i = 0;
713 
714 	mem = provider->alloc(getpagesize() * NUM_PAGES);
715 
716 	/* Configure RSS to divert all traffic from our devmem queues */
717 	if (configure_rss())
718 		error(1, 0, "rss error\n");
719 
720 	queues = calloc(num_queues, sizeof(*queues));
721 
722 	if (configure_headersplit(1))
723 		error(1, 0, "Failed to configure header split\n");
724 
725 	if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
726 		error(1, 0, "Binding empty queues array should have failed\n");
727 
728 	for (i = 0; i < num_queues; i++) {
729 		queues[i]._present.type = 1;
730 		queues[i]._present.id = 1;
731 		queues[i].type = NETDEV_QUEUE_TYPE_RX;
732 		queues[i].id = start_queue + i;
733 	}
734 
735 	if (configure_headersplit(0))
736 		error(1, 0, "Failed to configure header split\n");
737 
738 	if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
739 		error(1, 0, "Configure dmabuf with header split off should have failed\n");
740 
741 	if (configure_headersplit(1))
742 		error(1, 0, "Failed to configure header split\n");
743 
744 	for (i = 0; i < num_queues; i++) {
745 		queues[i]._present.type = 1;
746 		queues[i]._present.id = 1;
747 		queues[i].type = NETDEV_QUEUE_TYPE_RX;
748 		queues[i].id = start_queue + i;
749 	}
750 
751 	if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
752 		error(1, 0, "Failed to bind\n");
753 
754 	/* Deactivating a bound queue should not be legal */
755 	if (!configure_channels(num_queues, num_queues - 1))
756 		error(1, 0, "Deactivating a bound queue should be illegal.\n");
757 
758 	/* Closing the netlink socket does an implicit unbind */
759 	ynl_sock_destroy(ys);
760 
761 	provider->free(mem);
762 }
763 
/* Current gettimeofday() time expressed in whole milliseconds */
static uint64_t gettimeofday_ms(void)
{
	struct timeval now;
	uint64_t from_sec;
	uint64_t from_usec;

	gettimeofday(&now, NULL);

	from_sec = now.tv_sec * 1000ULL;
	from_usec = now.tv_usec / 1000ULL;

	return from_sec + from_usec;
}
771 
772 static int do_poll(int fd)
773 {
774 	struct pollfd pfd;
775 	int ret;
776 
777 	pfd.revents = 0;
778 	pfd.fd = fd;
779 
780 	ret = poll(&pfd, 1, waittime_ms);
781 	if (ret == -1)
782 		error(1, errno, "poll");
783 
784 	return ret && (pfd.revents & POLLERR);
785 }
786 
787 static void wait_compl(int fd)
788 {
789 	int64_t tstop = gettimeofday_ms() + waittime_ms;
790 	char control[CMSG_SPACE(100)] = {};
791 	struct sock_extended_err *serr;
792 	struct msghdr msg = {};
793 	struct cmsghdr *cm;
794 	__u32 hi, lo;
795 	int ret;
796 
797 	msg.msg_control = control;
798 	msg.msg_controllen = sizeof(control);
799 
800 	while (gettimeofday_ms() < tstop) {
801 		if (!do_poll(fd))
802 			continue;
803 
804 		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
805 		if (ret < 0) {
806 			if (errno == EAGAIN)
807 				continue;
808 			error(1, errno, "recvmsg(MSG_ERRQUEUE)");
809 			return;
810 		}
811 		if (msg.msg_flags & MSG_CTRUNC)
812 			error(1, 0, "MSG_CTRUNC\n");
813 
814 		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
815 			if (cm->cmsg_level != SOL_IP &&
816 			    cm->cmsg_level != SOL_IPV6)
817 				continue;
818 			if (cm->cmsg_level == SOL_IP &&
819 			    cm->cmsg_type != IP_RECVERR)
820 				continue;
821 			if (cm->cmsg_level == SOL_IPV6 &&
822 			    cm->cmsg_type != IPV6_RECVERR)
823 				continue;
824 
825 			serr = (void *)CMSG_DATA(cm);
826 			if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
827 				error(1, 0, "wrong origin %u", serr->ee_origin);
828 			if (serr->ee_errno != 0)
829 				error(1, 0, "wrong errno %d", serr->ee_errno);
830 
831 			hi = serr->ee_data;
832 			lo = serr->ee_info;
833 
834 			fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
835 			return;
836 		}
837 	}
838 
839 	error(1, 0, "did not receive tx completion");
840 }
841 
842 static int do_client(struct memory_buffer *mem)
843 {
844 	char ctrl_data[CMSG_SPACE(sizeof(__u32))];
845 	struct sockaddr_in6 server_sin;
846 	struct sockaddr_in6 client_sin;
847 	struct ynl_sock *ys = NULL;
848 	struct msghdr msg = {};
849 	ssize_t line_size = 0;
850 	struct cmsghdr *cmsg;
851 	struct iovec iov[2];
852 	char *line = NULL;
853 	unsigned long mid;
854 	size_t len = 0;
855 	int socket_fd;
856 	__u32 ddmabuf;
857 	int opt = 1;
858 	int ret;
859 
860 	ret = parse_address(server_ip, atoi(port), &server_sin);
861 	if (ret < 0)
862 		error(1, 0, "parse server address");
863 
864 	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
865 	if (socket_fd < 0)
866 		error(1, socket_fd, "create socket");
867 
868 	enable_reuseaddr(socket_fd);
869 
870 	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
871 			 strlen(ifname) + 1);
872 	if (ret)
873 		error(1, errno, "bindtodevice");
874 
875 	if (bind_tx_queue(ifindex, mem->fd, &ys))
876 		error(1, 0, "Failed to bind\n");
877 
878 	if (client_ip) {
879 		ret = parse_address(client_ip, atoi(port), &client_sin);
880 		if (ret < 0)
881 			error(1, 0, "parse client address");
882 
883 		ret = bind(socket_fd, &client_sin, sizeof(client_sin));
884 		if (ret)
885 			error(1, errno, "bind");
886 	}
887 
888 	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
889 	if (ret)
890 		error(1, errno, "set sock opt");
891 
892 	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
893 		ntohs(server_sin.sin6_port), ifname);
894 
895 	ret = connect(socket_fd, &server_sin, sizeof(server_sin));
896 	if (ret)
897 		error(1, errno, "connect");
898 
899 	while (1) {
900 		free(line);
901 		line = NULL;
902 		line_size = getline(&line, &len, stdin);
903 
904 		if (line_size < 0)
905 			break;
906 
907 		mid = (line_size / 2) + 1;
908 
909 		iov[0].iov_base = (void *)1;
910 		iov[0].iov_len = mid;
911 		iov[1].iov_base = (void *)(mid + 2);
912 		iov[1].iov_len = line_size - mid;
913 
914 		provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
915 					   iov[0].iov_len);
916 		provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
917 					   line + iov[0].iov_len,
918 					   iov[1].iov_len);
919 
920 		fprintf(stderr,
921 			"read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n",
922 			line_size, (unsigned long)iov[0].iov_base,
923 			iov[0].iov_len, (unsigned long)iov[1].iov_base,
924 			iov[1].iov_len);
925 
926 		msg.msg_iov = iov;
927 		msg.msg_iovlen = 2;
928 
929 		msg.msg_control = ctrl_data;
930 		msg.msg_controllen = sizeof(ctrl_data);
931 
932 		cmsg = CMSG_FIRSTHDR(&msg);
933 		cmsg->cmsg_level = SOL_SOCKET;
934 		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
935 		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
936 
937 		ddmabuf = tx_dmabuf_id;
938 
939 		*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
940 
941 		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
942 		if (ret < 0)
943 			error(1, errno, "Failed sendmsg");
944 
945 		fprintf(stderr, "sendmsg_ret=%d\n", ret);
946 
947 		if (ret != line_size)
948 			error(1, errno, "Did not send all bytes");
949 
950 		wait_compl(socket_fd);
951 	}
952 
953 	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
954 
955 	free(line);
956 	close(socket_fd);
957 
958 	if (ys)
959 		ynl_sock_destroy(ys);
960 
961 	return 0;
962 }
963 
964 int main(int argc, char *argv[])
965 {
966 	struct memory_buffer *mem;
967 	int is_server = 0, opt;
968 	int ret;
969 
970 	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
971 		switch (opt) {
972 		case 'l':
973 			is_server = 1;
974 			break;
975 		case 's':
976 			server_ip = optarg;
977 			break;
978 		case 'c':
979 			client_ip = optarg;
980 			break;
981 		case 'p':
982 			port = optarg;
983 			break;
984 		case 'v':
985 			do_validation = atoll(optarg);
986 			break;
987 		case 'q':
988 			num_queues = atoi(optarg);
989 			break;
990 		case 't':
991 			start_queue = atoi(optarg);
992 			break;
993 		case 'f':
994 			ifname = optarg;
995 			break;
996 		case '?':
997 			fprintf(stderr, "unknown option: %c\n", optopt);
998 			break;
999 		}
1000 	}
1001 
1002 	if (!ifname)
1003 		error(1, 0, "Missing -f argument\n");
1004 
1005 	ifindex = if_nametoindex(ifname);
1006 
1007 	fprintf(stderr, "using ifindex=%u\n", ifindex);
1008 
1009 	if (!server_ip && !client_ip) {
1010 		if (start_queue < 0 && num_queues < 0) {
1011 			num_queues = rxq_num(ifindex);
1012 			if (num_queues < 0)
1013 				error(1, 0, "couldn't detect number of queues\n");
1014 			if (num_queues < 2)
1015 				error(1, 0,
1016 				      "number of device queues is too low\n");
1017 			/* make sure can bind to multiple queues */
1018 			start_queue = num_queues / 2;
1019 			num_queues /= 2;
1020 		}
1021 
1022 		if (start_queue < 0 || num_queues < 0)
1023 			error(1, 0, "Both -t and -q are required\n");
1024 
1025 		run_devmem_tests();
1026 		return 0;
1027 	}
1028 
1029 	if (start_queue < 0 && num_queues < 0) {
1030 		num_queues = rxq_num(ifindex);
1031 		if (num_queues < 2)
1032 			error(1, 0, "number of device queues is too low\n");
1033 
1034 		num_queues = 1;
1035 		start_queue = rxq_num(ifindex) - num_queues;
1036 
1037 		if (start_queue < 0)
1038 			error(1, 0, "couldn't detect number of queues\n");
1039 
1040 		fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
1041 	}
1042 
1043 	for (; optind < argc; optind++)
1044 		fprintf(stderr, "extra arguments: %s\n", argv[optind]);
1045 
1046 	if (start_queue < 0)
1047 		error(1, 0, "Missing -t argument\n");
1048 
1049 	if (num_queues < 0)
1050 		error(1, 0, "Missing -q argument\n");
1051 
1052 	if (!server_ip)
1053 		error(1, 0, "Missing -s argument\n");
1054 
1055 	if (!port)
1056 		error(1, 0, "Missing -p argument\n");
1057 
1058 	mem = provider->alloc(getpagesize() * NUM_PAGES);
1059 	ret = is_server ? do_server(mem) : do_client(mem);
1060 	provider->free(mem);
1061 
1062 	return ret;
1063 }
1064