xref: /linux/tools/testing/selftests/drivers/net/hw/ncdevmem.c (revision 85502b2214d50ba0ddf2a5fb454e4d28a160d175)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
4  * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
5  *
6  * Usage:
7  *
8  *     On server:
9  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201
10  *
11  *     On client:
12  *     echo -n "hello\nworld" | \
13  *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
14  *
15  * Note this is compatible with regular netcat. i.e. the sender or receiver can
16  * be replaced with regular netcat to test the RX or TX path in isolation.
17  *
18  * Test data validation (devmem TCP on RX only):
19  *
20  *     On server:
21  *     ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7
22  *
23  *     On client:
24  *     yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
25  *             head -c 1G | \
26  *             nc <server IP> 5201 -p 5201
27  *
28  * Test data validation (devmem TCP on RX and TX, validation happens on RX):
29  *
30  *	On server:
31  *	ncdevmem -s <server IP> [-c <client IP>] -l -p 5201 -v 8 -f eth1
32  *
33  *	On client:
34  *	yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06\\x07) | \
35  *		head -c 1M | \
36  *		ncdevmem -s <server IP> [-c <client IP>] -p 5201 -f eth1
37  */
38 #define _GNU_SOURCE
39 #define __EXPORTED_HEADERS__
40 
41 #include <linux/uio.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45 #include <stdbool.h>
46 #include <string.h>
47 #include <errno.h>
48 #define __iovec_defined
49 #include <fcntl.h>
50 #include <malloc.h>
51 #include <error.h>
52 #include <poll.h>
53 
54 #include <arpa/inet.h>
55 #include <sys/socket.h>
56 #include <sys/mman.h>
57 #include <sys/ioctl.h>
58 #include <sys/syscall.h>
59 #include <sys/time.h>
60 
61 #include <linux/memfd.h>
62 #include <linux/dma-buf.h>
63 #include <linux/errqueue.h>
64 #include <linux/udmabuf.h>
65 #include <linux/types.h>
66 #include <linux/netlink.h>
67 #include <linux/genetlink.h>
68 #include <linux/netdev.h>
69 #include <linux/ethtool_netlink.h>
70 #include <time.h>
71 #include <net/if.h>
72 
73 #include "netdev-user.h"
74 #include "ethtool-user.h"
75 #include <ynl.h>
76 
/* Shift do_server() applies to a fragment offset to log its page index.
 * NOTE(review): hard-codes 4 KiB pages while the neighbouring log fields use
 * getpagesize() - confirm this is intended.
 */
#define PAGE_SHIFT 12
/* Prefix used in the [skip]/[FAIL]/ok status lines this program prints. */
#define TEST_PREFIX "ncdevmem"
/* The dmabuf is allocated as getpagesize() * NUM_PAGES bytes. */
#define NUM_PAGES 16000

/* Fallback for builds whose headers do not define MSG_SOCK_DEVMEM;
 * do_server() passes this flag to recvmsg().
 */
#ifndef MSG_SOCK_DEVMEM
#define MSG_SOCK_DEVMEM 0x2000000
#endif

/* Capacity of the iovec array do_client() hands to sendmsg(). */
#define MAX_IOV 1024
86 
/* Largest iovec length do_client() uses when splitting a line (-z);
 * 0 means each line is sent as a single iovec.
 */
static size_t max_chunk;
/* Address given with -s: the server binds to it, the client connects to it. */
static char *server_ip;
/* Optional address given with -c: used in the server's flow-steering rule and
 * as the client's local bind address.
 */
static char *client_ip;
/* Port string given with -p. */
static char *port;
/* Value given with -v; non-zero makes do_server() validate received data, and
 * it is the value at which validate_buffer() wraps its expected byte sequence.
 */
static size_t do_validation;
/* First RX queue to use (-t); stays -1 until set. */
static int start_queue = -1;
/* Number of RX queues to use (-q); stays -1 until set. */
static int num_queues = -1;
/* Interface name given with -f, and its index as resolved in main(). */
static char *ifname;
static unsigned int ifindex;
/* Binding ids recorded by bind_rx_queue() and bind_tx_queue() respectively. */
static unsigned int dmabuf_id;
static uint32_t tx_dmabuf_id;
/* Per-poll timeout and overall deadline (milliseconds) used while waiting for
 * a TX completion.
 */
static int waittime_ms = 500;
99 
/* A device-memory buffer together with the handles needed to release it.
 * The field notes describe how udmabuf_alloc() populates them.
 */
struct memory_buffer {
	int fd;		/* dmabuf fd returned by the UDMABUF_CREATE ioctl */
	size_t size;	/* size in bytes requested at allocation */

	int devfd;	/* fd of the opened /dev/udmabuf */
	int memfd;	/* memfd backing the dmabuf */
	char *buf_mem;	/* CPU mapping of fd, size bytes long */
};
108 
/* Operations a device-memory backend must supply. */
struct memory_provider {
	/* Allocate a buffer of size bytes. */
	struct memory_buffer *(*alloc)(size_t size);
	/* Release a buffer obtained from alloc. */
	void (*free)(struct memory_buffer *ctx);
	/* Copy n bytes from src (CPU memory) into dst at byte offset off. */
	void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
				 void *src, int n);
	/* Copy n bytes from src at byte offset off into dst (CPU memory). */
	void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
				   size_t off, int n);
};
117 
udmabuf_alloc(size_t size)118 static struct memory_buffer *udmabuf_alloc(size_t size)
119 {
120 	struct udmabuf_create create;
121 	struct memory_buffer *ctx;
122 	int ret;
123 
124 	ctx = malloc(sizeof(*ctx));
125 	if (!ctx)
126 		error(1, ENOMEM, "malloc failed");
127 
128 	ctx->size = size;
129 
130 	ctx->devfd = open("/dev/udmabuf", O_RDWR);
131 	if (ctx->devfd < 0)
132 		error(1, errno,
133 		      "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
134 		      TEST_PREFIX);
135 
136 	ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
137 	if (ctx->memfd < 0)
138 		error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX);
139 
140 	ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
141 	if (ret < 0)
142 		error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
143 
144 	ret = ftruncate(ctx->memfd, size);
145 	if (ret == -1)
146 		error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
147 
148 	memset(&create, 0, sizeof(create));
149 
150 	create.memfd = ctx->memfd;
151 	create.offset = 0;
152 	create.size = size;
153 	ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create);
154 	if (ctx->fd < 0)
155 		error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
156 
157 	ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
158 			    ctx->fd, 0);
159 	if (ctx->buf_mem == MAP_FAILED)
160 		error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX);
161 
162 	return ctx;
163 }
164 
udmabuf_free(struct memory_buffer * ctx)165 static void udmabuf_free(struct memory_buffer *ctx)
166 {
167 	munmap(ctx->buf_mem, ctx->size);
168 	close(ctx->fd);
169 	close(ctx->memfd);
170 	close(ctx->devfd);
171 	free(ctx);
172 }
173 
udmabuf_memcpy_to_device(struct memory_buffer * dst,size_t off,void * src,int n)174 static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
175 				     void *src, int n)
176 {
177 	struct dma_buf_sync sync = {};
178 
179 	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
180 	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
181 
182 	memcpy(dst->buf_mem + off, src, n);
183 
184 	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
185 	ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
186 }
187 
udmabuf_memcpy_from_device(void * dst,struct memory_buffer * src,size_t off,int n)188 static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
189 				       size_t off, int n)
190 {
191 	struct dma_buf_sync sync = {};
192 
193 	sync.flags = DMA_BUF_SYNC_START;
194 	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
195 
196 	memcpy(dst, src->buf_mem + off, n);
197 
198 	sync.flags = DMA_BUF_SYNC_END;
199 	ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync);
200 }
201 
/* Memory provider backed by the udmabuf helpers defined above. */
static struct memory_provider udmabuf_memory_provider = {
	.alloc = udmabuf_alloc,
	.free = udmabuf_free,
	.memcpy_to_device = udmabuf_memcpy_to_device,
	.memcpy_from_device = udmabuf_memcpy_from_device,
};

/* Provider used by the rest of the file; udmabuf is the only one defined here. */
static struct memory_provider *provider = &udmabuf_memory_provider;
210 
/*
 * Write @size bytes starting at @ptr to stdout, one putchar() per byte.
 * Despite the name, every byte is emitted, including zero bytes.
 *
 * The index is a size_t so it can never wrap before reaching @size.
 */
static void print_nonzero_bytes(void *ptr, size_t size)
{
	unsigned char *p = ptr;
	size_t i;

	for (i = 0; i < size; i++)
		putchar(p[i]);
}
219 
validate_buffer(void * line,size_t size)220 void validate_buffer(void *line, size_t size)
221 {
222 	static unsigned char seed = 1;
223 	unsigned char *ptr = line;
224 	unsigned char expected;
225 	static int errors;
226 	size_t i;
227 
228 	for (i = 0; i < size; i++) {
229 		expected = seed ? seed : '\n';
230 		if (ptr[i] != expected) {
231 			fprintf(stderr,
232 				"Failed validation: expected=%u, actual=%u, index=%lu\n",
233 				expected, ptr[i], i);
234 			errors++;
235 			if (errors > 20)
236 				error(1, 0, "validation failed.");
237 		}
238 		seed++;
239 		if (seed == do_validation)
240 			seed = 0;
241 	}
242 
243 	fprintf(stdout, "Validated buffer\n");
244 }
245 
rxq_num(int ifindex)246 static int rxq_num(int ifindex)
247 {
248 	struct ethtool_channels_get_req *req;
249 	struct ethtool_channels_get_rsp *rsp;
250 	struct ynl_error yerr;
251 	struct ynl_sock *ys;
252 	int num = -1;
253 
254 	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
255 	if (!ys) {
256 		fprintf(stderr, "YNL: %s\n", yerr.msg);
257 		return -1;
258 	}
259 
260 	req = ethtool_channels_get_req_alloc();
261 	ethtool_channels_get_req_set_header_dev_index(req, ifindex);
262 	rsp = ethtool_channels_get(ys, req);
263 	if (rsp)
264 		num = rsp->rx_count + rsp->combined_count;
265 	ethtool_channels_get_req_free(req);
266 	ethtool_channels_get_rsp_free(rsp);
267 
268 	ynl_sock_destroy(ys);
269 
270 	return num;
271 }
272 
/*
 * Format a shell command printf-style into a 256-byte buffer, log it to
 * stderr and run it with system(). Written as a statement expression, so the
 * macro evaluates to system()'s return value. snprintf() bounds the write to
 * the buffer; a longer command is cut short without any check here.
 */
#define run_command(cmd, ...)                                           \
	({                                                              \
		char command[256];                                      \
		memset(command, 0, sizeof(command));                    \
		snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
		fprintf(stderr, "Running: %s\n", command);                       \
		system(command);                                        \
	})
281 
reset_flow_steering(void)282 static int reset_flow_steering(void)
283 {
284 	/* Depending on the NIC, toggling ntuple off and on might not
285 	 * be allowed. Additionally, attempting to delete existing filters
286 	 * will fail if no filters are present. Therefore, do not enforce
287 	 * the exit status.
288 	 */
289 
290 	run_command("sudo ethtool -K %s ntuple off >&2", ifname);
291 	run_command("sudo ethtool -K %s ntuple on >&2", ifname);
292 	run_command(
293 		"sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2",
294 		ifname, ifname);
295 	return 0;
296 }
297 
/*
 * Map a tcp-data-split setting to its display name:
 * 0 -> "off", 1 -> "auto", 2 -> "on", anything else -> "?".
 */
static const char *tcp_data_split_str(int val)
{
	static const char *const names[] = { "off", "auto", "on" };
	const int count = sizeof(names) / sizeof(names[0]);

	if (val < 0 || val >= count)
		return "?";

	return names[val];
}
311 
configure_headersplit(bool on)312 static int configure_headersplit(bool on)
313 {
314 	struct ethtool_rings_get_req *get_req;
315 	struct ethtool_rings_get_rsp *get_rsp;
316 	struct ethtool_rings_set_req *req;
317 	struct ynl_error yerr;
318 	struct ynl_sock *ys;
319 	int ret;
320 
321 	ys = ynl_sock_create(&ynl_ethtool_family, &yerr);
322 	if (!ys) {
323 		fprintf(stderr, "YNL: %s\n", yerr.msg);
324 		return -1;
325 	}
326 
327 	req = ethtool_rings_set_req_alloc();
328 	ethtool_rings_set_req_set_header_dev_index(req, ifindex);
329 	/* 0 - off, 1 - auto, 2 - on */
330 	ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0);
331 	ret = ethtool_rings_set(ys, req);
332 	if (ret < 0)
333 		fprintf(stderr, "YNL failed: %s\n", ys->err.msg);
334 	ethtool_rings_set_req_free(req);
335 
336 	if (ret == 0) {
337 		get_req = ethtool_rings_get_req_alloc();
338 		ethtool_rings_get_req_set_header_dev_index(get_req, ifindex);
339 		get_rsp = ethtool_rings_get(ys, get_req);
340 		ethtool_rings_get_req_free(get_req);
341 		if (get_rsp)
342 			fprintf(stderr, "TCP header split: %s\n",
343 				tcp_data_split_str(get_rsp->tcp_data_split));
344 		ethtool_rings_get_rsp_free(get_rsp);
345 	}
346 
347 	ynl_sock_destroy(ys);
348 
349 	return ret;
350 }
351 
configure_rss(void)352 static int configure_rss(void)
353 {
354 	return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue);
355 }
356 
configure_channels(unsigned int rx,unsigned int tx)357 static int configure_channels(unsigned int rx, unsigned int tx)
358 {
359 	return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
360 }
361 
configure_flow_steering(struct sockaddr_in6 * server_sin)362 static int configure_flow_steering(struct sockaddr_in6 *server_sin)
363 {
364 	const char *type = "tcp6";
365 	const char *server_addr;
366 	char buf[40];
367 
368 	inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf));
369 	server_addr = buf;
370 
371 	if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) {
372 		type = "tcp4";
373 		server_addr = strrchr(server_addr, ':') + 1;
374 	}
375 
376 	/* Try configure 5-tuple */
377 	if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2",
378 			   ifname,
379 			   type,
380 			   client_ip ? "src-ip" : "",
381 			   client_ip ?: "",
382 			   server_addr,
383 			   client_ip ? "src-port" : "",
384 			   client_ip ? port : "",
385 			   port, start_queue))
386 		/* If that fails, try configure 3-tuple */
387 		if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2",
388 				ifname,
389 				type,
390 				server_addr,
391 				port, start_queue))
392 			/* If that fails, return error */
393 			return -1;
394 
395 	return 0;
396 }
397 
bind_rx_queue(unsigned int ifindex,unsigned int dmabuf_fd,struct netdev_queue_id * queues,unsigned int n_queue_index,struct ynl_sock ** ys)398 static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
399 			 struct netdev_queue_id *queues,
400 			 unsigned int n_queue_index, struct ynl_sock **ys)
401 {
402 	struct netdev_bind_rx_req *req = NULL;
403 	struct netdev_bind_rx_rsp *rsp = NULL;
404 	struct ynl_error yerr;
405 
406 	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
407 	if (!*ys) {
408 		fprintf(stderr, "YNL: %s\n", yerr.msg);
409 		return -1;
410 	}
411 
412 	req = netdev_bind_rx_req_alloc();
413 	netdev_bind_rx_req_set_ifindex(req, ifindex);
414 	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
415 	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
416 
417 	rsp = netdev_bind_rx(*ys, req);
418 	if (!rsp) {
419 		perror("netdev_bind_rx");
420 		goto err_close;
421 	}
422 
423 	if (!rsp->_present.id) {
424 		perror("id not present");
425 		goto err_close;
426 	}
427 
428 	fprintf(stderr, "got dmabuf id=%d\n", rsp->id);
429 	dmabuf_id = rsp->id;
430 
431 	netdev_bind_rx_req_free(req);
432 	netdev_bind_rx_rsp_free(rsp);
433 
434 	return 0;
435 
436 err_close:
437 	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
438 	netdev_bind_rx_req_free(req);
439 	ynl_sock_destroy(*ys);
440 	return -1;
441 }
442 
bind_tx_queue(unsigned int ifindex,unsigned int dmabuf_fd,struct ynl_sock ** ys)443 static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
444 			 struct ynl_sock **ys)
445 {
446 	struct netdev_bind_tx_req *req = NULL;
447 	struct netdev_bind_tx_rsp *rsp = NULL;
448 	struct ynl_error yerr;
449 
450 	*ys = ynl_sock_create(&ynl_netdev_family, &yerr);
451 	if (!*ys) {
452 		fprintf(stderr, "YNL: %s\n", yerr.msg);
453 		return -1;
454 	}
455 
456 	req = netdev_bind_tx_req_alloc();
457 	netdev_bind_tx_req_set_ifindex(req, ifindex);
458 	netdev_bind_tx_req_set_fd(req, dmabuf_fd);
459 
460 	rsp = netdev_bind_tx(*ys, req);
461 	if (!rsp) {
462 		perror("netdev_bind_tx");
463 		goto err_close;
464 	}
465 
466 	if (!rsp->_present.id) {
467 		perror("id not present");
468 		goto err_close;
469 	}
470 
471 	fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
472 	tx_dmabuf_id = rsp->id;
473 
474 	netdev_bind_tx_req_free(req);
475 	netdev_bind_tx_rsp_free(rsp);
476 
477 	return 0;
478 
479 err_close:
480 	fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
481 	netdev_bind_tx_req_free(req);
482 	ynl_sock_destroy(*ys);
483 	return -1;
484 }
485 
enable_reuseaddr(int fd)486 static void enable_reuseaddr(int fd)
487 {
488 	int opt = 1;
489 	int ret;
490 
491 	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
492 	if (ret)
493 		error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX);
494 
495 	ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
496 	if (ret)
497 		error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX);
498 }
499 
/*
 * Fill @sin6 with the address @str and the port @port (host byte order).
 *
 * @str is first parsed as an IPv6 address; if that fails it is parsed as a
 * dotted-quad IPv4 address and stored in v4-mapped form (::ffff:a.b.c.d).
 *
 * The whole structure is cleared first, so sin6_flowinfo and sin6_scope_id
 * are always 0; callers pass uninitialised stack structures straight on to
 * bind()/connect().
 *
 * Returns 0 on success, -1 if @str is neither a valid IPv6 nor IPv4 address.
 */
static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
{
	memset(sin6, 0, sizeof(*sin6));
	sin6->sin6_family = AF_INET6;
	sin6->sin6_port = htons(port);

	if (inet_pton(AF_INET6, str, &sin6->sin6_addr) == 1)
		return 0;

	/* fallback to plain IPv4: the address occupies the last four bytes */
	if (inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr[12]) != 1)
		return -1;

	/* add ::ffff prefix: ten zero bytes followed by 0xff 0xff */
	memset(&sin6->sin6_addr.s6_addr[0], 0, 10);
	sin6->sin6_addr.s6_addr[10] = 0xff;
	sin6->sin6_addr.s6_addr[11] = 0xff;

	return 0;
}
523 
create_queues(void)524 static struct netdev_queue_id *create_queues(void)
525 {
526 	struct netdev_queue_id *queues;
527 	size_t i = 0;
528 
529 	queues = calloc(num_queues, sizeof(*queues));
530 	for (i = 0; i < num_queues; i++) {
531 		queues[i]._present.type = 1;
532 		queues[i]._present.id = 1;
533 		queues[i].type = NETDEV_QUEUE_TYPE_RX;
534 		queues[i].id = start_queue + i;
535 	}
536 
537 	return queues;
538 }
539 
do_server(struct memory_buffer * mem)540 static int do_server(struct memory_buffer *mem)
541 {
542 	char ctrl_data[sizeof(int) * 20000];
543 	size_t non_page_aligned_frags = 0;
544 	struct sockaddr_in6 client_addr;
545 	struct sockaddr_in6 server_sin;
546 	size_t page_aligned_frags = 0;
547 	size_t total_received = 0;
548 	socklen_t client_addr_len;
549 	bool is_devmem = false;
550 	char *tmp_mem = NULL;
551 	struct ynl_sock *ys;
552 	char iobuf[819200];
553 	char buffer[256];
554 	int socket_fd;
555 	int client_fd;
556 	int ret;
557 
558 	ret = parse_address(server_ip, atoi(port), &server_sin);
559 	if (ret < 0)
560 		error(1, 0, "parse server address");
561 
562 	if (reset_flow_steering())
563 		error(1, 0, "Failed to reset flow steering\n");
564 
565 	if (configure_headersplit(1))
566 		error(1, 0, "Failed to enable TCP header split\n");
567 
568 	/* Configure RSS to divert all traffic from our devmem queues */
569 	if (configure_rss())
570 		error(1, 0, "Failed to configure rss\n");
571 
572 	/* Flow steer our devmem flows to start_queue */
573 	if (configure_flow_steering(&server_sin))
574 		error(1, 0, "Failed to configure flow steering\n");
575 
576 	sleep(1);
577 
578 	if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
579 		error(1, 0, "Failed to bind\n");
580 
581 	tmp_mem = malloc(mem->size);
582 	if (!tmp_mem)
583 		error(1, ENOMEM, "malloc failed");
584 
585 	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
586 	if (socket_fd < 0)
587 		error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
588 
589 	enable_reuseaddr(socket_fd);
590 
591 	fprintf(stderr, "binding to address %s:%d\n", server_ip,
592 		ntohs(server_sin.sin6_port));
593 
594 	ret = bind(socket_fd, &server_sin, sizeof(server_sin));
595 	if (ret)
596 		error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
597 
598 	ret = listen(socket_fd, 1);
599 	if (ret)
600 		error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
601 
602 	client_addr_len = sizeof(client_addr);
603 
604 	inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer,
605 		  sizeof(buffer));
606 	fprintf(stderr, "Waiting or connection on %s:%d\n", buffer,
607 		ntohs(server_sin.sin6_port));
608 	client_fd = accept(socket_fd, &client_addr, &client_addr_len);
609 
610 	inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer,
611 		  sizeof(buffer));
612 	fprintf(stderr, "Got connection from %s:%d\n", buffer,
613 		ntohs(client_addr.sin6_port));
614 
615 	while (1) {
616 		struct iovec iov = { .iov_base = iobuf,
617 				     .iov_len = sizeof(iobuf) };
618 		struct dmabuf_cmsg *dmabuf_cmsg = NULL;
619 		struct cmsghdr *cm = NULL;
620 		struct msghdr msg = { 0 };
621 		struct dmabuf_token token;
622 		ssize_t ret;
623 
624 		is_devmem = false;
625 
626 		msg.msg_iov = &iov;
627 		msg.msg_iovlen = 1;
628 		msg.msg_control = ctrl_data;
629 		msg.msg_controllen = sizeof(ctrl_data);
630 		ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
631 		fprintf(stderr, "recvmsg ret=%ld\n", ret);
632 		if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
633 			continue;
634 		if (ret < 0) {
635 			perror("recvmsg");
636 			continue;
637 		}
638 		if (ret == 0) {
639 			fprintf(stderr, "client exited\n");
640 			goto cleanup;
641 		}
642 
643 		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
644 			if (cm->cmsg_level != SOL_SOCKET ||
645 			    (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
646 			     cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
647 				fprintf(stderr, "skipping non-devmem cmsg\n");
648 				continue;
649 			}
650 
651 			dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
652 			is_devmem = true;
653 
654 			if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
655 				/* TODO: process data copied from skb's linear
656 				 * buffer.
657 				 */
658 				fprintf(stderr,
659 					"SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
660 					dmabuf_cmsg->frag_size);
661 
662 				continue;
663 			}
664 
665 			token.token_start = dmabuf_cmsg->frag_token;
666 			token.token_count = 1;
667 
668 			total_received += dmabuf_cmsg->frag_size;
669 			fprintf(stderr,
670 				"received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
671 				dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
672 				dmabuf_cmsg->frag_offset % getpagesize(),
673 				dmabuf_cmsg->frag_offset,
674 				dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token,
675 				total_received, dmabuf_cmsg->dmabuf_id);
676 
677 			if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
678 				error(1, 0,
679 				      "received on wrong dmabuf_id: flow steering error\n");
680 
681 			if (dmabuf_cmsg->frag_size % getpagesize())
682 				non_page_aligned_frags++;
683 			else
684 				page_aligned_frags++;
685 
686 			provider->memcpy_from_device(tmp_mem, mem,
687 						     dmabuf_cmsg->frag_offset,
688 						     dmabuf_cmsg->frag_size);
689 
690 			if (do_validation)
691 				validate_buffer(tmp_mem,
692 						dmabuf_cmsg->frag_size);
693 			else
694 				print_nonzero_bytes(tmp_mem,
695 						    dmabuf_cmsg->frag_size);
696 
697 			ret = setsockopt(client_fd, SOL_SOCKET,
698 					 SO_DEVMEM_DONTNEED, &token,
699 					 sizeof(token));
700 			if (ret != 1)
701 				error(1, 0,
702 				      "SO_DEVMEM_DONTNEED not enough tokens");
703 		}
704 		if (!is_devmem)
705 			error(1, 0, "flow steering error\n");
706 
707 		fprintf(stderr, "total_received=%lu\n", total_received);
708 	}
709 
710 	fprintf(stderr, "%s: ok\n", TEST_PREFIX);
711 
712 	fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
713 		page_aligned_frags, non_page_aligned_frags);
714 
715 cleanup:
716 
717 	free(tmp_mem);
718 	close(client_fd);
719 	close(socket_fd);
720 	ynl_sock_destroy(ys);
721 
722 	return 0;
723 }
724 
run_devmem_tests(void)725 void run_devmem_tests(void)
726 {
727 	struct memory_buffer *mem;
728 	struct ynl_sock *ys;
729 
730 	mem = provider->alloc(getpagesize() * NUM_PAGES);
731 
732 	/* Configure RSS to divert all traffic from our devmem queues */
733 	if (configure_rss())
734 		error(1, 0, "rss error\n");
735 
736 	if (configure_headersplit(1))
737 		error(1, 0, "Failed to configure header split\n");
738 
739 	if (!bind_rx_queue(ifindex, mem->fd,
740 			   calloc(num_queues, sizeof(struct netdev_queue_id)),
741 			   num_queues, &ys))
742 		error(1, 0, "Binding empty queues array should have failed\n");
743 
744 	if (configure_headersplit(0))
745 		error(1, 0, "Failed to configure header split\n");
746 
747 	if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
748 		error(1, 0, "Configure dmabuf with header split off should have failed\n");
749 
750 	if (configure_headersplit(1))
751 		error(1, 0, "Failed to configure header split\n");
752 
753 	if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
754 		error(1, 0, "Failed to bind\n");
755 
756 	/* Deactivating a bound queue should not be legal */
757 	if (!configure_channels(num_queues, num_queues - 1))
758 		error(1, 0, "Deactivating a bound queue should be illegal.\n");
759 
760 	/* Closing the netlink socket does an implicit unbind */
761 	ynl_sock_destroy(ys);
762 
763 	provider->free(mem);
764 }
765 
/* Current wall-clock time from gettimeofday(), expressed in milliseconds. */
static uint64_t gettimeofday_ms(void)
{
	struct timeval now;
	uint64_t msec;

	gettimeofday(&now, NULL);

	msec = now.tv_sec * 1000ULL;
	msec += now.tv_usec / 1000ULL;

	return msec;
}
773 
do_poll(int fd)774 static int do_poll(int fd)
775 {
776 	struct pollfd pfd;
777 	int ret;
778 
779 	pfd.revents = 0;
780 	pfd.fd = fd;
781 
782 	ret = poll(&pfd, 1, waittime_ms);
783 	if (ret == -1)
784 		error(1, errno, "poll");
785 
786 	return ret && (pfd.revents & POLLERR);
787 }
788 
wait_compl(int fd)789 static void wait_compl(int fd)
790 {
791 	int64_t tstop = gettimeofday_ms() + waittime_ms;
792 	char control[CMSG_SPACE(100)] = {};
793 	struct sock_extended_err *serr;
794 	struct msghdr msg = {};
795 	struct cmsghdr *cm;
796 	__u32 hi, lo;
797 	int ret;
798 
799 	msg.msg_control = control;
800 	msg.msg_controllen = sizeof(control);
801 
802 	while (gettimeofday_ms() < tstop) {
803 		if (!do_poll(fd))
804 			continue;
805 
806 		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
807 		if (ret < 0) {
808 			if (errno == EAGAIN)
809 				continue;
810 			error(1, errno, "recvmsg(MSG_ERRQUEUE)");
811 			return;
812 		}
813 		if (msg.msg_flags & MSG_CTRUNC)
814 			error(1, 0, "MSG_CTRUNC\n");
815 
816 		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
817 			if (cm->cmsg_level != SOL_IP &&
818 			    cm->cmsg_level != SOL_IPV6)
819 				continue;
820 			if (cm->cmsg_level == SOL_IP &&
821 			    cm->cmsg_type != IP_RECVERR)
822 				continue;
823 			if (cm->cmsg_level == SOL_IPV6 &&
824 			    cm->cmsg_type != IPV6_RECVERR)
825 				continue;
826 
827 			serr = (void *)CMSG_DATA(cm);
828 			if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
829 				error(1, 0, "wrong origin %u", serr->ee_origin);
830 			if (serr->ee_errno != 0)
831 				error(1, 0, "wrong errno %d", serr->ee_errno);
832 
833 			hi = serr->ee_data;
834 			lo = serr->ee_info;
835 
836 			fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
837 			return;
838 		}
839 	}
840 
841 	error(1, 0, "did not receive tx completion");
842 }
843 
do_client(struct memory_buffer * mem)844 static int do_client(struct memory_buffer *mem)
845 {
846 	char ctrl_data[CMSG_SPACE(sizeof(__u32))];
847 	struct sockaddr_in6 server_sin;
848 	struct sockaddr_in6 client_sin;
849 	struct ynl_sock *ys = NULL;
850 	struct iovec iov[MAX_IOV];
851 	struct msghdr msg = {};
852 	ssize_t line_size = 0;
853 	struct cmsghdr *cmsg;
854 	char *line = NULL;
855 	unsigned long mid;
856 	size_t len = 0;
857 	int socket_fd;
858 	__u32 ddmabuf;
859 	int opt = 1;
860 	int ret;
861 
862 	ret = parse_address(server_ip, atoi(port), &server_sin);
863 	if (ret < 0)
864 		error(1, 0, "parse server address");
865 
866 	socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
867 	if (socket_fd < 0)
868 		error(1, socket_fd, "create socket");
869 
870 	enable_reuseaddr(socket_fd);
871 
872 	ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
873 			 strlen(ifname) + 1);
874 	if (ret)
875 		error(1, errno, "bindtodevice");
876 
877 	if (bind_tx_queue(ifindex, mem->fd, &ys))
878 		error(1, 0, "Failed to bind\n");
879 
880 	if (client_ip) {
881 		ret = parse_address(client_ip, atoi(port), &client_sin);
882 		if (ret < 0)
883 			error(1, 0, "parse client address");
884 
885 		ret = bind(socket_fd, &client_sin, sizeof(client_sin));
886 		if (ret)
887 			error(1, errno, "bind");
888 	}
889 
890 	ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
891 	if (ret)
892 		error(1, errno, "set sock opt");
893 
894 	fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
895 		ntohs(server_sin.sin6_port), ifname);
896 
897 	ret = connect(socket_fd, &server_sin, sizeof(server_sin));
898 	if (ret)
899 		error(1, errno, "connect");
900 
901 	while (1) {
902 		free(line);
903 		line = NULL;
904 		line_size = getline(&line, &len, stdin);
905 
906 		if (line_size < 0)
907 			break;
908 
909 		if (max_chunk) {
910 			msg.msg_iovlen =
911 				(line_size + max_chunk - 1) / max_chunk;
912 			if (msg.msg_iovlen > MAX_IOV)
913 				error(1, 0,
914 				      "can't partition %zd bytes into maximum of %d chunks",
915 				      line_size, MAX_IOV);
916 
917 			for (int i = 0; i < msg.msg_iovlen; i++) {
918 				iov[i].iov_base = (void *)(i * max_chunk);
919 				iov[i].iov_len = max_chunk;
920 			}
921 
922 			iov[msg.msg_iovlen - 1].iov_len =
923 				line_size - (msg.msg_iovlen - 1) * max_chunk;
924 		} else {
925 			iov[0].iov_base = 0;
926 			iov[0].iov_len = line_size;
927 			msg.msg_iovlen = 1;
928 		}
929 
930 		msg.msg_iov = iov;
931 		provider->memcpy_to_device(mem, 0, line, line_size);
932 
933 		msg.msg_control = ctrl_data;
934 		msg.msg_controllen = sizeof(ctrl_data);
935 
936 		cmsg = CMSG_FIRSTHDR(&msg);
937 		cmsg->cmsg_level = SOL_SOCKET;
938 		cmsg->cmsg_type = SCM_DEVMEM_DMABUF;
939 		cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
940 
941 		ddmabuf = tx_dmabuf_id;
942 
943 		*((__u32 *)CMSG_DATA(cmsg)) = ddmabuf;
944 
945 		ret = sendmsg(socket_fd, &msg, MSG_ZEROCOPY);
946 		if (ret < 0)
947 			error(1, errno, "Failed sendmsg");
948 
949 		fprintf(stderr, "sendmsg_ret=%d\n", ret);
950 
951 		if (ret != line_size)
952 			error(1, errno, "Did not send all bytes %d vs %zd", ret,
953 			      line_size);
954 
955 		wait_compl(socket_fd);
956 	}
957 
958 	fprintf(stderr, "%s: tx ok\n", TEST_PREFIX);
959 
960 	free(line);
961 	close(socket_fd);
962 
963 	if (ys)
964 		ynl_sock_destroy(ys);
965 
966 	return 0;
967 }
968 
main(int argc,char * argv[])969 int main(int argc, char *argv[])
970 {
971 	struct memory_buffer *mem;
972 	int is_server = 0, opt;
973 	int ret;
974 
975 	while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
976 		switch (opt) {
977 		case 'l':
978 			is_server = 1;
979 			break;
980 		case 's':
981 			server_ip = optarg;
982 			break;
983 		case 'c':
984 			client_ip = optarg;
985 			break;
986 		case 'p':
987 			port = optarg;
988 			break;
989 		case 'v':
990 			do_validation = atoll(optarg);
991 			break;
992 		case 'q':
993 			num_queues = atoi(optarg);
994 			break;
995 		case 't':
996 			start_queue = atoi(optarg);
997 			break;
998 		case 'f':
999 			ifname = optarg;
1000 			break;
1001 		case 'z':
1002 			max_chunk = atoi(optarg);
1003 			break;
1004 		case '?':
1005 			fprintf(stderr, "unknown option: %c\n", optopt);
1006 			break;
1007 		}
1008 	}
1009 
1010 	if (!ifname)
1011 		error(1, 0, "Missing -f argument\n");
1012 
1013 	ifindex = if_nametoindex(ifname);
1014 
1015 	fprintf(stderr, "using ifindex=%u\n", ifindex);
1016 
1017 	if (!server_ip && !client_ip) {
1018 		if (start_queue < 0 && num_queues < 0) {
1019 			num_queues = rxq_num(ifindex);
1020 			if (num_queues < 0)
1021 				error(1, 0, "couldn't detect number of queues\n");
1022 			if (num_queues < 2)
1023 				error(1, 0,
1024 				      "number of device queues is too low\n");
1025 			/* make sure can bind to multiple queues */
1026 			start_queue = num_queues / 2;
1027 			num_queues /= 2;
1028 		}
1029 
1030 		if (start_queue < 0 || num_queues < 0)
1031 			error(1, 0, "Both -t and -q are required\n");
1032 
1033 		run_devmem_tests();
1034 		return 0;
1035 	}
1036 
1037 	if (start_queue < 0 && num_queues < 0) {
1038 		num_queues = rxq_num(ifindex);
1039 		if (num_queues < 2)
1040 			error(1, 0, "number of device queues is too low\n");
1041 
1042 		num_queues = 1;
1043 		start_queue = rxq_num(ifindex) - num_queues;
1044 
1045 		if (start_queue < 0)
1046 			error(1, 0, "couldn't detect number of queues\n");
1047 
1048 		fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues);
1049 	}
1050 
1051 	for (; optind < argc; optind++)
1052 		fprintf(stderr, "extra arguments: %s\n", argv[optind]);
1053 
1054 	if (start_queue < 0)
1055 		error(1, 0, "Missing -t argument\n");
1056 
1057 	if (num_queues < 0)
1058 		error(1, 0, "Missing -q argument\n");
1059 
1060 	if (!server_ip)
1061 		error(1, 0, "Missing -s argument\n");
1062 
1063 	if (!port)
1064 		error(1, 0, "Missing -p argument\n");
1065 
1066 	mem = provider->alloc(getpagesize() * NUM_PAGES);
1067 	ret = is_server ? do_server(mem) : do_client(mem);
1068 	provider->free(mem);
1069 
1070 	return ret;
1071 }
1072