xref: /linux/tools/testing/selftests/net/msg_zerocopy.c (revision f7308991bfeea3f6a4c6281c64fc1ba9dc6e56b3)
1 /* Evaluate MSG_ZEROCOPY
2  *
3  * Send traffic between two processes over one of the supported
4  * protocols and modes:
5  *
6  * PF_INET/PF_INET6
7  * - SOCK_STREAM
8  * - SOCK_DGRAM
9  * - SOCK_DGRAM with UDP_CORK
10  * - SOCK_RAW
11  * - SOCK_RAW with IP_HDRINCL
12  *
13  * PF_PACKET
14  * - SOCK_DGRAM
15  * - SOCK_RAW
16  *
17  * PF_RDS
18  * - SOCK_SEQPACKET
19  *
20  * Start this program on two connected hosts, one in send mode and
21  * the other with option '-r' to put it in receiver mode.
22  *
23  * If zerocopy mode ('-z') is enabled, the sender will verify that
24  * the kernel queues completions on the error queue for all zerocopy
25  * transfers.
26  */
27 
28 #define _GNU_SOURCE
29 
30 #include <arpa/inet.h>
31 #include <error.h>
32 #include <errno.h>
33 #include <limits.h>
34 #include <linux/errqueue.h>
35 #include <linux/if_packet.h>
36 #include <linux/ipv6.h>
37 #include <linux/socket.h>
38 #include <linux/sockios.h>
39 #include <net/ethernet.h>
40 #include <net/if.h>
41 #include <netinet/ip.h>
42 #include <netinet/ip6.h>
43 #include <netinet/tcp.h>
44 #include <netinet/udp.h>
45 #include <poll.h>
46 #include <sched.h>
47 #include <stdbool.h>
48 #include <stdio.h>
49 #include <stdint.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <sys/ioctl.h>
53 #include <sys/socket.h>
54 #include <sys/stat.h>
55 #include <sys/time.h>
56 #include <sys/types.h>
57 #include <sys/wait.h>
58 #include <unistd.h>
59 #include <linux/rds.h>
60 
61 #ifndef SO_EE_ORIGIN_ZEROCOPY
62 #define SO_EE_ORIGIN_ZEROCOPY		5
63 #endif
64 
65 #ifndef SO_ZEROCOPY
66 #define SO_ZEROCOPY	60
67 #endif
68 
69 #ifndef SO_EE_CODE_ZEROCOPY_COPIED
70 #define SO_EE_CODE_ZEROCOPY_COPIED	1
71 #endif
72 
73 #ifndef MSG_ZEROCOPY
74 #define MSG_ZEROCOPY	0x4000000
75 #endif
76 
/* Run-time configuration, filled in by parse_opts() */
static int  cfg_cork;			/* -c: sends per corked datagram, 0 disables corking */
static bool cfg_cork_mixed;		/* -m: alternate copy and zerocopy sends when corking */
static int  cfg_cpu		= -1;	/* -C: cpu handed to do_setcpu(); default: pin to last cpu */
static int  cfg_family		= PF_UNSPEC;	/* -4 / -6 */
static int  cfg_ifindex		= 1;	/* -i: interface index used for PF_PACKET tx */
static int  cfg_payload_len;		/* -s: payload bytes per send */
static int  cfg_port		= 8000;	/* -p */
static bool cfg_rx;			/* -r: run as receiver */
static int  cfg_runtime_ms	= 4200;	/* -t: duration of the tx/rx loop */
static int  cfg_verbose;		/* -v: may be repeated */
static int  cfg_waittime_ms	= 500;	/* poll timeout and final completion wait */
static bool cfg_zerocopy;		/* -z: send with MSG_ZEROCOPY */

/* Addresses built by setup_sockaddr() from -D and -S */
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;

/* Payload pattern shared by sender and receiver, filled in do_test() */
static char payload[IP_MAXPACKET];

/* Counters updated by the send, receive and completion paths */
static long packets;
static long bytes;
static long completions;
static long expected_completions;

/* -1 until the first completion reports whether data was sent zerocopy */
static int  zerocopied = -1;

/* First notification id expected in the next completion range */
static uint32_t next_completion;
98 
/* Return the current gettimeofday() time, converted to milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;
	unsigned long ms;

	gettimeofday(&now, NULL);

	ms  = now.tv_sec * 1000;	/* whole seconds */
	ms += now.tv_usec / 1000;	/* sub-second remainder */

	return ms;
}
106 
/* Compute the ones' complement checksum over @num_words 16-bit words.
 *
 * The words are summed as stored (no byte swapping), carries above
 * bit 15 are folded back in, and the inverted low 16 bits are returned.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
	const uint16_t *word = start;
	unsigned long acc = 0;
	int left;

	for (left = num_words; left > 0; left--)
		acc += *word++;

	/* fold until no carry remains above the low 16 bits */
	for (; acc >> 16; )
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
120 
121 static int do_setcpu(int cpu)
122 {
123 	cpu_set_t mask;
124 
125 	CPU_ZERO(&mask);
126 	CPU_SET(cpu, &mask);
127 	if (sched_setaffinity(0, sizeof(mask), &mask))
128 		error(1, 0, "setaffinity %d", cpu);
129 
130 	if (cfg_verbose)
131 		fprintf(stderr, "cpu: %u\n", cpu);
132 
133 	return 0;
134 }
135 
/* Set an integer socket option on @fd; exit with a diagnostic on failure. */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	const socklen_t optlen = sizeof(val);
	int rc;

	rc = setsockopt(fd, level, optname, &val, optlen);
	if (!rc)
		return;

	error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
141 
142 static int do_poll(int fd, int events)
143 {
144 	struct pollfd pfd;
145 	int ret;
146 
147 	pfd.events = events;
148 	pfd.revents = 0;
149 	pfd.fd = fd;
150 
151 	ret = poll(&pfd, 1, cfg_waittime_ms);
152 	if (ret == -1)
153 		error(1, errno, "poll");
154 
155 	return ret && (pfd.revents & events);
156 }
157 
/* Accept one connection on listening socket @fd, then close the listener.
 *
 * Returns the connected socket. Exits if accept() or close() fails.
 */
static int do_accept(int fd)
{
	int conn;

	conn = accept(fd, NULL, NULL);
	if (conn == -1)
		error(1, errno, "accept");

	/* only a single peer is served, so the listener is no longer needed */
	if (close(fd))
		error(1, errno, "close listen sock");

	return conn;
}
170 
/* Write an RDS zerocopy cookie control message into @msg.
 *
 * @msg->msg_control must already point at a buffer with room for one
 * cmsg carrying a uint32_t; the header and @cookie are written at its
 * start. Exits if no control buffer is attached.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
	struct cmsghdr *cm;

	/* caller bug, not a failed system call: errno carries no information */
	if (!msg->msg_control)
		error(1, 0, "NULL cookie");

	cm = (void *)msg->msg_control;
	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
	cm->cmsg_level = SOL_RDS;
	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
183 
184 static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
185 {
186 	int ret, len, i, flags;
187 	static uint32_t cookie;
188 	char ckbuf[CMSG_SPACE(sizeof(cookie))];
189 
190 	len = 0;
191 	for (i = 0; i < msg->msg_iovlen; i++)
192 		len += msg->msg_iov[i].iov_len;
193 
194 	flags = MSG_DONTWAIT;
195 	if (do_zerocopy) {
196 		flags |= MSG_ZEROCOPY;
197 		if (domain == PF_RDS) {
198 			memset(&msg->msg_control, 0, sizeof(msg->msg_control));
199 			msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
200 			msg->msg_control = (struct cmsghdr *)ckbuf;
201 			add_zcopy_cookie(msg, ++cookie);
202 		}
203 	}
204 
205 	ret = sendmsg(fd, msg, flags);
206 	if (ret == -1 && errno == EAGAIN)
207 		return false;
208 	if (ret == -1)
209 		error(1, errno, "send");
210 	if (cfg_verbose && ret != len)
211 		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
212 
213 	if (len) {
214 		packets++;
215 		bytes += ret;
216 		if (do_zerocopy && ret)
217 			expected_completions++;
218 	}
219 	if (do_zerocopy && domain == PF_RDS) {
220 		msg->msg_control = NULL;
221 		msg->msg_controllen = 0;
222 	}
223 
224 	return true;
225 }
226 
227 static void do_sendmsg_corked(int fd, struct msghdr *msg)
228 {
229 	bool do_zerocopy = cfg_zerocopy;
230 	int i, payload_len, extra_len;
231 
232 	/* split up the packet. for non-multiple, make first buffer longer */
233 	payload_len = cfg_payload_len / cfg_cork;
234 	extra_len = cfg_payload_len - (cfg_cork * payload_len);
235 
236 	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
237 
238 	for (i = 0; i < cfg_cork; i++) {
239 
240 		/* in mixed-frags mode, alternate zerocopy and copy frags
241 		 * start with non-zerocopy, to ensure attach later works
242 		 */
243 		if (cfg_cork_mixed)
244 			do_zerocopy = (i & 1);
245 
246 		msg->msg_iov[0].iov_len = payload_len + extra_len;
247 		extra_len = 0;
248 
249 		do_sendmsg(fd, msg, do_zerocopy,
250 			   (cfg_dst_addr.ss_family == AF_INET ?
251 			    PF_INET : PF_INET6));
252 	}
253 
254 	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
255 }
256 
257 static int setup_iph(struct iphdr *iph, uint16_t payload_len)
258 {
259 	struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
260 	struct sockaddr_in *saddr = (void *) &cfg_src_addr;
261 
262 	memset(iph, 0, sizeof(*iph));
263 
264 	iph->version	= 4;
265 	iph->tos	= 0;
266 	iph->ihl	= 5;
267 	iph->ttl	= 2;
268 	iph->saddr	= saddr->sin_addr.s_addr;
269 	iph->daddr	= daddr->sin_addr.s_addr;
270 	iph->protocol	= IPPROTO_EGP;
271 	iph->tot_len	= htons(sizeof(*iph) + payload_len);
272 	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
273 
274 	return sizeof(*iph);
275 }
276 
277 static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
278 {
279 	struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
280 	struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
281 
282 	memset(ip6h, 0, sizeof(*ip6h));
283 
284 	ip6h->version		= 6;
285 	ip6h->payload_len	= htons(payload_len);
286 	ip6h->nexthdr		= IPPROTO_EGP;
287 	ip6h->hop_limit		= 2;
288 	ip6h->saddr		= saddr->sin6_addr;
289 	ip6h->daddr		= daddr->sin6_addr;
290 
291 	return sizeof(*ip6h);
292 }
293 
294 
295 static void setup_sockaddr(int domain, const char *str_addr,
296 			   struct sockaddr_storage *sockaddr)
297 {
298 	struct sockaddr_in6 *addr6 = (void *) sockaddr;
299 	struct sockaddr_in *addr4 = (void *) sockaddr;
300 
301 	switch (domain) {
302 	case PF_INET:
303 		memset(addr4, 0, sizeof(*addr4));
304 		addr4->sin_family = AF_INET;
305 		addr4->sin_port = htons(cfg_port);
306 		if (str_addr &&
307 		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
308 			error(1, 0, "ipv4 parse error: %s", str_addr);
309 		break;
310 	case PF_INET6:
311 		memset(addr6, 0, sizeof(*addr6));
312 		addr6->sin6_family = AF_INET6;
313 		addr6->sin6_port = htons(cfg_port);
314 		if (str_addr &&
315 		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
316 			error(1, 0, "ipv6 parse error: %s", str_addr);
317 		break;
318 	default:
319 		error(1, 0, "illegal domain");
320 	}
321 }
322 
323 static int do_setup_tx(int domain, int type, int protocol)
324 {
325 	int fd;
326 
327 	fd = socket(domain, type, protocol);
328 	if (fd == -1)
329 		error(1, errno, "socket t");
330 
331 	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
332 	if (cfg_zerocopy)
333 		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
334 
335 	if (domain != PF_PACKET && domain != PF_RDS)
336 		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
337 			error(1, errno, "connect");
338 
339 	if (domain == PF_RDS) {
340 		if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
341 			error(1, errno, "bind");
342 	}
343 
344 	return fd;
345 }
346 
347 static int do_process_zerocopy_cookies(struct sock_extended_err *serr,
348 				       uint32_t *ckbuf, size_t nbytes)
349 {
350 	int ncookies, i;
351 
352 	if (serr->ee_errno != 0)
353 		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
354 	ncookies = serr->ee_data;
355 	if (ncookies > SO_EE_ORIGIN_MAX_ZCOOKIES)
356 		error(1, 0, "Returned %d cookies, max expected %d\n",
357 		      ncookies, SO_EE_ORIGIN_MAX_ZCOOKIES);
358 	if (nbytes != ncookies * sizeof(uint32_t))
359 		error(1, 0, "Expected %d cookies, got %ld\n",
360 		      ncookies, nbytes/sizeof(uint32_t));
361 	for (i = 0; i < ncookies; i++)
362 		if (cfg_verbose >= 2)
363 			fprintf(stderr, "%d\n", ckbuf[i]);
364 	return ncookies;
365 }
366 
367 static bool do_recv_completion(int fd)
368 {
369 	struct sock_extended_err *serr;
370 	struct msghdr msg = {};
371 	struct cmsghdr *cm;
372 	uint32_t hi, lo, range;
373 	int ret, zerocopy;
374 	char control[100];
375 	uint32_t ckbuf[SO_EE_ORIGIN_MAX_ZCOOKIES];
376 	struct iovec iov;
377 
378 	msg.msg_control = control;
379 	msg.msg_controllen = sizeof(control);
380 
381 	iov.iov_base = ckbuf;
382 	iov.iov_len = (SO_EE_ORIGIN_MAX_ZCOOKIES * sizeof(ckbuf[0]));
383 	msg.msg_iov = &iov;
384 	msg.msg_iovlen = 1;
385 
386 	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
387 	if (ret == -1 && errno == EAGAIN)
388 		return false;
389 	if (ret == -1)
390 		error(1, errno, "recvmsg notification");
391 	if (msg.msg_flags & MSG_CTRUNC)
392 		error(1, errno, "recvmsg notification: truncated");
393 
394 	cm = CMSG_FIRSTHDR(&msg);
395 	if (!cm)
396 		error(1, 0, "cmsg: no cmsg");
397 	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
398 	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
399 	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
400 		error(1, 0, "serr: wrong type: %d.%d",
401 		      cm->cmsg_level, cm->cmsg_type);
402 
403 	serr = (void *) CMSG_DATA(cm);
404 
405 	if (serr->ee_origin == SO_EE_ORIGIN_ZCOOKIE) {
406 		completions += do_process_zerocopy_cookies(serr, ckbuf, ret);
407 		return true;
408 	}
409 	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
410 		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
411 	if (serr->ee_errno != 0)
412 		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
413 
414 	hi = serr->ee_data;
415 	lo = serr->ee_info;
416 	range = hi - lo + 1;
417 
418 	/* Detect notification gaps. These should not happen often, if at all.
419 	 * Gaps can occur due to drops, reordering and retransmissions.
420 	 */
421 	if (lo != next_completion)
422 		fprintf(stderr, "gap: %u..%u does not append to %u\n",
423 			lo, hi, next_completion);
424 	next_completion = hi + 1;
425 
426 	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
427 	if (zerocopied == -1)
428 		zerocopied = zerocopy;
429 	else if (zerocopied != zerocopy) {
430 		fprintf(stderr, "serr: inconsistent\n");
431 		zerocopied = zerocopy;
432 	}
433 
434 	if (cfg_verbose >= 2)
435 		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
436 			range, hi, lo);
437 
438 	completions += range;
439 	return true;
440 }
441 
/* Drain the errqueue: keep reading notifications until none is pending */
static void do_recv_completions(int fd)
{
	for (;;) {
		if (!do_recv_completion(fd))
			break;
	}
}
447 
448 /* Wait for all remaining completions on the errqueue */
449 static void do_recv_remaining_completions(int fd)
450 {
451 	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
452 
453 	while (completions < expected_completions &&
454 	       gettimeofday_ms() < tstop) {
455 		if (do_poll(fd, POLLERR))
456 			do_recv_completions(fd);
457 	}
458 
459 	if (completions < expected_completions)
460 		fprintf(stderr, "missing notifications: %lu < %lu\n",
461 			completions, expected_completions);
462 }
463 
464 static void do_tx(int domain, int type, int protocol)
465 {
466 	struct iovec iov[3] = { {0} };
467 	struct sockaddr_ll laddr;
468 	struct msghdr msg = {0};
469 	struct ethhdr eth;
470 	union {
471 		struct ipv6hdr ip6h;
472 		struct iphdr iph;
473 	} nh;
474 	uint64_t tstop;
475 	int fd;
476 
477 	fd = do_setup_tx(domain, type, protocol);
478 
479 	if (domain == PF_PACKET) {
480 		uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
481 
482 		/* sock_raw passes ll header as data */
483 		if (type == SOCK_RAW) {
484 			memset(eth.h_dest, 0x06, ETH_ALEN);
485 			memset(eth.h_source, 0x02, ETH_ALEN);
486 			eth.h_proto = htons(proto);
487 			iov[0].iov_base = &eth;
488 			iov[0].iov_len = sizeof(eth);
489 			msg.msg_iovlen++;
490 		}
491 
492 		/* both sock_raw and sock_dgram expect name */
493 		memset(&laddr, 0, sizeof(laddr));
494 		laddr.sll_family	= AF_PACKET;
495 		laddr.sll_ifindex	= cfg_ifindex;
496 		laddr.sll_protocol	= htons(proto);
497 		laddr.sll_halen		= ETH_ALEN;
498 
499 		memset(laddr.sll_addr, 0x06, ETH_ALEN);
500 
501 		msg.msg_name		= &laddr;
502 		msg.msg_namelen		= sizeof(laddr);
503 	}
504 
505 	/* packet and raw sockets with hdrincl must pass network header */
506 	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
507 		if (cfg_family == PF_INET)
508 			iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
509 		else
510 			iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
511 
512 		iov[1].iov_base = (void *) &nh;
513 		msg.msg_iovlen++;
514 	}
515 
516 	if (domain == PF_RDS) {
517 		msg.msg_name = &cfg_dst_addr;
518 		msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
519 				    sizeof(struct sockaddr_in) :
520 				    sizeof(struct sockaddr_in6));
521 	}
522 
523 	iov[2].iov_base = payload;
524 	iov[2].iov_len = cfg_payload_len;
525 	msg.msg_iovlen++;
526 	msg.msg_iov = &iov[3 - msg.msg_iovlen];
527 
528 	tstop = gettimeofday_ms() + cfg_runtime_ms;
529 	do {
530 		if (cfg_cork)
531 			do_sendmsg_corked(fd, &msg);
532 		else
533 			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
534 
535 		while (!do_poll(fd, POLLOUT)) {
536 			if (cfg_zerocopy)
537 				do_recv_completions(fd);
538 		}
539 
540 	} while (gettimeofday_ms() < tstop);
541 
542 	if (cfg_zerocopy)
543 		do_recv_remaining_completions(fd);
544 
545 	if (close(fd))
546 		error(1, errno, "close");
547 
548 	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
549 		packets, bytes >> 20, completions,
550 		zerocopied == 1 ? 'y' : 'n');
551 }
552 
553 static int do_setup_rx(int domain, int type, int protocol)
554 {
555 	int fd;
556 
557 	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
558 	 * to recv the only copy of the packet, not a clone
559 	 */
560 	if (domain == PF_PACKET)
561 		error(1, 0, "Use PF_INET/SOCK_RAW to read");
562 
563 	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
564 		error(1, 0, "IPPROTO_RAW: not supported on Rx");
565 
566 	fd = socket(domain, type, protocol);
567 	if (fd == -1)
568 		error(1, errno, "socket r");
569 
570 	do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
571 	do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
572 	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
573 
574 	if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
575 		error(1, errno, "bind");
576 
577 	if (type == SOCK_STREAM) {
578 		if (listen(fd, 1))
579 			error(1, errno, "listen");
580 		fd = do_accept(fd);
581 	}
582 
583 	return fd;
584 }
585 
586 /* Flush all outstanding bytes for the tcp receive queue */
587 static void do_flush_tcp(int fd)
588 {
589 	int ret;
590 
591 	/* MSG_TRUNC flushes up to len bytes */
592 	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
593 	if (ret == -1 && errno == EAGAIN)
594 		return;
595 	if (ret == -1)
596 		error(1, errno, "flush");
597 	if (!ret)
598 		return;
599 
600 	packets++;
601 	bytes += ret;
602 }
603 
604 /* Flush all outstanding datagrams. Verify first few bytes of each. */
605 static void do_flush_datagram(int fd, int type)
606 {
607 	int ret, off = 0;
608 	char buf[64];
609 
610 	/* MSG_TRUNC will return full datagram length */
611 	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
612 	if (ret == -1 && errno == EAGAIN)
613 		return;
614 
615 	/* raw ipv4 return with header, raw ipv6 without */
616 	if (cfg_family == PF_INET && type == SOCK_RAW) {
617 		off += sizeof(struct iphdr);
618 		ret -= sizeof(struct iphdr);
619 	}
620 
621 	if (ret == -1)
622 		error(1, errno, "recv");
623 	if (ret != cfg_payload_len)
624 		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
625 	if (ret > sizeof(buf) - off)
626 		ret = sizeof(buf) - off;
627 	if (memcmp(buf + off, payload, ret))
628 		error(1, 0, "recv: data mismatch");
629 
630 	packets++;
631 	bytes += cfg_payload_len;
632 }
633 
634 
635 static void do_recvmsg(int fd)
636 {
637 	int ret, off = 0;
638 	char *buf;
639 	struct iovec iov;
640 	struct msghdr msg;
641 	struct sockaddr_storage din;
642 
643 	buf = calloc(cfg_payload_len, sizeof(char));
644 	iov.iov_base = buf;
645 	iov.iov_len = cfg_payload_len;
646 
647 	memset(&msg, 0, sizeof(msg));
648 	msg.msg_name = &din;
649 	msg.msg_namelen = sizeof(din);
650 	msg.msg_iov = &iov;
651 	msg.msg_iovlen = 1;
652 
653 	ret = recvmsg(fd, &msg, MSG_TRUNC);
654 
655 	if (ret == -1)
656 		error(1, errno, "recv");
657 	if (ret != cfg_payload_len)
658 		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
659 
660 	if (memcmp(buf + off, payload, ret))
661 		error(1, 0, "recv: data mismatch");
662 
663 	free(buf);
664 	packets++;
665 	bytes += cfg_payload_len;
666 }
667 
668 static void do_rx(int domain, int type, int protocol)
669 {
670 	uint64_t tstop;
671 	int fd;
672 
673 	fd = do_setup_rx(domain, type, protocol);
674 
675 	tstop = gettimeofday_ms() + cfg_runtime_ms;
676 	do {
677 		if (type == SOCK_STREAM)
678 			do_flush_tcp(fd);
679 		else if (domain == PF_RDS)
680 			do_recvmsg(fd);
681 		else
682 			do_flush_datagram(fd, type);
683 
684 		do_poll(fd, POLLIN);
685 
686 	} while (gettimeofday_ms() < tstop);
687 
688 	if (close(fd))
689 		error(1, errno, "close");
690 
691 	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
692 }
693 
694 static void do_test(int domain, int type, int protocol)
695 {
696 	int i;
697 
698 	if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
699 		error(1, 0, "can only cork udp sockets");
700 
701 	do_setcpu(cfg_cpu);
702 
703 	for (i = 0; i < IP_MAXPACKET; i++)
704 		payload[i] = 'a' + (i % 26);
705 
706 	if (cfg_rx)
707 		do_rx(domain, type, protocol);
708 	else
709 		do_tx(domain, type, protocol);
710 }
711 
712 static void usage(const char *filepath)
713 {
714 	error(1, 0, "Usage: %s [options] <test>", filepath);
715 }
716 
717 static void parse_opts(int argc, char **argv)
718 {
719 	const int max_payload_len = sizeof(payload) -
720 				    sizeof(struct ipv6hdr) -
721 				    sizeof(struct tcphdr) -
722 				    40 /* max tcp options */;
723 	int c;
724 	char *daddr = NULL, *saddr = NULL;
725 	char *cfg_test;
726 
727 	cfg_payload_len = max_payload_len;
728 
729 	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
730 		switch (c) {
731 		case '4':
732 			if (cfg_family != PF_UNSPEC)
733 				error(1, 0, "Pass one of -4 or -6");
734 			cfg_family = PF_INET;
735 			cfg_alen = sizeof(struct sockaddr_in);
736 			break;
737 		case '6':
738 			if (cfg_family != PF_UNSPEC)
739 				error(1, 0, "Pass one of -4 or -6");
740 			cfg_family = PF_INET6;
741 			cfg_alen = sizeof(struct sockaddr_in6);
742 			break;
743 		case 'c':
744 			cfg_cork = strtol(optarg, NULL, 0);
745 			break;
746 		case 'C':
747 			cfg_cpu = strtol(optarg, NULL, 0);
748 			break;
749 		case 'D':
750 			daddr = optarg;
751 			break;
752 		case 'i':
753 			cfg_ifindex = if_nametoindex(optarg);
754 			if (cfg_ifindex == 0)
755 				error(1, errno, "invalid iface: %s", optarg);
756 			break;
757 		case 'm':
758 			cfg_cork_mixed = true;
759 			break;
760 		case 'p':
761 			cfg_port = strtoul(optarg, NULL, 0);
762 			break;
763 		case 'r':
764 			cfg_rx = true;
765 			break;
766 		case 's':
767 			cfg_payload_len = strtoul(optarg, NULL, 0);
768 			break;
769 		case 'S':
770 			saddr = optarg;
771 			break;
772 		case 't':
773 			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
774 			break;
775 		case 'v':
776 			cfg_verbose++;
777 			break;
778 		case 'z':
779 			cfg_zerocopy = true;
780 			break;
781 		}
782 	}
783 
784 	cfg_test = argv[argc - 1];
785 	if (strcmp(cfg_test, "rds") == 0) {
786 		if (!daddr)
787 			error(1, 0, "-D <server addr> required for PF_RDS\n");
788 		if (!cfg_rx && !saddr)
789 			error(1, 0, "-S <client addr> required for PF_RDS\n");
790 	}
791 	setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
792 	setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
793 
794 	if (cfg_payload_len > max_payload_len)
795 		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
796 	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
797 		error(1, 0, "-m: cork_mixed requires corking and zerocopy");
798 
799 	if (optind != argc - 1)
800 		usage(argv[0]);
801 }
802 
803 int main(int argc, char **argv)
804 {
805 	const char *cfg_test;
806 
807 	parse_opts(argc, argv);
808 
809 	cfg_test = argv[argc - 1];
810 
811 	if (!strcmp(cfg_test, "packet"))
812 		do_test(PF_PACKET, SOCK_RAW, 0);
813 	else if (!strcmp(cfg_test, "packet_dgram"))
814 		do_test(PF_PACKET, SOCK_DGRAM, 0);
815 	else if (!strcmp(cfg_test, "raw"))
816 		do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
817 	else if (!strcmp(cfg_test, "raw_hdrincl"))
818 		do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
819 	else if (!strcmp(cfg_test, "tcp"))
820 		do_test(cfg_family, SOCK_STREAM, 0);
821 	else if (!strcmp(cfg_test, "udp"))
822 		do_test(cfg_family, SOCK_DGRAM, 0);
823 	else if (!strcmp(cfg_test, "rds"))
824 		do_test(PF_RDS, SOCK_SEQPACKET, 0);
825 	else
826 		error(1, 0, "unknown cfg_test %s", cfg_test);
827 
828 	return 0;
829 }
830