xref: /linux/tools/testing/selftests/drivers/net/gro.c (revision d8f87aa5fa0a4276491fa8ef436cd22605a3f9ba)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * This testsuite provides conformance testing for GRO coalescing.
4  *
5  * Test cases:
6  *
7  * data_*:
8  *  Data packets of the same size and same header setup with correct
9  *  sequence numbers coalesce. The one exception being the last data
10  *  packet coalesced: it can be smaller than the rest and coalesced
11  *  as long as it is in the same flow.
12  *   - data_same:    same size packets coalesce
13  *   - data_lrg_sml: large then small coalesces
14  *   - data_sml_lrg: small then large doesn't coalesce
15  *
16  * ack:
17  *  Pure ACK does not coalesce.
18  *
19  * flags_*:
20  *  No packets with PSH, SYN, URG, RST set will be coalesced.
21  *   - flags_psh, flags_syn, flags_rst, flags_urg
22  *
23  * tcp_*:
24  *  Packets with incorrect checksum, non-consecutive seqno and
25  *  different TCP header options shouldn't coalesce. Nit: given that
26  *  some extension headers have paddings, such as timestamp, headers
27  *  that are padded differently would not be coalesced.
28  *   - tcp_csum: incorrect checksum
29  *   - tcp_seq:  non-consecutive sequence numbers
30  *   - tcp_ts:   different timestamps
31  *   - tcp_opt:  different TCP options
32  *
33  * ip_*:
34  *  Packets with different (ECN, TTL, TOS) header, IP options or
35  *  IP fragments shouldn't coalesce.
36  *   - ip_ecn, ip_tos:            shared between IPv4/IPv6
37  *   - ip_ttl, ip_opt, ip_frag4:  IPv4 only
38  *   - ip_id_df*:                 IPv4 IP ID field coalescing tests
39  *   - ip_frag6, ip_v6ext_*:      IPv6 only
40  *
41  * large_*:
 *  Packets larger than GRO_MAX_SIZE shouldn't coalesce.
43  *   - large_max: exceeding max size
44  *   - large_rem: remainder handling
45  *
46  * MSS is defined as 4096 - header because if it is too small
47  * (i.e. 1500 MTU - header), it will result in many packets,
 * increasing the "large" test case's flakiness. The coalescing
 * window is time sensitive, so with many packets the receiver
 * may not coalesce all of them.
51  *
52  * Note the timing issue applies to all of the test cases, so some
53  * flakiness is to be expected.
54  *
55  */
56 
57 #define _GNU_SOURCE
58 
59 #include <arpa/inet.h>
60 #include <errno.h>
61 #include <error.h>
62 #include <getopt.h>
63 #include <linux/filter.h>
64 #include <linux/if_packet.h>
65 #include <linux/ipv6.h>
66 #include <net/ethernet.h>
67 #include <net/if.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet/tcp.h>
72 #include <stdbool.h>
73 #include <stddef.h>
74 #include <stdio.h>
75 #include <stdarg.h>
76 #include <string.h>
77 #include <unistd.h>
78 
79 #include "kselftest.h"
80 #include "../../net/lib/ksft.h"
81 
/* TCP destination/source ports written into every generated segment. */
#define DPORT 8000
#define SPORT 1500
/* Payload size in bytes used by the small (non-"large") test packets. */
#define PAYLOAD_LEN 100
/* Base packet count for the flag tests (send_flags() sends one more). */
#define NUM_PACKETS 4
/* Base sequence/ack numbers; per-packet offsets are added on top. */
#define START_SEQ 100
#define START_ACK 100
#define ETH_P_NONE 0
/* Ethernet + IPv6 + TCP header bytes (same expression as MAX_HDR_LEN). */
#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
/* Segment payload size for the "large" tests; see the file header comment. */
#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
/* Largest payload that fits in IP_MAXPACKET after TCP and IPv6 headers. */
#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
/* Number of full-MSS segments that fit in MAX_PAYLOAD. */
#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
/* Used to size packet buffers: Ethernet + IPv6 + TCP header bytes. */
#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
/* Size in bytes of the single IPv6 extension header the tests insert. */
#define MIN_EXTHDR_SIZE 8
/* Two distinct 6-byte extension header bodies. */
#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"

#define ipv6_optlen(p)  (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
/* Compile-time assertion: the array size is negative when condition is true. */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
100 
/* IPv4 ID / DF combinations exercised by send_flush_id_case(). */
enum flush_id_case {
	FLUSH_ID_DF1_INC,	/* DF=1, IDs 8 then 9 */
	FLUSH_ID_DF1_FIXED,	/* DF=1, ID 8 twice */
	FLUSH_ID_DF0_INC,	/* DF=0, IDs 8 then 9 */
	FLUSH_ID_DF0_FIXED,	/* DF=0, ID 8 twice */
	FLUSH_ID_DF1_INC_FIXED,	/* DF=1, IDs 8, 9, 9 (three packets) */
	FLUSH_ID_DF1_FIXED_INC,	/* DF=1, IDs 8, 8, 9 (three packets) */
};
109 
/* Addresses written into the generated IP headers and TCP pseudo headers. */
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
static const char *addr4_src = "192.168.1.200";
static const char *addr4_dst = "192.168.1.100";
/* Address family under test; compared against PF_INET / PF_INET6. */
static int proto = -1;
/* Binary MAC addresses copied into each Ethernet header. */
static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
/* Test name; setup_sock_filter() inspects it to pick option offsets. */
static char *testname = "data";
/* Interface name resolved with if_nametoindex() when binding. */
static char *ifname = "eth0";
/* Textual MAC defaults - presumably parsed with read_MAC(); TODO confirm. */
static char *smac = "aa:00:00:00:00:02";
static char *dmac = "aa:00:00:00:00:01";
/* Enables vlog() output. */
static bool verbose;
/* NOTE(review): not referenced in this part of the file. */
static bool tx_socket = true;
/* Byte offset of the TCP header from the start of the frame. */
static int tcp_offset = -1;
/* Header bytes preceding the TCP payload in a frame without options. */
static int total_hdr_len = -1;
/* EtherType as stored in eth->h_proto and sll_protocol. */
static int ethhdr_proto = -1;
/* When set, packets carry an outer and an inner IPv4 header (IPPROTO_IPIP). */
static bool ipip;
126 
127 static void vlog(const char *fmt, ...)
128 {
129 	va_list args;
130 
131 	if (verbose) {
132 		va_start(args, fmt);
133 		vfprintf(stderr, fmt, args);
134 		va_end(args);
135 	}
136 }
137 
138 static void setup_sock_filter(int fd)
139 {
140 	const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
141 	const int ethproto_off = offsetof(struct ethhdr, h_proto);
142 	int optlen = 0;
143 	int ipproto_off, opt_ipproto_off;
144 	int next_off;
145 
146 	if (ipip)
147 		next_off = sizeof(struct iphdr) + offsetof(struct iphdr, protocol);
148 	else if (proto == PF_INET)
149 		next_off = offsetof(struct iphdr, protocol);
150 	else
151 		next_off = offsetof(struct ipv6hdr, nexthdr);
152 	ipproto_off = ETH_HLEN + next_off;
153 
154 	/* Overridden later if exthdrs are used: */
155 	opt_ipproto_off = ipproto_off;
156 
157 	if (strcmp(testname, "ip_opt") == 0) {
158 		optlen = sizeof(struct ip_timestamp);
159 	} else if (strcmp(testname, "ip_frag6") == 0 ||
160 		   strcmp(testname, "ip_v6ext_same") == 0 ||
161 		   strcmp(testname, "ip_v6ext_diff") == 0) {
162 		BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
163 		BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
164 		BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
165 
166 		/* same size for HBH and Fragment extension header types */
167 		optlen = MIN_EXTHDR_SIZE;
168 		opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
169 			+ offsetof(struct ip6_ext, ip6e_nxt);
170 	}
171 
172 	/* this filter validates the following:
173 	 *	- packet is IPv4/IPv6 according to the running test.
174 	 *	- packet is TCP. Also handles the case of one extension header and then TCP.
175 	 *	- checks the packet tcp dport equals to DPORT. Also handles the case of one
176 	 *	  extension header and then TCP.
177 	 */
178 	struct sock_filter filter[] = {
179 			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, ethproto_off),
180 			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
181 			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, ipproto_off),
182 			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
183 			BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, opt_ipproto_off),
184 			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
185 			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off),
186 			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
187 			BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, dport_off + optlen),
188 			BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
189 			BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
190 			BPF_STMT(BPF_RET + BPF_K, 0),
191 	};
192 
193 	struct sock_fprog bpf = {
194 		.len = ARRAY_SIZE(filter),
195 		.filter = filter,
196 	};
197 
198 	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
199 		error(1, errno, "error setting filter");
200 }
201 
/*
 * Add @len bytes at @data to the running 32-bit sum @sum, taking the
 * data as native-endian 16-bit words, and return the unfolded result.
 *
 * An odd trailing byte is summed as if followed by a zero pad byte. It is
 * read as unsigned so values >= 0x80 are not sign-extended, and it is
 * placed in the word the same way the full words are, so the result is
 * consistent on both little- and big-endian hosts. Words are fetched with
 * memcpy() so @data needs no particular alignment.
 */
static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
{
	const unsigned char *bytes = data;
	uint16_t word;
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		memcpy(&word, bytes + i, sizeof(word));
		sum += word;
	}

	if (len & 1) {
		const unsigned char tail[2] = { bytes[len - 1], 0 };

		memcpy(&word, tail, sizeof(word));
		sum += word;
	}

	return sum;
}
213 
/*
 * Sum @len bytes at @data on top of @sum with checksum_nofold(), fold the
 * carries back into the low 16 bits and return the one's complement.
 */
static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
{
	uint32_t acc = checksum_nofold(data, len, sum);

	while (acc >> 16)
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
221 
222 static uint16_t tcp_checksum(void *buf, int payload_len)
223 {
224 	struct pseudo_header6 {
225 		struct in6_addr saddr;
226 		struct in6_addr daddr;
227 		uint16_t protocol;
228 		uint16_t payload_len;
229 	} ph6;
230 	struct pseudo_header4 {
231 		struct in_addr saddr;
232 		struct in_addr daddr;
233 		uint16_t protocol;
234 		uint16_t payload_len;
235 	} ph4;
236 	uint32_t sum = 0;
237 
238 	if (proto == PF_INET6) {
239 		if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
240 			error(1, errno, "inet_pton6 source ip pseudo");
241 		if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
242 			error(1, errno, "inet_pton6 dest ip pseudo");
243 		ph6.protocol = htons(IPPROTO_TCP);
244 		ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
245 
246 		sum = checksum_nofold(&ph6, sizeof(ph6), 0);
247 	} else if (proto == PF_INET) {
248 		if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
249 			error(1, errno, "inet_pton source ip pseudo");
250 		if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
251 			error(1, errno, "inet_pton dest ip pseudo");
252 		ph4.protocol = htons(IPPROTO_TCP);
253 		ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
254 
255 		sum = checksum_nofold(&ph4, sizeof(ph4), 0);
256 	}
257 
258 	return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
259 }
260 
/*
 * Parse the colon-separated hexadecimal MAC string @mac into the six
 * bytes at @mac_addr. Exits via error() unless all six fields parse.
 */
static void read_MAC(uint8_t *mac_addr, char *mac)
{
	int parsed;

	parsed = sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
			mac_addr, mac_addr + 1, mac_addr + 2,
			mac_addr + 3, mac_addr + 4, mac_addr + 5);
	if (parsed == 6)
		return;

	error(1, 0, "sscanf");
}
268 
269 static void fill_datalinklayer(void *buf)
270 {
271 	struct ethhdr *eth = buf;
272 
273 	memcpy(eth->h_dest, dst_mac, ETH_ALEN);
274 	memcpy(eth->h_source, src_mac, ETH_ALEN);
275 	eth->h_proto = ethhdr_proto;
276 }
277 
278 static void fill_networklayer(void *buf, int payload_len, int protocol)
279 {
280 	struct ipv6hdr *ip6h = buf;
281 	struct iphdr *iph = buf;
282 
283 	if (proto == PF_INET6) {
284 		memset(ip6h, 0, sizeof(*ip6h));
285 
286 		ip6h->version = 6;
287 		ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
288 		ip6h->nexthdr = protocol;
289 		ip6h->hop_limit = 8;
290 		if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
291 			error(1, errno, "inet_pton source ip6");
292 		if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
293 			error(1, errno, "inet_pton dest ip6");
294 	} else if (proto == PF_INET) {
295 		memset(iph, 0, sizeof(*iph));
296 
297 		iph->version = 4;
298 		iph->ihl = 5;
299 		iph->ttl = 8;
300 		iph->protocol	= protocol;
301 		iph->tot_len = htons(sizeof(struct tcphdr) +
302 				payload_len + sizeof(struct iphdr));
303 		iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
304 		if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
305 			error(1, errno, "inet_pton source ip");
306 		if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
307 			error(1, errno, "inet_pton dest ip");
308 		iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
309 	}
310 }
311 
312 static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
313 				int payload_len, int fin)
314 {
315 	struct tcphdr *tcph = buf;
316 
317 	memset(tcph, 0, sizeof(*tcph));
318 
319 	tcph->source = htons(SPORT);
320 	tcph->dest = htons(DPORT);
321 	tcph->seq = ntohl(START_SEQ + seq_offset);
322 	tcph->ack_seq = ntohl(START_ACK + ack_offset);
323 	tcph->ack = 1;
324 	tcph->fin = fin;
325 	tcph->doff = 5;
326 	tcph->window = htons(TCP_MAXWIN);
327 	tcph->urg_ptr = 0;
328 	tcph->check = tcp_checksum(tcph, payload_len);
329 }
330 
/*
 * Send the @len-byte frame at @buf on @fd to link-layer address @daddr.
 *
 * Exits via error() if sendto() fails or transmits fewer than @len bytes.
 * A short send is not a failing call, so errno is not meaningful there
 * and is deliberately not reported.
 */
static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
{
	ssize_t sent;

	sent = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
	if (sent == -1)
		error(1, errno, "sendto failure");
	if (sent != len)
		error(1, 0, "sendto wrong length");
}
341 
342 static void create_packet(void *buf, int seq_offset, int ack_offset,
343 			  int payload_len, int fin)
344 {
345 	memset(buf, 0, total_hdr_len);
346 	memset(buf + total_hdr_len, 'a', payload_len);
347 
348 	fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
349 			    payload_len, fin);
350 
351 	if (ipip) {
352 		fill_networklayer(buf + ETH_HLEN, payload_len + sizeof(struct iphdr),
353 				  IPPROTO_IPIP);
354 		fill_networklayer(buf + ETH_HLEN + sizeof(struct iphdr),
355 				  payload_len, IPPROTO_TCP);
356 	} else {
357 		fill_networklayer(buf + ETH_HLEN, payload_len, IPPROTO_TCP);
358 	}
359 
360 	fill_datalinklayer(buf);
361 }
362 
363 /* send one extra flag, not first and not last pkt */
364 static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
365 		       int rst, int urg)
366 {
367 	static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
368 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
369 	int payload_len, pkt_size, flag, i;
370 	struct tcphdr *tcph;
371 
372 	payload_len = PAYLOAD_LEN * psh;
373 	pkt_size = total_hdr_len + payload_len;
374 	flag = NUM_PACKETS / 2;
375 
376 	create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
377 
378 	tcph = (struct tcphdr *)(flag_buf + tcp_offset);
379 	tcph->psh = psh;
380 	tcph->syn = syn;
381 	tcph->rst = rst;
382 	tcph->urg = urg;
383 	tcph->check = 0;
384 	tcph->check = tcp_checksum(tcph, payload_len);
385 
386 	for (i = 0; i < NUM_PACKETS + 1; i++) {
387 		if (i == flag) {
388 			write_packet(fd, flag_buf, pkt_size, daddr);
389 			continue;
390 		}
391 		create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
392 		write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
393 	}
394 }
395 
396 /* Test for data of same length, smaller than previous
397  * and of different lengths
398  */
399 static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
400 			   int payload_len1, int payload_len2)
401 {
402 	static char buf[ETH_HLEN + IP_MAXPACKET];
403 
404 	create_packet(buf, 0, 0, payload_len1, 0);
405 	write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
406 	create_packet(buf, payload_len1, 0, payload_len2, 0);
407 	write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
408 }
409 
410 /* If incoming segments make tracked segment length exceed
411  * legal IP datagram length, do not coalesce
412  */
413 static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
414 {
415 	static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
416 	static char last[TOTAL_HDR_LEN + MSS];
417 	static char new_seg[TOTAL_HDR_LEN + MSS];
418 	int i;
419 
420 	for (i = 0; i < NUM_LARGE_PKT; i++)
421 		create_packet(pkts[i], i * MSS, 0, MSS, 0);
422 	create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
423 	create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
424 
425 	for (i = 0; i < NUM_LARGE_PKT; i++)
426 		write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
427 	write_packet(fd, last, total_hdr_len + remainder, daddr);
428 	write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
429 }
430 
431 /* Pure acks and dup acks don't coalesce */
432 static void send_ack(int fd, struct sockaddr_ll *daddr)
433 {
434 	static char buf[MAX_HDR_LEN];
435 
436 	create_packet(buf, 0, 0, 0, 0);
437 	write_packet(fd, buf, total_hdr_len, daddr);
438 	write_packet(fd, buf, total_hdr_len, daddr);
439 	create_packet(buf, 0, 1, 0, 0);
440 	write_packet(fd, buf, total_hdr_len, daddr);
441 }
442 
443 static void recompute_packet(char *buf, char *no_ext, int extlen)
444 {
445 	struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
446 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
447 	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
448 
449 	memmove(buf, no_ext, total_hdr_len);
450 	memmove(buf + total_hdr_len + extlen,
451 		no_ext + total_hdr_len, PAYLOAD_LEN);
452 
453 	tcphdr->doff = tcphdr->doff + (extlen / 4);
454 	tcphdr->check = 0;
455 	tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
456 	if (proto == PF_INET) {
457 		iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
458 		iph->check = 0;
459 		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
460 
461 		if (ipip) {
462 			iph += 1;
463 			iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
464 			iph->check = 0;
465 			iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
466 		}
467 	} else {
468 		ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
469 	}
470 }
471 
472 static void tcp_write_options(char *buf, int kind, int ts)
473 {
474 	struct tcp_option_ts {
475 		uint8_t kind;
476 		uint8_t len;
477 		uint32_t tsval;
478 		uint32_t tsecr;
479 	} *opt_ts = (void *)buf;
480 	struct tcp_option_window {
481 		uint8_t kind;
482 		uint8_t len;
483 		uint8_t shift;
484 	} *opt_window = (void *)buf;
485 
486 	switch (kind) {
487 	case TCPOPT_NOP:
488 		buf[0] = TCPOPT_NOP;
489 		break;
490 	case TCPOPT_WINDOW:
491 		memset(opt_window, 0, sizeof(struct tcp_option_window));
492 		opt_window->kind = TCPOPT_WINDOW;
493 		opt_window->len = TCPOLEN_WINDOW;
494 		opt_window->shift = 0;
495 		break;
496 	case TCPOPT_TIMESTAMP:
497 		memset(opt_ts, 0, sizeof(struct tcp_option_ts));
498 		opt_ts->kind = TCPOPT_TIMESTAMP;
499 		opt_ts->len = TCPOLEN_TIMESTAMP;
500 		opt_ts->tsval = ts;
501 		opt_ts->tsecr = 0;
502 		break;
503 	default:
504 		error(1, 0, "unimplemented TCP option");
505 		break;
506 	}
507 }
508 
509 /* TCP with options is always a permutation of {TS, NOP, NOP}.
510  * Implement different orders to verify coalescing stops.
511  */
512 static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
513 {
514 	switch (order) {
515 	case 0:
516 		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
517 		tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
518 		tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
519 				  TCPOPT_TIMESTAMP, ts);
520 		break;
521 	case 1:
522 		tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
523 		tcp_write_options(buf + total_hdr_len + 1,
524 				  TCPOPT_TIMESTAMP, ts);
525 		tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
526 				  TCPOPT_NOP, 0);
527 		break;
528 	case 2:
529 		tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
530 		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
531 				  TCPOPT_NOP, 0);
532 		tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
533 				  TCPOPT_NOP, 0);
534 		break;
535 	default:
536 		error(1, 0, "unknown order");
537 		break;
538 	}
539 	recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
540 }
541 
542 /* Packets with invalid checksum don't coalesce. */
543 static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
544 {
545 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
546 	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
547 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
548 
549 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
550 	write_packet(fd, buf, pkt_size, daddr);
551 
552 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
553 	tcph->check = tcph->check - 1;
554 	write_packet(fd, buf, pkt_size, daddr);
555 }
556 
557  /* Packets with non-consecutive sequence number don't coalesce.*/
558 static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
559 {
560 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
561 	struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
562 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
563 
564 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
565 	write_packet(fd, buf, pkt_size, daddr);
566 
567 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
568 	tcph->seq = ntohl(htonl(tcph->seq) + 1);
569 	tcph->check = 0;
570 	tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
571 	write_packet(fd, buf, pkt_size, daddr);
572 }
573 
574  /* Packet with different timestamp option or different timestamps
575   * don't coalesce.
576   */
577 static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
578 {
579 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
580 	static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
581 	int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
582 
583 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
584 	add_standard_tcp_options(extpkt, buf, 0, 0);
585 	write_packet(fd, extpkt, pkt_size, daddr);
586 
587 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
588 	add_standard_tcp_options(extpkt, buf, 0, 0);
589 	write_packet(fd, extpkt, pkt_size, daddr);
590 
591 	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
592 	add_standard_tcp_options(extpkt, buf, 100, 0);
593 	write_packet(fd, extpkt, pkt_size, daddr);
594 
595 	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
596 	add_standard_tcp_options(extpkt, buf, 100, 1);
597 	write_packet(fd, extpkt, pkt_size, daddr);
598 
599 	create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
600 	add_standard_tcp_options(extpkt, buf, 100, 2);
601 	write_packet(fd, extpkt, pkt_size, daddr);
602 }
603 
604 /* Packet with different tcp options don't coalesce. */
605 static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
606 {
607 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
608 	static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
609 	static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
610 	int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
611 	int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
612 
613 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
614 	add_standard_tcp_options(extpkt1, buf, 0, 0);
615 	write_packet(fd, extpkt1, extpkt1_size, daddr);
616 
617 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
618 	add_standard_tcp_options(extpkt1, buf, 0, 0);
619 	write_packet(fd, extpkt1, extpkt1_size, daddr);
620 
621 	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
622 	tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
623 	tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
624 	recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
625 	write_packet(fd, extpkt2, extpkt2_size, daddr);
626 }
627 
628 static void add_ipv4_ts_option(void *buf, void *optpkt)
629 {
630 	struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
631 	int optlen = sizeof(struct ip_timestamp);
632 	struct iphdr *iph;
633 
634 	if (optlen % 4)
635 		error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
636 
637 	ts->ipt_code = IPOPT_TS;
638 	ts->ipt_len = optlen;
639 	ts->ipt_ptr = 5;
640 	ts->ipt_flg = IPOPT_TS_TSONLY;
641 
642 	memcpy(optpkt, buf, tcp_offset);
643 	memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
644 	       sizeof(struct tcphdr) + PAYLOAD_LEN);
645 
646 	iph = (struct iphdr *)(optpkt + ETH_HLEN);
647 	iph->ihl = 5 + (optlen / 4);
648 	iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
649 	iph->check = 0;
650 	iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
651 }
652 
653 static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
654 {
655 	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
656 	struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
657 	char *exthdr_payload_start = (char *)(exthdr + 1);
658 
659 	exthdr->hdrlen = 0;
660 	exthdr->nexthdr = IPPROTO_TCP;
661 
662 	memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
663 
664 	memcpy(optpkt, buf, tcp_offset);
665 	memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
666 		sizeof(struct tcphdr) + PAYLOAD_LEN);
667 
668 	iph->nexthdr = exthdr_type;
669 	iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
670 }
671 
/*
 * Recompute the checksum of the option-less IPv4 header @iph in place.
 * The check field is cleared first so it does not feed into the sum.
 */
static void fix_ip4_checksum(struct iphdr *iph)
{
	const size_t hdr_len = sizeof(*iph);

	iph->check = 0;
	iph->check = checksum_fold(iph, hdr_len, 0);
}
677 
678 static void send_flush_id_case(int fd, struct sockaddr_ll *daddr,
679 			       enum flush_id_case tcase)
680 {
681 	static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
682 	static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
683 	static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
684 	bool send_three = false;
685 	struct iphdr *iph1;
686 	struct iphdr *iph2;
687 	struct iphdr *iph3;
688 
689 	iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
690 	iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
691 	iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
692 
693 	create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
694 	create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
695 	create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
696 
697 	switch (tcase) {
698 	case FLUSH_ID_DF1_INC: /* DF=1, Incrementing - should coalesce */
699 		iph1->frag_off |= htons(IP_DF);
700 		iph1->id = htons(8);
701 
702 		iph2->frag_off |= htons(IP_DF);
703 		iph2->id = htons(9);
704 		break;
705 
706 	case FLUSH_ID_DF1_FIXED: /* DF=1, Fixed - should coalesce */
707 		iph1->frag_off |= htons(IP_DF);
708 		iph1->id = htons(8);
709 
710 		iph2->frag_off |= htons(IP_DF);
711 		iph2->id = htons(8);
712 		break;
713 
714 	case FLUSH_ID_DF0_INC: /* DF=0, Incrementing - should coalesce */
715 		iph1->frag_off &= ~htons(IP_DF);
716 		iph1->id = htons(8);
717 
718 		iph2->frag_off &= ~htons(IP_DF);
719 		iph2->id = htons(9);
720 		break;
721 
722 	case FLUSH_ID_DF0_FIXED: /* DF=0, Fixed - should coalesce */
723 		iph1->frag_off &= ~htons(IP_DF);
724 		iph1->id = htons(8);
725 
726 		iph2->frag_off &= ~htons(IP_DF);
727 		iph2->id = htons(8);
728 		break;
729 
730 	case FLUSH_ID_DF1_INC_FIXED: /* DF=1, two packets incrementing, and
731 				      * one fixed - should coalesce only the
732 				      * first two packets
733 				      */
734 		iph1->frag_off |= htons(IP_DF);
735 		iph1->id = htons(8);
736 
737 		iph2->frag_off |= htons(IP_DF);
738 		iph2->id = htons(9);
739 
740 		iph3->frag_off |= htons(IP_DF);
741 		iph3->id = htons(9);
742 		send_three = true;
743 		break;
744 
745 	case FLUSH_ID_DF1_FIXED_INC: /* DF=1, two packets fixed, and one
746 				      * incrementing - should coalesce only
747 				      * the first two packets
748 				      */
749 		iph1->frag_off |= htons(IP_DF);
750 		iph1->id = htons(8);
751 
752 		iph2->frag_off |= htons(IP_DF);
753 		iph2->id = htons(8);
754 
755 		iph3->frag_off |= htons(IP_DF);
756 		iph3->id = htons(9);
757 		send_three = true;
758 		break;
759 	}
760 
761 	fix_ip4_checksum(iph1);
762 	fix_ip4_checksum(iph2);
763 	write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
764 	write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
765 
766 	if (send_three) {
767 		fix_ip4_checksum(iph3);
768 		write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
769 	}
770 }
771 
772 static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
773 {
774 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
775 	static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
776 
777 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
778 	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1);
779 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
780 
781 	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
782 	add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2);
783 	write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
784 }
785 
786 /* IPv4 options shouldn't coalesce */
787 static void send_ip_options(int fd, struct sockaddr_ll *daddr)
788 {
789 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
790 	static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
791 	int optlen = sizeof(struct ip_timestamp);
792 	int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
793 
794 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
795 	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
796 
797 	create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
798 	add_ipv4_ts_option(buf, optpkt);
799 	write_packet(fd, optpkt, pkt_size, daddr);
800 
801 	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
802 	write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
803 }
804 
805 /*  IPv4 fragments shouldn't coalesce */
806 static void send_fragment4(int fd, struct sockaddr_ll *daddr)
807 {
808 	static char buf[IP_MAXPACKET];
809 	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
810 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
811 
812 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
813 	write_packet(fd, buf, pkt_size, daddr);
814 
815 	/* Once fragmented, packet would retain the total_len.
816 	 * Tcp header is prepared as if rest of data is in follow-up frags,
817 	 * but follow up frags aren't actually sent.
818 	 */
819 	memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
820 	fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
821 	fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN, IPPROTO_TCP);
822 	fill_datalinklayer(buf);
823 
824 	iph->frag_off = htons(0x6000); // DF = 1, MF = 1
825 	iph->check = 0;
826 	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
827 	write_packet(fd, buf, pkt_size, daddr);
828 }
829 
830 /* IPv4 packets with different ttl don't coalesce.*/
831 static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
832 {
833 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
834 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
835 	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
836 
837 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
838 	write_packet(fd, buf, pkt_size, daddr);
839 
840 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
841 	iph->ttl = 7;
842 	iph->check = 0;
843 	iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
844 	write_packet(fd, buf, pkt_size, daddr);
845 }
846 
847 /* Packets with different tos don't coalesce.*/
848 static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
849 {
850 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
851 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
852 	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
853 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
854 
855 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
856 	write_packet(fd, buf, pkt_size, daddr);
857 
858 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
859 	if (proto == PF_INET) {
860 		iph->tos = 1;
861 		iph->check = 0;
862 		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
863 	} else if (proto == PF_INET6) {
864 		ip6h->priority = 0xf;
865 	}
866 	write_packet(fd, buf, pkt_size, daddr);
867 }
868 
869 /* Packets with different ECN don't coalesce.*/
870 static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
871 {
872 	int pkt_size = total_hdr_len + PAYLOAD_LEN;
873 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
874 	struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
875 
876 	create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
877 	write_packet(fd, buf, pkt_size, daddr);
878 
879 	create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
880 	if (proto == PF_INET) {
881 		buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
882 		iph->check = 0;
883 		iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
884 	} else {
885 		buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
886 	}
887 	write_packet(fd, buf, pkt_size, daddr);
888 }
889 
890 /* IPv6 fragments and packets with extensions don't coalesce.*/
891 static void send_fragment6(int fd, struct sockaddr_ll *daddr)
892 {
893 	static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
894 	static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
895 			   sizeof(struct ip6_frag)];
896 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
897 	struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
898 	int extlen = sizeof(struct ip6_frag);
899 	int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
900 	int extpkt_len = bufpkt_len + extlen;
901 	int i;
902 
903 	for (i = 0; i < 2; i++) {
904 		create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
905 		write_packet(fd, buf, bufpkt_len, daddr);
906 	}
907 	sleep(1);
908 	create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
909 	memset(extpkt, 0, extpkt_len);
910 
911 	ip6h->nexthdr = IPPROTO_FRAGMENT;
912 	ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
913 	frag->ip6f_nxt = IPPROTO_TCP;
914 
915 	memcpy(extpkt, buf, tcp_offset);
916 	memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
917 	       sizeof(struct tcphdr) + PAYLOAD_LEN);
918 	write_packet(fd, extpkt, extpkt_len, daddr);
919 
920 	create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
921 	write_packet(fd, buf, bufpkt_len, daddr);
922 }
923 
924 static void bind_packetsocket(int fd)
925 {
926 	struct sockaddr_ll daddr = {};
927 
928 	daddr.sll_family = AF_PACKET;
929 	daddr.sll_protocol = ethhdr_proto;
930 	daddr.sll_ifindex = if_nametoindex(ifname);
931 	if (daddr.sll_ifindex == 0)
932 		error(1, errno, "if_nametoindex");
933 
934 	if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
935 		error(1, errno, "could not bind socket");
936 }
937 
/* Give receives on @fd a 3 second timeout; exits if the option can't be set. */
static void set_timeout(int fd)
{
	struct timeval timeout = {
		.tv_sec = 3,
		.tv_usec = 0,
	};
	int ret;

	ret = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
			 sizeof(timeout));
	if (ret < 0)
		error(1, errno, "cannot set timeout, setsockopt failed");
}
948 
/* Request a 1 MB receive buffer for @fd; exits if setsockopt() fails. */
static void set_rcvbuf(int fd)
{
	int rcvbuf_bytes = 1024 * 1024; /* 1 MB */
	int ret;

	ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_bytes,
			 sizeof(rcvbuf_bytes));
	if (ret != 0)
		error(1, errno, "cannot set rcvbuf size, setsockopt failed");
}
956 
/* Report a failed receive on @fd and exit.
 *
 * Prints the packet socket's packet and drop counters to stderr first, then
 * terminates with @rcv_errno, the errno the caller saw from recv().
 */
static void recv_error(int fd, int rcv_errno)
{
	struct tpacket_stats stats;
	socklen_t len = sizeof(stats);

	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len) != 0)
		error(1, errno, "can't get stats");

	fprintf(stderr, "Socket stats: packets=%u, drops=%u\n",
		stats.tp_packets, stats.tp_drops);

	error(1, rcv_errno, "could not receive");
}
970 
971 static void check_recv_pkts(int fd, int *correct_payload,
972 			    int correct_num_pkts)
973 {
974 	static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
975 	struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
976 	struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
977 	struct tcphdr *tcph;
978 	bool bad_packet = false;
979 	int tcp_ext_len = 0;
980 	int ip_ext_len = 0;
981 	int pkt_size = -1;
982 	int data_len = 0;
983 	int num_pkt = 0;
984 	int i;
985 
986 	vlog("Expected {");
987 	for (i = 0; i < correct_num_pkts; i++)
988 		vlog("%d ", correct_payload[i]);
989 	vlog("}, Total %d packets\nReceived {", correct_num_pkts);
990 
991 	while (1) {
992 		ip_ext_len = 0;
993 		pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
994 		if (pkt_size < 0)
995 			recv_error(fd, errno);
996 
997 		if (iph->version == 4)
998 			ip_ext_len = (iph->ihl - 5) * 4;
999 		else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
1000 			ip_ext_len = MIN_EXTHDR_SIZE;
1001 
1002 		tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
1003 
1004 		if (tcph->fin)
1005 			break;
1006 
1007 		tcp_ext_len = (tcph->doff - 5) * 4;
1008 		data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
1009 		/* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
1010 		 * Ipv4/tcp packets without at least 6 bytes of data will be padded.
1011 		 * Packet sockets are protocol agnostic, and will not trim the padding.
1012 		 */
1013 		if (pkt_size == ETH_ZLEN && iph->version == 4) {
1014 			data_len = ntohs(iph->tot_len)
1015 				- sizeof(struct tcphdr) - sizeof(struct iphdr);
1016 		}
1017 		vlog("%d ", data_len);
1018 		if (data_len != correct_payload[num_pkt]) {
1019 			vlog("[!=%d]", correct_payload[num_pkt]);
1020 			bad_packet = true;
1021 		}
1022 		num_pkt++;
1023 	}
1024 	vlog("}, Total %d packets.\n", num_pkt);
1025 	if (num_pkt != correct_num_pkts)
1026 		error(1, 0, "incorrect number of packets");
1027 	if (bad_packet)
1028 		error(1, 0, "incorrect packet geometry");
1029 
1030 	printf("Test succeeded\n\n");
1031 }
1032 
1033 static void gro_sender(void)
1034 {
1035 	const int fin_delay_us = 100 * 1000;
1036 	static char fin_pkt[MAX_HDR_LEN];
1037 	struct sockaddr_ll daddr = {};
1038 	int txfd = -1;
1039 
1040 	txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
1041 	if (txfd < 0)
1042 		error(1, errno, "socket creation");
1043 
1044 	memset(&daddr, 0, sizeof(daddr));
1045 	daddr.sll_ifindex = if_nametoindex(ifname);
1046 	if (daddr.sll_ifindex == 0)
1047 		error(1, errno, "if_nametoindex");
1048 	daddr.sll_family = AF_PACKET;
1049 	memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
1050 	daddr.sll_halen = ETH_ALEN;
1051 	create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
1052 
1053 	/* data sub-tests */
1054 	if (strcmp(testname, "data_same") == 0) {
1055 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
1056 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1057 	} else if (strcmp(testname, "data_lrg_sml") == 0) {
1058 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
1059 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1060 	} else if (strcmp(testname, "data_sml_lrg") == 0) {
1061 		send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
1062 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1063 
1064 	/* ack test */
1065 	} else if (strcmp(testname, "ack") == 0) {
1066 		send_ack(txfd, &daddr);
1067 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1068 
1069 	/* flags sub-tests */
1070 	} else if (strcmp(testname, "flags_psh") == 0) {
1071 		send_flags(txfd, &daddr, 1, 0, 0, 0);
1072 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1073 	} else if (strcmp(testname, "flags_syn") == 0) {
1074 		send_flags(txfd, &daddr, 0, 1, 0, 0);
1075 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1076 	} else if (strcmp(testname, "flags_rst") == 0) {
1077 		send_flags(txfd, &daddr, 0, 0, 1, 0);
1078 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1079 	} else if (strcmp(testname, "flags_urg") == 0) {
1080 		send_flags(txfd, &daddr, 0, 0, 0, 1);
1081 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1082 
1083 	/* tcp sub-tests */
1084 	} else if (strcmp(testname, "tcp_csum") == 0) {
1085 		send_changed_checksum(txfd, &daddr);
1086 		usleep(fin_delay_us);
1087 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1088 	} else if (strcmp(testname, "tcp_seq") == 0) {
1089 		send_changed_seq(txfd, &daddr);
1090 		usleep(fin_delay_us);
1091 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1092 	} else if (strcmp(testname, "tcp_ts") == 0) {
1093 		send_changed_ts(txfd, &daddr);
1094 		usleep(fin_delay_us);
1095 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1096 	} else if (strcmp(testname, "tcp_opt") == 0) {
1097 		send_diff_opt(txfd, &daddr);
1098 		usleep(fin_delay_us);
1099 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1100 
1101 	/* ip sub-tests - shared between IPv4 and IPv6 */
1102 	} else if (strcmp(testname, "ip_ecn") == 0) {
1103 		send_changed_ECN(txfd, &daddr);
1104 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1105 	} else if (strcmp(testname, "ip_tos") == 0) {
1106 		send_changed_tos(txfd, &daddr);
1107 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1108 
1109 	/* ip sub-tests - IPv4 only */
1110 	} else if (strcmp(testname, "ip_ttl") == 0) {
1111 		send_changed_ttl(txfd, &daddr);
1112 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1113 	} else if (strcmp(testname, "ip_opt") == 0) {
1114 		send_ip_options(txfd, &daddr);
1115 		usleep(fin_delay_us);
1116 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1117 	} else if (strcmp(testname, "ip_frag4") == 0) {
1118 		send_fragment4(txfd, &daddr);
1119 		usleep(fin_delay_us);
1120 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1121 	} else if (strcmp(testname, "ip_id_df1_inc") == 0) {
1122 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC);
1123 		usleep(fin_delay_us);
1124 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1125 	} else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
1126 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED);
1127 		usleep(fin_delay_us);
1128 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1129 	} else if (strcmp(testname, "ip_id_df0_inc") == 0) {
1130 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_INC);
1131 		usleep(fin_delay_us);
1132 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1133 	} else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
1134 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF0_FIXED);
1135 		usleep(fin_delay_us);
1136 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1137 	} else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
1138 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_INC_FIXED);
1139 		usleep(fin_delay_us);
1140 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1141 	} else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
1142 		send_flush_id_case(txfd, &daddr, FLUSH_ID_DF1_FIXED_INC);
1143 		usleep(fin_delay_us);
1144 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1145 
1146 	/* ip sub-tests - IPv6 only */
1147 	} else if (strcmp(testname, "ip_frag6") == 0) {
1148 		send_fragment6(txfd, &daddr);
1149 		usleep(fin_delay_us);
1150 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1151 	} else if (strcmp(testname, "ip_v6ext_same") == 0) {
1152 		send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
1153 		usleep(fin_delay_us);
1154 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1155 	} else if (strcmp(testname, "ip_v6ext_diff") == 0) {
1156 		send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
1157 		usleep(fin_delay_us);
1158 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1159 
1160 	/* large sub-tests */
1161 	} else if (strcmp(testname, "large_max") == 0) {
1162 		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1163 		int remainder = (MAX_PAYLOAD + offset) % MSS;
1164 
1165 		send_large(txfd, &daddr, remainder);
1166 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1167 	} else if (strcmp(testname, "large_rem") == 0) {
1168 		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1169 		int remainder = (MAX_PAYLOAD + offset) % MSS;
1170 
1171 		send_large(txfd, &daddr, remainder + 1);
1172 		write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
1173 	} else {
1174 		error(1, 0, "Unknown testcase: %s", testname);
1175 	}
1176 
1177 	if (close(txfd))
1178 		error(1, errno, "socket close");
1179 }
1180 
1181 static void gro_receiver(void)
1182 {
1183 	static int correct_payload[NUM_PACKETS];
1184 	int rxfd = -1;
1185 
1186 	rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
1187 	if (rxfd < 0)
1188 		error(1, 0, "socket creation");
1189 	setup_sock_filter(rxfd);
1190 	set_timeout(rxfd);
1191 	set_rcvbuf(rxfd);
1192 	bind_packetsocket(rxfd);
1193 
1194 	ksft_ready();
1195 
1196 	memset(correct_payload, 0, sizeof(correct_payload));
1197 
1198 	/* data sub-tests */
1199 	if (strcmp(testname, "data_same") == 0) {
1200 		printf("pure data packet of same size: ");
1201 		correct_payload[0] = PAYLOAD_LEN * 2;
1202 		check_recv_pkts(rxfd, correct_payload, 1);
1203 	} else if (strcmp(testname, "data_lrg_sml") == 0) {
1204 		printf("large data packets followed by a smaller one: ");
1205 		correct_payload[0] = PAYLOAD_LEN * 1.5;
1206 		check_recv_pkts(rxfd, correct_payload, 1);
1207 	} else if (strcmp(testname, "data_sml_lrg") == 0) {
1208 		printf("small data packets followed by a larger one: ");
1209 		correct_payload[0] = PAYLOAD_LEN / 2;
1210 		correct_payload[1] = PAYLOAD_LEN;
1211 		check_recv_pkts(rxfd, correct_payload, 2);
1212 
1213 	/* ack test */
1214 	} else if (strcmp(testname, "ack") == 0) {
1215 		printf("duplicate ack and pure ack: ");
1216 		check_recv_pkts(rxfd, correct_payload, 3);
1217 
1218 	/* flags sub-tests */
1219 	} else if (strcmp(testname, "flags_psh") == 0) {
1220 		correct_payload[0] = PAYLOAD_LEN * 3;
1221 		correct_payload[1] = PAYLOAD_LEN * 2;
1222 		printf("psh flag ends coalescing: ");
1223 		check_recv_pkts(rxfd, correct_payload, 2);
1224 	} else if (strcmp(testname, "flags_syn") == 0) {
1225 		correct_payload[0] = PAYLOAD_LEN * 2;
1226 		correct_payload[1] = 0;
1227 		correct_payload[2] = PAYLOAD_LEN * 2;
1228 		printf("syn flag ends coalescing: ");
1229 		check_recv_pkts(rxfd, correct_payload, 3);
1230 	} else if (strcmp(testname, "flags_rst") == 0) {
1231 		correct_payload[0] = PAYLOAD_LEN * 2;
1232 		correct_payload[1] = 0;
1233 		correct_payload[2] = PAYLOAD_LEN * 2;
1234 		printf("rst flag ends coalescing: ");
1235 		check_recv_pkts(rxfd, correct_payload, 3);
1236 	} else if (strcmp(testname, "flags_urg") == 0) {
1237 		correct_payload[0] = PAYLOAD_LEN * 2;
1238 		correct_payload[1] = 0;
1239 		correct_payload[2] = PAYLOAD_LEN * 2;
1240 		printf("urg flag ends coalescing: ");
1241 		check_recv_pkts(rxfd, correct_payload, 3);
1242 
1243 	/* tcp sub-tests */
1244 	} else if (strcmp(testname, "tcp_csum") == 0) {
1245 		correct_payload[0] = PAYLOAD_LEN;
1246 		correct_payload[1] = PAYLOAD_LEN;
1247 		printf("changed checksum does not coalesce: ");
1248 		check_recv_pkts(rxfd, correct_payload, 2);
1249 	} else if (strcmp(testname, "tcp_seq") == 0) {
1250 		correct_payload[0] = PAYLOAD_LEN;
1251 		correct_payload[1] = PAYLOAD_LEN;
1252 		printf("Wrong Seq number doesn't coalesce: ");
1253 		check_recv_pkts(rxfd, correct_payload, 2);
1254 	} else if (strcmp(testname, "tcp_ts") == 0) {
1255 		correct_payload[0] = PAYLOAD_LEN * 2;
1256 		correct_payload[1] = PAYLOAD_LEN;
1257 		correct_payload[2] = PAYLOAD_LEN;
1258 		correct_payload[3] = PAYLOAD_LEN;
1259 		printf("Different timestamp doesn't coalesce: ");
1260 		check_recv_pkts(rxfd, correct_payload, 4);
1261 	} else if (strcmp(testname, "tcp_opt") == 0) {
1262 		correct_payload[0] = PAYLOAD_LEN * 2;
1263 		correct_payload[1] = PAYLOAD_LEN;
1264 		printf("Different options doesn't coalesce: ");
1265 		check_recv_pkts(rxfd, correct_payload, 2);
1266 
1267 	/* ip sub-tests - shared between IPv4 and IPv6 */
1268 	} else if (strcmp(testname, "ip_ecn") == 0) {
1269 		correct_payload[0] = PAYLOAD_LEN;
1270 		correct_payload[1] = PAYLOAD_LEN;
1271 		printf("different ECN doesn't coalesce: ");
1272 		check_recv_pkts(rxfd, correct_payload, 2);
1273 	} else if (strcmp(testname, "ip_tos") == 0) {
1274 		correct_payload[0] = PAYLOAD_LEN;
1275 		correct_payload[1] = PAYLOAD_LEN;
1276 		printf("different tos doesn't coalesce: ");
1277 		check_recv_pkts(rxfd, correct_payload, 2);
1278 
1279 	/* ip sub-tests - IPv4 only */
1280 	} else if (strcmp(testname, "ip_ttl") == 0) {
1281 		correct_payload[0] = PAYLOAD_LEN;
1282 		correct_payload[1] = PAYLOAD_LEN;
1283 		printf("different ttl doesn't coalesce: ");
1284 		check_recv_pkts(rxfd, correct_payload, 2);
1285 	} else if (strcmp(testname, "ip_opt") == 0) {
1286 		correct_payload[0] = PAYLOAD_LEN;
1287 		correct_payload[1] = PAYLOAD_LEN;
1288 		correct_payload[2] = PAYLOAD_LEN;
1289 		printf("ip options doesn't coalesce: ");
1290 		check_recv_pkts(rxfd, correct_payload, 3);
1291 	} else if (strcmp(testname, "ip_frag4") == 0) {
1292 		correct_payload[0] = PAYLOAD_LEN;
1293 		correct_payload[1] = PAYLOAD_LEN;
1294 		printf("fragmented ip4 doesn't coalesce: ");
1295 		check_recv_pkts(rxfd, correct_payload, 2);
1296 	} else if (strcmp(testname, "ip_id_df1_inc") == 0) {
1297 		printf("DF=1, Incrementing - should coalesce: ");
1298 		correct_payload[0] = PAYLOAD_LEN * 2;
1299 		check_recv_pkts(rxfd, correct_payload, 1);
1300 	} else if (strcmp(testname, "ip_id_df1_fixed") == 0) {
1301 		printf("DF=1, Fixed - should coalesce: ");
1302 		correct_payload[0] = PAYLOAD_LEN * 2;
1303 		check_recv_pkts(rxfd, correct_payload, 1);
1304 	} else if (strcmp(testname, "ip_id_df0_inc") == 0) {
1305 		printf("DF=0, Incrementing - should coalesce: ");
1306 		correct_payload[0] = PAYLOAD_LEN * 2;
1307 		check_recv_pkts(rxfd, correct_payload, 1);
1308 	} else if (strcmp(testname, "ip_id_df0_fixed") == 0) {
1309 		printf("DF=0, Fixed - should coalesce: ");
1310 		correct_payload[0] = PAYLOAD_LEN * 2;
1311 		check_recv_pkts(rxfd, correct_payload, 1);
1312 	} else if (strcmp(testname, "ip_id_df1_inc_fixed") == 0) {
1313 		printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
1314 		correct_payload[0] = PAYLOAD_LEN * 2;
1315 		correct_payload[1] = PAYLOAD_LEN;
1316 		check_recv_pkts(rxfd, correct_payload, 2);
1317 	} else if (strcmp(testname, "ip_id_df1_fixed_inc") == 0) {
1318 		printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
1319 		correct_payload[0] = PAYLOAD_LEN * 2;
1320 		correct_payload[1] = PAYLOAD_LEN;
1321 		check_recv_pkts(rxfd, correct_payload, 2);
1322 
1323 	/* ip sub-tests - IPv6 only */
1324 	} else if (strcmp(testname, "ip_frag6") == 0) {
1325 		/* GRO doesn't check for ipv6 hop limit when flushing.
1326 		 * Hence no corresponding test to the ipv4 case.
1327 		 */
1328 		printf("fragmented ip6 doesn't coalesce: ");
1329 		correct_payload[0] = PAYLOAD_LEN * 2;
1330 		correct_payload[1] = PAYLOAD_LEN;
1331 		correct_payload[2] = PAYLOAD_LEN;
1332 		check_recv_pkts(rxfd, correct_payload, 3);
1333 	} else if (strcmp(testname, "ip_v6ext_same") == 0) {
1334 		printf("ipv6 with ext header does coalesce: ");
1335 		correct_payload[0] = PAYLOAD_LEN * 2;
1336 		check_recv_pkts(rxfd, correct_payload, 1);
1337 	} else if (strcmp(testname, "ip_v6ext_diff") == 0) {
1338 		printf("ipv6 with ext header with different payloads doesn't coalesce: ");
1339 		correct_payload[0] = PAYLOAD_LEN;
1340 		correct_payload[1] = PAYLOAD_LEN;
1341 		check_recv_pkts(rxfd, correct_payload, 2);
1342 
1343 	/* large sub-tests */
1344 	} else if (strcmp(testname, "large_max") == 0) {
1345 		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1346 		int remainder = (MAX_PAYLOAD + offset) % MSS;
1347 
1348 		correct_payload[0] = (MAX_PAYLOAD + offset);
1349 		correct_payload[1] = remainder;
1350 		printf("Shouldn't coalesce if exceed IP max pkt size: ");
1351 		check_recv_pkts(rxfd, correct_payload, 2);
1352 	} else if (strcmp(testname, "large_rem") == 0) {
1353 		int offset = (proto == PF_INET && !ipip) ? 20 : 0;
1354 		int remainder = (MAX_PAYLOAD + offset) % MSS;
1355 
1356 		/* last segment sent individually, doesn't start new segment */
1357 		correct_payload[0] = (MAX_PAYLOAD + offset) - remainder;
1358 		correct_payload[1] = remainder + 1;
1359 		correct_payload[2] = remainder + 1;
1360 		printf("last segment sent individually: ");
1361 		check_recv_pkts(rxfd, correct_payload, 3);
1362 	} else {
1363 		error(1, 0, "Test case error: unknown testname %s", testname);
1364 	}
1365 
1366 	if (close(rxfd))
1367 		error(1, 0, "socket close");
1368 }
1369 
1370 static void parse_args(int argc, char **argv)
1371 {
1372 	static const struct option opts[] = {
1373 		{ "daddr", required_argument, NULL, 'd' },
1374 		{ "dmac", required_argument, NULL, 'D' },
1375 		{ "iface", required_argument, NULL, 'i' },
1376 		{ "ipv4", no_argument, NULL, '4' },
1377 		{ "ipv6", no_argument, NULL, '6' },
1378 		{ "ipip", no_argument, NULL, 'e' },
1379 		{ "rx", no_argument, NULL, 'r' },
1380 		{ "saddr", required_argument, NULL, 's' },
1381 		{ "smac", required_argument, NULL, 'S' },
1382 		{ "test", required_argument, NULL, 't' },
1383 		{ "verbose", no_argument, NULL, 'v' },
1384 		{ 0, 0, 0, 0 }
1385 	};
1386 	int c;
1387 
1388 	while ((c = getopt_long(argc, argv, "46d:D:ei:rs:S:t:v", opts, NULL)) != -1) {
1389 		switch (c) {
1390 		case '4':
1391 			proto = PF_INET;
1392 			ethhdr_proto = htons(ETH_P_IP);
1393 			break;
1394 		case '6':
1395 			proto = PF_INET6;
1396 			ethhdr_proto = htons(ETH_P_IPV6);
1397 			break;
1398 		case 'e':
1399 			ipip = true;
1400 			proto = PF_INET;
1401 			ethhdr_proto = htons(ETH_P_IP);
1402 			break;
1403 		case 'd':
1404 			addr4_dst = addr6_dst = optarg;
1405 			break;
1406 		case 'D':
1407 			dmac = optarg;
1408 			break;
1409 		case 'i':
1410 			ifname = optarg;
1411 			break;
1412 		case 'r':
1413 			tx_socket = false;
1414 			break;
1415 		case 's':
1416 			addr4_src = addr6_src = optarg;
1417 			break;
1418 		case 'S':
1419 			smac = optarg;
1420 			break;
1421 		case 't':
1422 			testname = optarg;
1423 			break;
1424 		case 'v':
1425 			verbose = true;
1426 			break;
1427 		default:
1428 			error(1, 0, "%s invalid option %c\n", __func__, c);
1429 			break;
1430 		}
1431 	}
1432 }
1433 
1434 int main(int argc, char **argv)
1435 {
1436 	parse_args(argc, argv);
1437 
1438 	if (ipip) {
1439 		tcp_offset = ETH_HLEN + sizeof(struct iphdr) * 2;
1440 		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
1441 	} else if (proto == PF_INET) {
1442 		tcp_offset = ETH_HLEN + sizeof(struct iphdr);
1443 		total_hdr_len = tcp_offset + sizeof(struct tcphdr);
1444 	} else if (proto == PF_INET6) {
1445 		tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
1446 		total_hdr_len = MAX_HDR_LEN;
1447 	} else {
1448 		error(1, 0, "Protocol family is not ipv4 or ipv6");
1449 	}
1450 
1451 	read_MAC(src_mac, smac);
1452 	read_MAC(dst_mac, dmac);
1453 
1454 	if (tx_socket) {
1455 		gro_sender();
1456 	} else {
1457 		/* Only the receiver exit status determines test success. */
1458 		gro_receiver();
1459 		fprintf(stderr, "Gro::%s test passed.\n", testname);
1460 	}
1461 
1462 	return 0;
1463 }
1464