xref: /linux/tools/testing/selftests/net/ip_defrag.c (revision 5f09bc8cc4010a3ea17c5881a62fc45192ebe7b0)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 
5 #include <arpa/inet.h>
6 #include <errno.h>
7 #include <error.h>
8 #include <linux/in.h>
9 #include <netinet/ip.h>
10 #include <netinet/ip6.h>
11 #include <netinet/udp.h>
12 #include <stdbool.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <unistd.h>
18 
/* Command-line switches, filled in by parse_opts(). */
static bool		cfg_do_ipv4;	/* -4: run the IPv4 test. */
static bool		cfg_do_ipv6;	/* -6: run the IPv6 test. */
static bool		cfg_verbose;	/* -v: print per-length progress. */
static bool		cfg_overlap;	/* -o: inject an overlapping fragment. */
/* UDP destination port; fragments use cfg_port + 1 as the source port. */
static unsigned short	cfg_port = 9000;

/* Test addresses: INADDR_LOOPBACK + 2 for IPv4, the loopback address for
 * IPv6. Both are private to this file.
 */
static const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
static const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;

#define IP4_HLEN	(sizeof(struct iphdr))
#define IP6_HLEN	(sizeof(struct ip6_hdr))
#define UDP_HLEN	(sizeof(struct udphdr))

/* IPv6 fragment header length. */
#define FRAG_HLEN	8

/* Length of the UDP payload currently under test (set by run_test()). */
static int payload_len;
/* Number of datagram bytes carried by every non-final fragment. */
static int max_frag_len;

#define MSG_LEN_MAX	60000	/* Max UDP payload length. */

#define IP4_MF		(1u << 13)  /* IPv4 MF flag. */
#define IP6_MF		(1)  /* IPv6 MF flag. */

#define CSUM_MANGLED_0 (0xffff)

/* Reference payload that every reassembled datagram is compared against. */
static uint8_t udp_payload[MSG_LEN_MAX];
/* Scratch buffer in which each outgoing raw IP fragment is assembled. */
static uint8_t ip_frame[IP_MAXPACKET];
/* IP identification for the next datagram; bumped once per datagram. */
static uint32_t ip_id = 0xabcd;
/* Counters reported in verbose mode. */
static int msg_counter;
static int frag_counter;
/* Seed passed to srand(); printed by main() so a run can be reproduced. */
static unsigned int seed;
51 
52 /* Receive a UDP packet. Validate it matches udp_payload. */
53 static void recv_validate_udp(int fd_udp)
54 {
55 	ssize_t ret;
56 	static uint8_t recv_buff[MSG_LEN_MAX];
57 
58 	ret = recv(fd_udp, recv_buff, payload_len, 0);
59 	msg_counter++;
60 
61 	if (cfg_overlap) {
62 		if (ret != -1)
63 			error(1, 0, "recv: expected timeout; got %d",
64 				(int)ret);
65 		if (errno != ETIMEDOUT && errno != EAGAIN)
66 			error(1, errno, "recv: expected timeout: %d",
67 				 errno);
68 		return;  /* OK */
69 	}
70 
71 	if (ret == -1)
72 		error(1, errno, "recv: payload_len = %d max_frag_len = %d",
73 			payload_len, max_frag_len);
74 	if (ret != payload_len)
75 		error(1, 0, "recv: wrong size: %d vs %d", (int)ret, payload_len);
76 	if (memcmp(udp_payload, recv_buff, payload_len))
77 		error(1, 0, "recv: wrong data");
78 }
79 
/* Add len bytes at buf to a running ones'-complement sum.
 *
 * The bytes are consumed as big-endian 16-bit words; a trailing odd byte
 * is treated as the high byte of a word whose low byte is zero. After each
 * addition a carry out of the low 16 bits is folded back in.
 *
 * buf: data to sum; any alignment is accepted.
 * len: number of bytes at buf; values <= 0 leave the sum untouched.
 * sum: sum accumulated so far (pass 0 to start a new checksum).
 *
 * Returns the updated sum, not yet complemented.
 */
static uint32_t raw_checksum(const uint8_t *buf, int len, uint32_t sum)
{
	int i;

	/* Assemble each word from individual bytes: this is independent of
	 * host byte order and avoids reading the byte buffer through a
	 * 16-bit pointer, which may be misaligned.
	 */
	for (i = 0; i + 1 < len; i += 2) {
		sum += ((uint32_t)buf[i] << 8) | buf[i + 1];
		if (sum > 0xffff)
			sum -= 0xffff;
	}

	if (i < len) {
		sum += (uint32_t)buf[i] << 8;
		if (sum > 0xffff)
			sum -= 0xffff;
	}

	return sum;
}
98 
99 static uint16_t udp_checksum(struct ip *iphdr, struct udphdr *udphdr)
100 {
101 	uint32_t sum = 0;
102 	uint16_t res;
103 
104 	sum = raw_checksum((uint8_t *)&iphdr->ip_src, 2 * sizeof(iphdr->ip_src),
105 				IPPROTO_UDP + (uint32_t)(UDP_HLEN + payload_len));
106 	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
107 	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
108 	res = 0xffff & ~sum;
109 	if (res)
110 		return htons(res);
111 	else
112 		return CSUM_MANGLED_0;
113 }
114 
115 static uint16_t udp6_checksum(struct ip6_hdr *iphdr, struct udphdr *udphdr)
116 {
117 	uint32_t sum = 0;
118 	uint16_t res;
119 
120 	sum = raw_checksum((uint8_t *)&iphdr->ip6_src, 2 * sizeof(iphdr->ip6_src),
121 				IPPROTO_UDP);
122 	sum = raw_checksum((uint8_t *)&udphdr->len, sizeof(udphdr->len), sum);
123 	sum = raw_checksum((uint8_t *)udphdr, UDP_HLEN, sum);
124 	sum = raw_checksum((uint8_t *)udp_payload, payload_len, sum);
125 	res = 0xffff & ~sum;
126 	if (res)
127 		return htons(res);
128 	else
129 		return CSUM_MANGLED_0;
130 }
131 
132 static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
133 				int offset, bool ipv6)
134 {
135 	int frag_len;
136 	int res;
137 	int payload_offset = offset > 0 ? offset - UDP_HLEN : 0;
138 	uint8_t *frag_start = ipv6 ? ip_frame + IP6_HLEN + FRAG_HLEN :
139 					ip_frame + IP4_HLEN;
140 
141 	if (offset == 0) {
142 		struct udphdr udphdr;
143 		udphdr.source = htons(cfg_port + 1);
144 		udphdr.dest = htons(cfg_port);
145 		udphdr.len = htons(UDP_HLEN + payload_len);
146 		udphdr.check = 0;
147 		if (ipv6)
148 			udphdr.check = udp6_checksum((struct ip6_hdr *)ip_frame, &udphdr);
149 		else
150 			udphdr.check = udp_checksum((struct ip *)ip_frame, &udphdr);
151 		memcpy(frag_start, &udphdr, UDP_HLEN);
152 	}
153 
154 	if (ipv6) {
155 		struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
156 		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
157 		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
158 			/* This is the last fragment. */
159 			frag_len = FRAG_HLEN + payload_len - payload_offset;
160 			fraghdr->ip6f_offlg = htons(offset);
161 		} else {
162 			frag_len = FRAG_HLEN + max_frag_len;
163 			fraghdr->ip6f_offlg = htons(offset | IP6_MF);
164 		}
165 		ip6hdr->ip6_plen = htons(frag_len);
166 		if (offset == 0)
167 			memcpy(frag_start + UDP_HLEN, udp_payload,
168 				frag_len - FRAG_HLEN - UDP_HLEN);
169 		else
170 			memcpy(frag_start, udp_payload + payload_offset,
171 				frag_len - FRAG_HLEN);
172 		frag_len += IP6_HLEN;
173 	} else {
174 		struct ip *iphdr = (struct ip *)ip_frame;
175 		if (payload_len - payload_offset <= max_frag_len && offset > 0) {
176 			/* This is the last fragment. */
177 			frag_len = IP4_HLEN + payload_len - payload_offset;
178 			iphdr->ip_off = htons(offset / 8);
179 		} else {
180 			frag_len = IP4_HLEN + max_frag_len;
181 			iphdr->ip_off = htons(offset / 8 | IP4_MF);
182 		}
183 		iphdr->ip_len = htons(frag_len);
184 		if (offset == 0)
185 			memcpy(frag_start + UDP_HLEN, udp_payload,
186 				frag_len - IP4_HLEN - UDP_HLEN);
187 		else
188 			memcpy(frag_start, udp_payload + payload_offset,
189 				frag_len - IP4_HLEN);
190 	}
191 
192 	res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
193 	if (res < 0)
194 		error(1, errno, "send_fragment");
195 	if (res != frag_len)
196 		error(1, 0, "send_fragment: %d vs %d", res, frag_len);
197 
198 	frag_counter++;
199 }
200 
201 static void send_udp_frags(int fd_raw, struct sockaddr *addr,
202 				socklen_t alen, bool ipv6)
203 {
204 	struct ip *iphdr = (struct ip *)ip_frame;
205 	struct ip6_hdr *ip6hdr = (struct ip6_hdr *)ip_frame;
206 	const bool ipv4 = !ipv6;
207 	int res;
208 	int offset;
209 	int frag_len;
210 
211 	/* Send the UDP datagram using raw IP fragments: the 0th fragment
212 	 * has the UDP header; other fragments are pieces of udp_payload
213 	 * split in chunks of frag_len size.
214 	 *
215 	 * Odd fragments (1st, 3rd, 5th, etc.) are sent out first, then
216 	 * even fragments (0th, 2nd, etc.) are sent out.
217 	 */
218 	if (ipv6) {
219 		struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
220 		((struct sockaddr_in6 *)addr)->sin6_port = 0;
221 		memset(ip6hdr, 0, sizeof(*ip6hdr));
222 		ip6hdr->ip6_flow = htonl(6<<28);  /* Version. */
223 		ip6hdr->ip6_nxt = IPPROTO_FRAGMENT;
224 		ip6hdr->ip6_hops = 255;
225 		ip6hdr->ip6_src = addr6;
226 		ip6hdr->ip6_dst = addr6;
227 		fraghdr->ip6f_nxt = IPPROTO_UDP;
228 		fraghdr->ip6f_reserved = 0;
229 		fraghdr->ip6f_ident = htonl(ip_id++);
230 	} else {
231 		memset(iphdr, 0, sizeof(*iphdr));
232 		iphdr->ip_hl = 5;
233 		iphdr->ip_v = 4;
234 		iphdr->ip_tos = 0;
235 		iphdr->ip_id = htons(ip_id++);
236 		iphdr->ip_ttl = 0x40;
237 		iphdr->ip_p = IPPROTO_UDP;
238 		iphdr->ip_src.s_addr = htonl(INADDR_LOOPBACK);
239 		iphdr->ip_dst = addr4;
240 		iphdr->ip_sum = 0;
241 	}
242 
243 	/* Occasionally test in-order fragments. */
244 	if (!cfg_overlap && (rand() % 100 < 15)) {
245 		offset = 0;
246 		while (offset < (UDP_HLEN + payload_len)) {
247 			send_fragment(fd_raw, addr, alen, offset, ipv6);
248 			offset += max_frag_len;
249 		}
250 		return;
251 	}
252 
253 	/* Occasionally test IPv4 "runs" (see net/ipv4/ip_fragment.c) */
254 	if (ipv4 && !cfg_overlap && (rand() % 100 < 20) &&
255 			(payload_len > 9 * max_frag_len)) {
256 		offset = 6 * max_frag_len;
257 		while (offset < (UDP_HLEN + payload_len)) {
258 			send_fragment(fd_raw, addr, alen, offset, ipv6);
259 			offset += max_frag_len;
260 		}
261 		offset = 3 * max_frag_len;
262 		while (offset < 6 * max_frag_len) {
263 			send_fragment(fd_raw, addr, alen, offset, ipv6);
264 			offset += max_frag_len;
265 		}
266 		offset = 0;
267 		while (offset < 3 * max_frag_len) {
268 			send_fragment(fd_raw, addr, alen, offset, ipv6);
269 			offset += max_frag_len;
270 		}
271 		return;
272 	}
273 
274 	/* Odd fragments. */
275 	offset = max_frag_len;
276 	while (offset < (UDP_HLEN + payload_len)) {
277 		send_fragment(fd_raw, addr, alen, offset, ipv6);
278 		/* IPv4 ignores duplicates, so randomly send a duplicate. */
279 		if (ipv4 && (1 == rand() % 100))
280 			send_fragment(fd_raw, addr, alen, offset, ipv6);
281 		offset += 2 * max_frag_len;
282 	}
283 
284 	if (cfg_overlap) {
285 		/* Send an extra random fragment. */
286 		if (ipv6) {
287 			struct ip6_frag *fraghdr = (struct ip6_frag *)(ip_frame + IP6_HLEN);
288 			/* sendto() returns EINVAL if offset + frag_len is too small. */
289 			offset = rand() % (UDP_HLEN + payload_len - 1);
290 			frag_len = max_frag_len + rand() % 256;
291 			/* In IPv6 if !!(frag_len % 8), the fragment is dropped. */
292 			frag_len &= ~0x7;
293 			fraghdr->ip6f_offlg = htons(offset / 8 | IP6_MF);
294 			ip6hdr->ip6_plen = htons(frag_len);
295 			frag_len += IP6_HLEN;
296 		} else {
297 			/* In IPv4, duplicates and some fragments completely inside
298 			 * previously sent fragments are dropped/ignored. So
299 			 * random offset and frag_len can result in a dropped
300 			 * fragment instead of a dropped queue/packet. So we
301 			 * hard-code offset and frag_len.
302 			 *
303 			 * See ade446403bfb ("net: ipv4: do not handle duplicate
304 			 * fragments as overlapping").
305 			 */
306 			if (max_frag_len * 4 < payload_len || max_frag_len < 16) {
307 				/* not enough payload to play with random offset and frag_len. */
308 				offset = 8;
309 				frag_len = IP4_HLEN + UDP_HLEN + max_frag_len;
310 			} else {
311 				offset = rand() % (payload_len / 2);
312 				frag_len = 2 * max_frag_len + 1 + rand() % 256;
313 			}
314 			iphdr->ip_off = htons(offset / 8 | IP4_MF);
315 			iphdr->ip_len = htons(frag_len);
316 		}
317 		res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
318 		if (res < 0)
319 			error(1, errno, "sendto overlap: %d", frag_len);
320 		if (res != frag_len)
321 			error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
322 		frag_counter++;
323 	}
324 
325 	/* Event fragments. */
326 	offset = 0;
327 	while (offset < (UDP_HLEN + payload_len)) {
328 		send_fragment(fd_raw, addr, alen, offset, ipv6);
329 		/* IPv4 ignores duplicates, so randomly send a duplicate. */
330 		if (ipv4 && (1 == rand() % 100))
331 			send_fragment(fd_raw, addr, alen, offset, ipv6);
332 		offset += 2 * max_frag_len;
333 	}
334 }
335 
336 static void run_test(struct sockaddr *addr, socklen_t alen, bool ipv6)
337 {
338 	int fd_tx_raw, fd_rx_udp;
339 	/* Frag queue timeout is set to one second in the calling script;
340 	 * socket timeout should be just a bit longer to avoid tests interfering
341 	 * with each other.
342 	 */
343 	struct timeval tv = { .tv_sec = 1, .tv_usec = 10 };
344 	int idx;
345 	int min_frag_len = ipv6 ? 1280 : 8;
346 
347 	/* Initialize the payload. */
348 	for (idx = 0; idx < MSG_LEN_MAX; ++idx)
349 		udp_payload[idx] = idx % 256;
350 
351 	/* Open sockets. */
352 	fd_tx_raw = socket(addr->sa_family, SOCK_RAW, IPPROTO_RAW);
353 	if (fd_tx_raw == -1)
354 		error(1, errno, "socket tx_raw");
355 
356 	fd_rx_udp = socket(addr->sa_family, SOCK_DGRAM, 0);
357 	if (fd_rx_udp == -1)
358 		error(1, errno, "socket rx_udp");
359 	if (bind(fd_rx_udp, addr, alen))
360 		error(1, errno, "bind");
361 	/* Fail fast. */
362 	if (setsockopt(fd_rx_udp, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
363 		error(1, errno, "setsockopt rcv timeout");
364 
365 	for (payload_len = min_frag_len; payload_len < MSG_LEN_MAX;
366 			payload_len += (rand() % 4096)) {
367 		if (cfg_verbose)
368 			printf("payload_len: %d\n", payload_len);
369 
370 		if (cfg_overlap) {
371 			/* With overlaps, one send/receive pair below takes
372 			 * at least one second (== timeout) to run, so there
373 			 * is not enough test time to run a nested loop:
374 			 * the full overlap test takes 20-30 seconds.
375 			 */
376 			max_frag_len = min_frag_len +
377 				rand() % (1500 - FRAG_HLEN - min_frag_len);
378 			send_udp_frags(fd_tx_raw, addr, alen, ipv6);
379 			recv_validate_udp(fd_rx_udp);
380 		} else {
381 			/* Without overlaps, each packet reassembly (== one
382 			 * send/receive pair below) takes very little time to
383 			 * run, so we can easily afford more thourough testing
384 			 * with a nested loop: the full non-overlap test takes
385 			 * less than one second).
386 			 */
387 			max_frag_len = min_frag_len;
388 			do {
389 				send_udp_frags(fd_tx_raw, addr, alen, ipv6);
390 				recv_validate_udp(fd_rx_udp);
391 				max_frag_len += 8 * (rand() % 8);
392 			} while (max_frag_len < (1500 - FRAG_HLEN) &&
393 				 max_frag_len <= payload_len);
394 		}
395 	}
396 
397 	/* Cleanup. */
398 	if (close(fd_tx_raw))
399 		error(1, errno, "close tx_raw");
400 	if (close(fd_rx_udp))
401 		error(1, errno, "close rx_udp");
402 
403 	if (cfg_verbose)
404 		printf("processed %d messages, %d fragments\n",
405 			msg_counter, frag_counter);
406 
407 	fprintf(stderr, "PASS\n");
408 }
409 
410 
411 static void run_test_v4(void)
412 {
413 	struct sockaddr_in addr = {0};
414 
415 	addr.sin_family = AF_INET;
416 	addr.sin_port = htons(cfg_port);
417 	addr.sin_addr = addr4;
418 
419 	run_test((void *)&addr, sizeof(addr), false /* !ipv6 */);
420 }
421 
422 static void run_test_v6(void)
423 {
424 	struct sockaddr_in6 addr = {0};
425 
426 	addr.sin6_family = AF_INET6;
427 	addr.sin6_port = htons(cfg_port);
428 	addr.sin6_addr = addr6;
429 
430 	run_test((void *)&addr, sizeof(addr), true /* ipv6 */);
431 }
432 
433 static void parse_opts(int argc, char **argv)
434 {
435 	int c;
436 
437 	while ((c = getopt(argc, argv, "46ov")) != -1) {
438 		switch (c) {
439 		case '4':
440 			cfg_do_ipv4 = true;
441 			break;
442 		case '6':
443 			cfg_do_ipv6 = true;
444 			break;
445 		case 'o':
446 			cfg_overlap = true;
447 			break;
448 		case 'v':
449 			cfg_verbose = true;
450 			break;
451 		default:
452 			error(1, 0, "%s: parse error", argv[0]);
453 		}
454 	}
455 }
456 
457 int main(int argc, char **argv)
458 {
459 	parse_opts(argc, argv);
460 	seed = time(NULL);
461 	srand(seed);
462 	/* Print the seed to track/reproduce potential failures. */
463 	printf("seed = %d\n", seed);
464 
465 	if (cfg_do_ipv4)
466 		run_test_v4();
467 	if (cfg_do_ipv6)
468 		run_test_v6();
469 
470 	return 0;
471 }
472