xref: /linux/tools/testing/selftests/net/tun.c (revision 37a93dd5c49b5fda807fd204edf2547c3493319c)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <unistd.h>
11 #include <linux/if_tun.h>
12 #include <sys/ioctl.h>
13 #include <sys/socket.h>
14 
15 #include "kselftest_harness.h"
16 #include "tuntap_helpers.h"
17 
/* Name of the geneve link created by the tun_vnet_udptnl fixture. */
static const char param_dev_geneve_name[] = "geneve1";

/*
 * Ethernet addresses used by the tests. "outer" addresses belong to the
 * tun/tap side of the tunnel, "inner" addresses to the geneve side.
 */
static unsigned char param_hwaddr_outer_dst[] = { 0x00, 0xfe, 0x98,
						  0x14, 0x22, 0x42 };
static unsigned char param_hwaddr_outer_src[] = { 0x00, 0xfe, 0x98,
						  0x94, 0xd2, 0x43 };
static unsigned char param_hwaddr_inner_dst[] = { 0x00, 0xfe, 0x98,
						  0x94, 0x22, 0xcc };
static unsigned char param_hwaddr_inner_src[] = { 0x00, 0xfe, 0x98,
						  0x94, 0xd2, 0xdd };

/* Outer IPv4 destination: 172.16.0.1, stored in network byte order. */
static struct in_addr param_ipaddr4_outer_dst = {
	__constant_htonl(0xac100001),
};

/* Outer IPv4 source: 172.16.0.2, stored in network byte order. */
static struct in_addr param_ipaddr4_outer_src = {
	__constant_htonl(0xac100002),
};

/* Inner IPv4 destination: 172.16.1.1, stored in network byte order. */
static struct in_addr param_ipaddr4_inner_dst = {
	__constant_htonl(0xac100101),
};

/* Inner IPv4 source: 172.16.1.2, stored in network byte order. */
static struct in_addr param_ipaddr4_inner_src = {
	__constant_htonl(0xac100102),
};

/* Outer IPv6 destination: 2002:db8:100::1. */
static struct in6_addr param_ipaddr6_outer_dst = {
	{ { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
};

/* Outer IPv6 source: 2002:db8:100::2. */
static struct in6_addr param_ipaddr6_outer_src = {
	{ { 0x20, 0x02, 0x0d, 0xb8, 0x01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
};

/* Inner IPv6 destination: 2002:db8:200::1. */
static struct in6_addr param_ipaddr6_inner_dst = {
	{ { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } },
};

/* Inner IPv6 source: 2002:db8:200::2. */
static struct in6_addr param_ipaddr6_inner_src = {
	{ { 0x20, 0x02, 0x0d, 0xb8, 0x02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 } },
};
59 
/* Single-bit mask helper, defined only if no included header provides it. */
#ifndef BIT
#define BIT(nr) (1UL << (nr))
#endif

/* Geneve virtual network identifier used for the test link and packets. */
#define VN_ID 1
/*
 * UDP port the geneve link is created with and that tunnelled packets are
 * expected to target.
 * NOTE(review): 4789 is commonly the VXLAN port; geneve usually uses 6081.
 * It looks harmless here because both ends use this constant - confirm.
 */
#define VN_PORT 4789
/* Inner UDP ports of the test flow (local socket port / peer port). */
#define UDP_SRC_PORT 22
#define UDP_DST_PORT 48878
/* Prefix lengths used when assigning IPv4 / IPv6 addresses to interfaces. */
#define IPPREFIX_LEN 24
#define IP6PREFIX_LEN 64
/* Receive timeout in seconds for the UDP socket and for select() on the tun fd. */
#define TIMEOUT_SEC 10
/* Sleep in microseconds between route lookups, and how many lookups to try. */
#define TIMEOUT_USEC 100000
#define MAX_RETRIES 20

/* Tunnel flavours, named <inner family>IN<outer family>; one bit each. */
#define UDP_TUNNEL_GENEVE_4IN4 0x01
#define UDP_TUNNEL_GENEVE_6IN4 0x02
#define UDP_TUNNEL_GENEVE_4IN6 0x04
#define UDP_TUNNEL_GENEVE_6IN6 0x08

/* Segment count used by the "maxsegs" variants (128). */
#define UDP_TUNNEL_MAX_SEGMENTS BIT(7)

/* Masks selecting the flavours whose outer / inner IP header is IPv4. */
#define UDP_TUNNEL_OUTER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_6IN4)
#define UDP_TUNNEL_INNER_IPV4 (UDP_TUNNEL_GENEVE_4IN4 | UDP_TUNNEL_GENEVE_4IN6)

/*
 * Per-flavour header overhead: one Ethernet header, the outer and inner IP
 * headers, the geneve header and the outer and inner UDP headers. Users of
 * these macros add another ETH_HLEN themselves when the device is a tap.
 */
#define UDP_TUNNEL_GENEVE_4IN4_HDRLEN                        \
	(ETH_HLEN + 2 * sizeof(struct iphdr) + GENEVE_HLEN + \
	 2 * sizeof(struct udphdr))
#define UDP_TUNNEL_GENEVE_6IN6_HDRLEN                          \
	(ETH_HLEN + 2 * sizeof(struct ipv6hdr) + GENEVE_HLEN + \
	 2 * sizeof(struct udphdr))
#define UDP_TUNNEL_GENEVE_4IN6_HDRLEN                               \
	(ETH_HLEN + sizeof(struct iphdr) + sizeof(struct ipv6hdr) + \
	 GENEVE_HLEN + 2 * sizeof(struct udphdr))
#define UDP_TUNNEL_GENEVE_6IN4_HDRLEN                               \
	(ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct iphdr) + \
	 GENEVE_HLEN + 2 * sizeof(struct udphdr))

/* Header overhead for a given flavour; evaluates to 0 for an unknown type. */
#define UDP_TUNNEL_HDRLEN(type)                                             \
	((type) == UDP_TUNNEL_GENEVE_4IN4 ? UDP_TUNNEL_GENEVE_4IN4_HDRLEN : \
	 (type) == UDP_TUNNEL_GENEVE_6IN6 ? UDP_TUNNEL_GENEVE_6IN6_HDRLEN : \
	 (type) == UDP_TUNNEL_GENEVE_4IN6 ? UDP_TUNNEL_GENEVE_4IN6_HDRLEN : \
	 (type) == UDP_TUNNEL_GENEVE_6IN4 ? UDP_TUNNEL_GENEVE_6IN4_HDRLEN : \
					    0)

/* Largest inner UDP payload that fits a standard 1500-byte frame payload. */
#define UDP_TUNNEL_MSS(type) (ETH_DATA_LEN - UDP_TUNNEL_HDRLEN(type))
/* Largest inner UDP payload used by the "maxbytes" variants. */
#define UDP_TUNNEL_MAX(type, is_tap) \
	(ETH_MAX_MTU - UDP_TUNNEL_HDRLEN(type) - ((is_tap) ? ETH_HLEN : 0))

/* Size of the virtio-net header that is prepended to every tun read/write. */
#define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel)
/* Buffer size large enough for the biggest packet any variant produces. */
#define MAX_VNET_TUNNEL_PACKET_SZ                                       \
	(TUN_VNET_TNL_SIZE + ETH_HLEN + UDP_TUNNEL_GENEVE_6IN6_HDRLEN + \
	 ETH_MAX_MTU)
112 
/*
 * Parameters for creating the geneve link; filled by geneve_create() and
 * consumed by geneve_fill_newlink().
 */
struct geneve_setup_config {
	/* AF_INET selects remote.r4, anything else selects remote.r6. */
	int family;
	/* Address of the remote tunnel endpoint. */
	union {
		struct in_addr r4;
		struct in6_addr r6;
	} remote;
	/*
	 * Virtual network id. NOTE(review): declared __be32, but
	 * geneve_create() stores VN_ID without a byte swap - confirm which
	 * byte order the netlink setter expects.
	 */
	__be32 vnid;
	/* UDP port of the tunnel; geneve_create() stores it via htons(). */
	__be16 vnport;
	/* Link-layer address assigned to the geneve link. */
	unsigned char hwaddr[6];
	/* Value passed to the geneve "udp_csum" attribute. */
	uint8_t csum;
};
124 
/*
 * Issue TUNSETQUEUE with IFF_ATTACH_QUEUE on @fd for the device named @dev.
 *
 * Returns the ioctl() result: 0 on success, -1 with errno set on failure.
 * A name that does not fit in ifr_name (including its terminating NUL) is
 * rejected with ENAMETOOLONG before anything is copied, so an over-long
 * name can no longer overflow the request structure.
 */
static int tun_attach(int fd, char *dev)
{
	struct ifreq ifr;

	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	ifr.ifr_flags = IFF_ATTACH_QUEUE;

	return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
135 
/*
 * Issue TUNSETQUEUE with IFF_DETACH_QUEUE on @fd for the device named @dev.
 *
 * Returns the ioctl() result: 0 on success, -1 with errno set on failure.
 * A name that does not fit in ifr_name (including its terminating NUL) is
 * rejected with ENAMETOOLONG before anything is copied, so an over-long
 * name can no longer overflow the request structure.
 */
static int tun_detach(int fd, char *dev)
{
	struct ifreq ifr;

	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	ifr.ifr_flags = IFF_DETACH_QUEUE;

	return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
}
146 
/*
 * Open /dev/net/tun and bind the new descriptor to a multi-queue tap device
 * (IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE) via TUNSETIFF.
 *
 * @dev: in/out device name. The requested name is passed to the kernel and
 *       the name reported back in the request is copied into @dev, so the
 *       buffer must be able to hold IFNAMSIZ bytes.
 *
 * Returns the open descriptor on success or a negative value on failure.
 * A requested name that does not fit in ifr_name is rejected with
 * ENAMETOOLONG before the device is opened, instead of overflowing the
 * request structure.
 */
static int tun_alloc(char *dev)
{
	struct ifreq ifr;
	int fd, err;

	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0) {
		fprintf(stderr, "can't open tun: %s\n", strerror(errno));
		return fd;
	}

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;

	err = ioctl(fd, TUNSETIFF, (void *)&ifr);
	if (err < 0) {
		/* Report before close(), which may overwrite errno. */
		fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
		close(fd);
		return err;
	}
	strcpy(dev, ifr.ifr_name);
	return fd;
}
171 
/* Delete the link named @dev; returns whatever ip_link_del() returns. */
static int tun_delete(char *dev)
{
	return ip_link_del(dev);
}
176 
177 static int tun_open(char *dev, const int flags, const int hdrlen,
178 		    const int features, const unsigned char *mac_addr)
179 {
180 	struct ifreq ifr = { 0 };
181 	int fd, sk = -1;
182 
183 	fd = open("/dev/net/tun", O_RDWR);
184 	if (fd < 0) {
185 		perror("open");
186 		return -1;
187 	}
188 
189 	ifr.ifr_flags = flags;
190 	if (ioctl(fd, TUNSETIFF, (void *)&ifr) < 0) {
191 		perror("ioctl(TUNSETIFF)");
192 		goto err;
193 	}
194 	strcpy(dev, ifr.ifr_name);
195 
196 	if (hdrlen > 0) {
197 		if (ioctl(fd, TUNSETVNETHDRSZ, &hdrlen) < 0) {
198 			perror("ioctl(TUNSETVNETHDRSZ)");
199 			goto err;
200 		}
201 	}
202 
203 	if (features) {
204 		if (ioctl(fd, TUNSETOFFLOAD, features) < 0) {
205 			perror("ioctl(TUNSETOFFLOAD)");
206 			goto err;
207 		}
208 	}
209 
210 	sk = socket(PF_INET, SOCK_DGRAM, 0);
211 	if (sk < 0) {
212 		perror("socket");
213 		goto err;
214 	}
215 
216 	if (ioctl(sk, SIOCGIFFLAGS, &ifr) < 0) {
217 		perror("ioctl(SIOCGIFFLAGS)");
218 		goto err;
219 	}
220 
221 	ifr.ifr_flags |= (IFF_UP | IFF_RUNNING);
222 	if (ioctl(sk, SIOCSIFFLAGS, &ifr) < 0) {
223 		perror("ioctl(SIOCSIFFLAGS)");
224 		goto err;
225 	}
226 
227 	if (mac_addr && flags & IFF_TAP) {
228 		ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
229 		memcpy(ifr.ifr_hwaddr.sa_data, mac_addr, ETH_ALEN);
230 
231 		if (ioctl(sk, SIOCSIFHWADDR, &ifr) < 0) {
232 			perror("ioctl(SIOCSIFHWADDR)");
233 			goto err;
234 		}
235 	}
236 
237 out:
238 	if (sk >= 0)
239 		close(sk);
240 	return fd;
241 
242 err:
243 	close(fd);
244 	fd = -1;
245 	goto out;
246 }
247 
/*
 * Size of the socket address structure for @family: sockaddr_in for
 * AF_INET, sockaddr_in6 for every other value.
 */
static size_t sockaddr_len(int family)
{
	if (family == AF_INET)
		return sizeof(struct sockaddr_in);

	return sizeof(struct sockaddr_in6);
}
253 
254 static int geneve_fill_newlink(struct rt_link_newlink_req *req, void *data)
255 {
256 	struct geneve_setup_config *cfg = data;
257 
258 #define SET_GENEVE_REMOTE rt_link_newlink_req_set_linkinfo_data_geneve_remote
259 #define SET_GENEVE_REMOTE6 rt_link_newlink_req_set_linkinfo_data_geneve_remote6
260 
261 	rt_link_newlink_req_set_address(req, cfg->hwaddr, ETH_ALEN);
262 	rt_link_newlink_req_set_linkinfo_data_geneve_id(req, cfg->vnid);
263 	rt_link_newlink_req_set_linkinfo_data_geneve_port(req, cfg->vnport);
264 	rt_link_newlink_req_set_linkinfo_data_geneve_udp_csum(req, cfg->csum);
265 
266 	if (cfg->family == AF_INET)
267 		SET_GENEVE_REMOTE(req, cfg->remote.r4.s_addr);
268 	else
269 		SET_GENEVE_REMOTE6(req, &cfg->remote.r6,
270 				   sizeof(cfg->remote.r6));
271 
272 	return 0;
273 }
274 
275 static int geneve_create(const char *dev, int family, void *remote,
276 			 void *hwaddr)
277 {
278 	struct geneve_setup_config geneve;
279 
280 	memset(&geneve, 0, sizeof(geneve));
281 	geneve.vnid = VN_ID;
282 	geneve.vnport = htons(VN_PORT);
283 	geneve.csum = 1;
284 	geneve.family = family;
285 	if (family == AF_INET)
286 		memcpy(&geneve.remote.r4, remote, sizeof(struct in_addr));
287 	else
288 		memcpy(&geneve.remote.r6, remote, sizeof(struct in6_addr));
289 	memcpy(geneve.hwaddr, hwaddr, ETH_ALEN);
290 
291 	return ip_link_add(dev, "geneve", geneve_fill_newlink, (void *)&geneve);
292 }
293 
/*
 * Set the path-MTU discovery mode of socket @fd to the "DO" value, using
 * the IPv4 option when @is_ipv4 is true and the IPv6 option otherwise.
 * Returns the setsockopt() result.
 */
static int set_pmtu_discover(int fd, bool is_ipv4)
{
	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;

	return setsockopt(fd, level, name, &val, sizeof(val));
}
310 
311 static int udp_socket_open(struct sockaddr_storage *ssa, bool do_frag,
312 			   bool do_connect, struct sockaddr_storage *dsa)
313 {
314 	struct timeval to = { .tv_sec = TIMEOUT_SEC };
315 	int fd, family = ssa->ss_family;
316 	int salen = sockaddr_len(family);
317 
318 	fd = socket(family, SOCK_DGRAM, 0);
319 	if (fd < 0)
320 		return -1;
321 
322 	if (bind(fd, (struct sockaddr *)ssa, salen) < 0) {
323 		perror("bind");
324 		goto err;
325 	}
326 
327 	if (do_connect && connect(fd, (struct sockaddr *)dsa, salen) < 0) {
328 		perror("connect");
329 		goto err;
330 	}
331 
332 	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &to, sizeof(to)) < 0) {
333 		perror("setsockopt(SO_RCVTIMEO)");
334 		goto err;
335 	}
336 
337 	if (!do_frag && set_pmtu_discover(fd, family == AF_INET) < 0) {
338 		perror("set_pmtu_discover");
339 		goto err;
340 	}
341 	return fd;
342 
343 err:
344 	close(fd);
345 	return -1;
346 }
347 
348 static void parse_route_rsp(struct rt_route_getroute_rsp *rsp, void *rtm_type)
349 {
350 	*(uint8_t *)rtm_type = rsp->_hdr.rtm_type;
351 }
352 
353 static int ip_route_check(const char *intf, int family, void *addr)
354 {
355 	uint8_t rtm_type, table = RT_TABLE_LOCAL;
356 	int retries = MAX_RETRIES;
357 
358 	while (retries-- > 0) {
359 		if (ip_route_get(intf, family, table, addr, parse_route_rsp,
360 				 &rtm_type) == 0 &&
361 		    rtm_type == RTN_LOCAL)
362 			break;
363 
364 		usleep(TIMEOUT_USEC);
365 	}
366 
367 	if (retries < 0)
368 		return -1;
369 
370 	return 0;
371 }
372 
373 static int send_gso_udp_msg(int socket, struct sockaddr_storage *addr,
374 			    uint8_t *send_buf, int send_len, int gso_size)
375 {
376 	char control[CMSG_SPACE(sizeof(uint16_t))] = { 0 };
377 	int alen = sockaddr_len(addr->ss_family);
378 	struct msghdr msg = { 0 };
379 	struct iovec iov = { 0 };
380 	int ret;
381 
382 	iov.iov_base = send_buf;
383 	iov.iov_len = send_len;
384 
385 	msg.msg_iov = &iov;
386 	msg.msg_iovlen = 1;
387 	msg.msg_name = addr;
388 	msg.msg_namelen = alen;
389 
390 	if (gso_size > 0) {
391 		struct cmsghdr *cmsg;
392 
393 		msg.msg_control = control;
394 		msg.msg_controllen = sizeof(control);
395 
396 		cmsg = CMSG_FIRSTHDR(&msg);
397 		cmsg->cmsg_level = SOL_UDP;
398 		cmsg->cmsg_type = UDP_SEGMENT;
399 		cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t));
400 		*(uint16_t *)CMSG_DATA(cmsg) = gso_size;
401 	}
402 
403 	ret = sendmsg(socket, &msg, 0);
404 	if (ret < 0)
405 		perror("sendmsg");
406 
407 	return ret;
408 }
409 
/*
 * Consume @x bytes from a parse cursor.
 *
 * @cur: current position, advanced by @x on success
 * @len: bytes remaining, reduced by @x on success
 *
 * Returns 0 on success, or -1 (leaving both values untouched) when fewer
 * than @x bytes remain.
 */
static int validate_hdrlen(uint8_t **cur, int *len, int x)
{
	const int avail = *len;

	if (x > avail)
		return -1;

	*cur = *cur + x;
	*len = avail - x;
	return 0;
}
418 
419 static int parse_udp_tunnel_vnet_packet(uint8_t *buf, int len, int tunnel_type,
420 					bool is_tap)
421 {
422 	struct ipv6hdr *iph6;
423 	struct udphdr *udph;
424 	struct iphdr *iph4;
425 	uint8_t *cur = buf;
426 
427 	if (validate_hdrlen(&cur, &len, TUN_VNET_TNL_SIZE))
428 		return -1;
429 
430 	if (is_tap) {
431 		if (validate_hdrlen(&cur, &len, ETH_HLEN))
432 			return -1;
433 	}
434 
435 	if (tunnel_type & UDP_TUNNEL_OUTER_IPV4) {
436 		iph4 = (struct iphdr *)cur;
437 		if (validate_hdrlen(&cur, &len, sizeof(struct iphdr)))
438 			return -1;
439 		if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP)
440 			return -1;
441 	} else {
442 		iph6 = (struct ipv6hdr *)cur;
443 		if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr)))
444 			return -1;
445 		if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP)
446 			return -1;
447 	}
448 
449 	udph = (struct udphdr *)cur;
450 	if (validate_hdrlen(&cur, &len, sizeof(struct udphdr)))
451 		return -1;
452 	if (ntohs(udph->dest) != VN_PORT)
453 		return -1;
454 
455 	if (validate_hdrlen(&cur, &len, GENEVE_HLEN))
456 		return -1;
457 	if (validate_hdrlen(&cur, &len, ETH_HLEN))
458 		return -1;
459 
460 	if (tunnel_type & UDP_TUNNEL_INNER_IPV4) {
461 		iph4 = (struct iphdr *)cur;
462 		if (validate_hdrlen(&cur, &len, sizeof(struct iphdr)))
463 			return -1;
464 		if (iph4->version != 4 || iph4->protocol != IPPROTO_UDP)
465 			return -1;
466 	} else {
467 		iph6 = (struct ipv6hdr *)cur;
468 		if (validate_hdrlen(&cur, &len, sizeof(struct ipv6hdr)))
469 			return -1;
470 		if (iph6->version != 6 || iph6->nexthdr != IPPROTO_UDP)
471 			return -1;
472 	}
473 
474 	udph = (struct udphdr *)cur;
475 	if (validate_hdrlen(&cur, &len, sizeof(struct udphdr)))
476 		return -1;
477 	if (ntohs(udph->dest) != UDP_DST_PORT)
478 		return -1;
479 
480 	return len;
481 }
482 
/* Per-test state for the queue attach/detach/delete ordering tests. */
FIXTURE(tun)
{
	/* Device name; filled in by the first tun_alloc() call in setup. */
	char ifname[IFNAMSIZ];
	/* Two descriptors obtained from tun_alloc() with the same name. */
	int fd, fd2;
};
488 
/*
 * Allocate the device with an empty name so that tun_alloc() writes the
 * resulting name back, then call tun_alloc() again with that name to get a
 * second descriptor (presumably a second queue of the same multi-queue
 * device - confirm).
 */
FIXTURE_SETUP(tun)
{
	memset(self->ifname, 0, sizeof(self->ifname));

	self->fd = tun_alloc(self->ifname);
	ASSERT_GE(self->fd, 0);

	self->fd2 = tun_alloc(self->ifname);
	ASSERT_GE(self->fd2, 0);
}
499 
/*
 * Close whichever descriptors are still open; tests that close one
 * themselves mark it by storing -1.
 */
FIXTURE_TEARDOWN(tun)
{
	if (self->fd >= 0)
		close(self->fd);
	if (self->fd2 >= 0)
		close(self->fd2);
}
507 
508 TEST_F(tun, delete_detach_close)
509 {
510 	EXPECT_EQ(tun_delete(self->ifname), 0);
511 	EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
512 	EXPECT_EQ(errno, 22);
513 }
514 
/*
 * Detach the first queue, then delete the device; both are expected to
 * succeed. The descriptors are closed by teardown.
 */
TEST_F(tun, detach_delete_close)
{
	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
	EXPECT_EQ(tun_delete(self->ifname), 0);
}
520 
/*
 * Detach the first queue, close its descriptor, then delete the device;
 * detach and delete are expected to succeed.
 */
TEST_F(tun, detach_close_delete)
{
	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
	close(self->fd);
	/* Tell teardown that this descriptor is already closed. */
	self->fd = -1;
	EXPECT_EQ(tun_delete(self->ifname), 0);
}
528 
/*
 * Detach and re-attach the first queue, then delete the device; all three
 * steps are expected to succeed. The descriptors are closed by teardown.
 */
TEST_F(tun, reattach_delete_close)
{
	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
	EXPECT_EQ(tun_delete(self->ifname), 0);
}
535 
/*
 * Detach and re-attach the first queue, close its descriptor, then delete
 * the device; detach, attach and delete are expected to succeed.
 */
TEST_F(tun, reattach_close_delete)
{
	EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
	EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
	close(self->fd);
	/* Tell teardown that this descriptor is already closed. */
	self->fd = -1;
	EXPECT_EQ(tun_delete(self->ifname), 0);
}
544 
/* Per-test state for the UDP tunnel GSO tests. */
FIXTURE(tun_vnet_udptnl)
{
	/* Name of the tun/tap device; filled in by tun_open() in setup. */
	char ifname[IFNAMSIZ];
	/* fd: tun/tap descriptor; sock: UDP socket using the inner addresses. */
	int fd, sock;
};
550 
/* Parameters of one tunnel GSO test case. */
FIXTURE_VARIANT(tun_vnet_udptnl)
{
	/* One of the UDP_TUNNEL_GENEVE_* flavours. */
	int tunnel_type;
	/* GSO segment size requested for the transfer; 0 when left unset. */
	int gso_size;
	/* Number of inner UDP payload bytes to transfer. */
	int data_size;
	/* Number of datagrams send_gso_packet expects to receive. */
	int r_num_mss;
	/*
	 * is_tap: open the device with IFF_TAP instead of IFF_TUN.
	 * no_gso: recv_gso_packet skips its gso_size/gso_type checks.
	 */
	bool is_tap, no_gso;
};
559 
/*
 * Generate the full set of test variants for one tunnel flavour. @type is a
 * UDP_TUNNEL_GENEVE_* value and @desc is the prefix of each variant name.
 * Every variant uses a tap device.
 */
/* clang-format off */
#define TUN_VNET_UDPTNL_VARIANT_ADD(type, desc)                              \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1byte) {         \
		/* no GSO: send a single byte */                             \
		.tunnel_type = type,                                         \
		.data_size = 1,                                              \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_1mss) {          \
		/* no GSO: send a single MSS, fall back to no GSO */         \
		.tunnel_type = type,                                         \
		.data_size = UDP_TUNNEL_MSS(type),                           \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_nogsosz_gtmss) {         \
		/* no GSO: send a single MSS + 1B: fail */                   \
		.tunnel_type = type,                                         \
		.data_size = UDP_TUNNEL_MSS(type) + 1,                       \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1byte) {                 \
		/* GSO: send 1 byte, gso 1 byte, fall back to no GSO */      \
		.tunnel_type = type,                                         \
		.gso_size = 1,                                               \
		.data_size = 1,                                              \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_1mss) {                  \
		/* send a single MSS: fall back to no GSO */                 \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type),                            \
		.data_size = UDP_TUNNEL_MSS(type),                           \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_ltgso) {                 \
		/* data <= MSS < gso: will fall back to no GSO */            \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type) + 1,                        \
		.data_size = UDP_TUNNEL_MSS(type),                           \
		.r_num_mss = 1,                                              \
		.is_tap = true,                                              \
		.no_gso = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_gtgso) {                 \
		/* GSO: a single MSS + 1B */                                 \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type),                            \
		.data_size = UDP_TUNNEL_MSS(type) + 1,                       \
		.r_num_mss = 2,                                              \
		.is_tap = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_2mss) {                  \
		/* GSO: send exactly 2 MSS */                                \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type),                            \
		.data_size = UDP_TUNNEL_MSS(type) * 2,                       \
		.r_num_mss = 2,                                              \
		.is_tap = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxbytes) {              \
		/* GSO: send max bytes */                                    \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type),                            \
		.data_size = UDP_TUNNEL_MAX(type, true),                     \
		.r_num_mss = UDP_TUNNEL_MAX(type, true) /                    \
			     UDP_TUNNEL_MSS(type) + 1,                       \
		.is_tap = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_over_maxbytes) {         \
		/* GSO: send oversize max bytes: fail */                     \
		.tunnel_type = type,                                         \
		.gso_size = UDP_TUNNEL_MSS(type),                            \
		.data_size = ETH_MAX_MTU,                                    \
		.r_num_mss = ETH_MAX_MTU / UDP_TUNNEL_MSS(type) + 1,         \
		.is_tap = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_maxsegs) {               \
		/* GSO: send max number of min sized segments */             \
		.tunnel_type = type,                                         \
		.gso_size = 1,                                               \
		.data_size = UDP_TUNNEL_MAX_SEGMENTS,                        \
		.r_num_mss = UDP_TUNNEL_MAX_SEGMENTS,                        \
		.is_tap = true,                                              \
	};                                                                   \
	FIXTURE_VARIANT_ADD(tun_vnet_udptnl, desc##_5byte) {                 \
		/* GSO: send 5 bytes, gso 2 bytes */                         \
		.tunnel_type = type,                                         \
		.gso_size = 2,                                               \
		.data_size = 5,                                              \
		.r_num_mss = 3,                                              \
		.is_tap = true,                                              \
	} /* clang-format on */

/* One variant set per inner/outer address family combination. */
TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN4, 4in4);
TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN4, 6in4);
TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_4IN6, 4in6);
TUN_VNET_UDPTNL_VARIANT_ADD(UDP_TUNNEL_GENEVE_6IN6, 6in6);
667 
668 static void assign_ifaddr_vars(int family, int is_outer, void **srcip,
669 			       void **dstip, void **srcmac, void **dstmac)
670 {
671 	if (is_outer) {
672 		if (family == AF_INET) {
673 			*srcip = (void *)&param_ipaddr4_outer_src;
674 			*dstip = (void *)&param_ipaddr4_outer_dst;
675 		} else {
676 			*srcip = (void *)&param_ipaddr6_outer_src;
677 			*dstip = (void *)&param_ipaddr6_outer_dst;
678 		}
679 		*srcmac = param_hwaddr_outer_src;
680 		*dstmac = param_hwaddr_outer_dst;
681 	} else {
682 		if (family == AF_INET) {
683 			*srcip = (void *)&param_ipaddr4_inner_src;
684 			*dstip = (void *)&param_ipaddr4_inner_dst;
685 		} else {
686 			*srcip = (void *)&param_ipaddr6_inner_src;
687 			*dstip = (void *)&param_ipaddr6_inner_dst;
688 		}
689 		*srcmac = param_hwaddr_inner_src;
690 		*dstmac = param_hwaddr_inner_dst;
691 	}
692 }
693 
694 static void assign_sockaddr_vars(int family, int is_outer,
695 				 struct sockaddr_storage *src,
696 				 struct sockaddr_storage *dst)
697 {
698 	src->ss_family = family;
699 	dst->ss_family = family;
700 
701 	if (family == AF_INET) {
702 		struct sockaddr_in *s4 = (struct sockaddr_in *)src;
703 		struct sockaddr_in *d4 = (struct sockaddr_in *)dst;
704 
705 		s4->sin_addr = is_outer ? param_ipaddr4_outer_src :
706 					  param_ipaddr4_inner_src;
707 		d4->sin_addr = is_outer ? param_ipaddr4_outer_dst :
708 					  param_ipaddr4_inner_dst;
709 		if (!is_outer) {
710 			s4->sin_port = htons(UDP_SRC_PORT);
711 			d4->sin_port = htons(UDP_DST_PORT);
712 		}
713 	} else {
714 		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)src;
715 		struct sockaddr_in6 *d6 = (struct sockaddr_in6 *)dst;
716 
717 		s6->sin6_addr = is_outer ? param_ipaddr6_outer_src :
718 					   param_ipaddr6_inner_src;
719 		d6->sin6_addr = is_outer ? param_ipaddr6_outer_dst :
720 					   param_ipaddr6_inner_dst;
721 		if (!is_outer) {
722 			s6->sin6_port = htons(UDP_SRC_PORT);
723 			d6->sin6_port = htons(UDP_DST_PORT);
724 		}
725 	}
726 }
727 
/*
 * Build the test topology: a tun/tap device carrying the outer addresses, a
 * geneve link on top of it carrying the inner addresses, and a UDP socket
 * bound and connected with the inner addresses.
 */
FIXTURE_SETUP(tun_vnet_udptnl)
{
	int ret, family, prefix, flags, features;
	int tunnel_type = variant->tunnel_type;
	struct sockaddr_storage ssa, dsa;
	void *sip, *dip, *smac, *dmac;

	/* Open the device with a tunnel-sized vnet header and GSO offloads. */
	flags = (variant->is_tap ? IFF_TAP : IFF_TUN) | IFF_VNET_HDR |
		IFF_MULTI_QUEUE | IFF_NO_PI;
	features = TUN_F_CSUM | TUN_F_UDP_TUNNEL_GSO |
		   TUN_F_UDP_TUNNEL_GSO_CSUM | TUN_F_USO4 | TUN_F_USO6;
	self->fd = tun_open(self->ifname, flags, TUN_VNET_TNL_SIZE, features,
			    param_hwaddr_outer_src);
	ASSERT_GE(self->fd, 0);

	/* Outer layer: address, neighbour entry for the peer, local route. */
	family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET : AF_INET6;
	prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
	assign_ifaddr_vars(family, 1, &sip, &dip, &smac, &dmac);

	ret = ip_addr_add(self->ifname, family, sip, prefix);
	ASSERT_EQ(ret, 0);
	ret = ip_neigh_add(self->ifname, family, dip, dmac);
	ASSERT_EQ(ret, 0);
	/* Wait until the local route for the new address is visible. */
	ret = ip_route_check(self->ifname, family, sip);
	ASSERT_EQ(ret, 0);

	/* The geneve link's remote endpoint is the outer destination. */
	ret = geneve_create(param_dev_geneve_name, family, dip,
			    param_hwaddr_inner_src);
	ASSERT_EQ(ret, 0);

	/* Inner layer: the same three steps on the geneve link. */
	family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET : AF_INET6;
	prefix = (family == AF_INET) ? IPPREFIX_LEN : IP6PREFIX_LEN;
	assign_ifaddr_vars(family, 0, &sip, &dip, &smac, &dmac);

	ret = ip_addr_add(param_dev_geneve_name, family, sip, prefix);
	ASSERT_EQ(ret, 0);
	ret = ip_neigh_add(param_dev_geneve_name, family, dip, dmac);
	ASSERT_EQ(ret, 0);
	ret = ip_route_check(param_dev_geneve_name, family, sip);
	ASSERT_EQ(ret, 0);

	/* do_frag=false, do_connect=true: PMTU discovery on, peer connected. */
	assign_sockaddr_vars(family, 0, &ssa, &dsa);
	self->sock = udp_socket_open(&ssa, false, true, &dsa);
	ASSERT_GE(self->sock, 0);
}
773 
774 FIXTURE_TEARDOWN(tun_vnet_udptnl)
775 {
776 	int ret;
777 
778 	if (self->sock != -1)
779 		close(self->sock);
780 
781 	ret = ip_link_del(param_dev_geneve_name);
782 	EXPECT_EQ(ret, 0);
783 
784 	ret = tun_delete(self->ifname);
785 	EXPECT_EQ(ret, 0);
786 }
787 
/*
 * Build, in @buf, a complete tunnelled UDP packet as it is written to the
 * tun device for @variant: vnet header, optional outer Ethernet header,
 * outer IP and UDP headers, geneve header, inner Ethernet, inner IP and
 * the inner UDP packet with variant->data_size payload bytes.
 *
 * The build_*() helpers come from tuntap_helpers.h; each is used here as
 * returning the number of bytes it wrote.
 * NOTE(review): the meaning and order of the helper arguments (which
 * address/port is source vs destination) is not visible from this file -
 * confirm against the helper definitions.
 *
 * Returns the total number of bytes written to @buf.
 */
static int build_gso_packet_into_tun(const FIXTURE_VARIANT(tun_vnet_udptnl) *
					     variant,
				     uint8_t *buf)
{
	int pktlen, hlen, proto, inner_family, outer_family;
	int tunnel_type = variant->tunnel_type;
	int payload_len = variant->data_size;
	int gso_size = variant->gso_size;
	uint8_t *outer_udph, *cur = buf;
	void *sip, *dip, *smac, *dmac;
	bool is_tap = variant->is_tap;

	/* All header bytes in front of the inner UDP payload. */
	hlen = (is_tap ? ETH_HLEN : 0) + UDP_TUNNEL_HDRLEN(tunnel_type);
	inner_family = (tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
							       AF_INET6;
	outer_family = (tunnel_type & UDP_TUNNEL_OUTER_IPV4) ? AF_INET :
							       AF_INET6;

	cur += build_virtio_net_hdr_v1_hash_tunnel(cur, is_tap, hlen, gso_size,
						   outer_family, inner_family);

	/*
	 * pktlen tracks the bytes that follow the header being built; it is
	 * reduced as each outer header is emitted.
	 */
	pktlen = hlen + payload_len;
	assign_ifaddr_vars(outer_family, 1, &sip, &dip, &smac, &dmac);

	if (is_tap) {
		proto = outer_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
		pktlen -= ETH_HLEN;
		cur += build_eth(cur, proto, dmac, smac);
	}

	if (outer_family == AF_INET) {
		pktlen = pktlen - sizeof(struct iphdr);
		cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
	} else {
		pktlen = pktlen - sizeof(struct ipv6hdr);
		cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);
	}

	/* Remember the outer UDP header; its checksum is filled in last. */
	outer_udph = cur;
	assign_ifaddr_vars(inner_family, 0, &sip, &dip, &smac, &dmac);

	pktlen -= sizeof(struct udphdr);
	proto = inner_family == AF_INET ? ETH_P_IP : ETH_P_IPV6;
	cur += build_udp_header(cur, UDP_SRC_PORT, VN_PORT, pktlen);
	cur += build_geneve_header(cur, VN_ID);
	cur += build_eth(cur, proto, dmac, smac);

	/* The inner IP payload is the inner UDP header plus the data. */
	pktlen = sizeof(struct udphdr) + payload_len;
	if (inner_family == AF_INET)
		cur += build_ipv4_header(cur, IPPROTO_UDP, pktlen, dip, sip);
	else
		cur += build_ipv6_header(cur, IPPROTO_UDP, 0, pktlen, dip, sip);

	cur += build_udp_packet(cur, UDP_DST_PORT, UDP_SRC_PORT, payload_len,
				inner_family, false);

	/* Done after the inner packet is complete, as it is covered too. */
	build_udp_packet_csum(outer_udph, outer_family, false);

	return cur - buf;
}
848 
849 static int
850 receive_gso_packet_from_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
851 			       const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
852 			       int *r_num_mss)
853 {
854 	uint8_t packet_buf[MAX_VNET_TUNNEL_PACKET_SZ];
855 	int len, total_len = 0, socket = self->sock;
856 	int payload_len = variant->data_size;
857 
858 	while (total_len < payload_len) {
859 		len = recv(socket, packet_buf, sizeof(packet_buf), 0);
860 		if (len <= 0) {
861 			if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
862 				perror("recv");
863 			break;
864 		}
865 
866 		(*r_num_mss)++;
867 		total_len += len;
868 	}
869 
870 	return total_len;
871 }
872 
873 static int send_gso_packet_into_tunnel(FIXTURE_DATA(tun_vnet_udptnl) * self,
874 				       const FIXTURE_VARIANT(tun_vnet_udptnl) *
875 					       variant)
876 {
877 	int family = (variant->tunnel_type & UDP_TUNNEL_INNER_IPV4) ? AF_INET :
878 								      AF_INET6;
879 	uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ] = { 0 };
880 	int payload_len = variant->data_size;
881 	int gso_size = variant->gso_size;
882 	struct sockaddr_storage ssa, dsa;
883 
884 	assign_sockaddr_vars(family, 0, &ssa, &dsa);
885 	return send_gso_udp_msg(self->sock, &dsa, buf, payload_len, gso_size);
886 }
887 
888 static int
889 receive_gso_packet_from_tun(FIXTURE_DATA(tun_vnet_udptnl) * self,
890 			    const FIXTURE_VARIANT(tun_vnet_udptnl) * variant,
891 			    struct virtio_net_hdr_v1_hash_tunnel *vnet_hdr)
892 {
893 	struct timeval timeout = { .tv_sec = TIMEOUT_SEC };
894 	uint8_t buf[MAX_VNET_TUNNEL_PACKET_SZ];
895 	int tunnel_type = variant->tunnel_type;
896 	int payload_len = variant->data_size;
897 	bool is_tap = variant->is_tap;
898 	int ret, len, total_len = 0;
899 	int tun_fd = self->fd;
900 	fd_set fdset;
901 
902 	while (total_len < payload_len) {
903 		FD_ZERO(&fdset);
904 		FD_SET(tun_fd, &fdset);
905 
906 		ret = select(tun_fd + 1, &fdset, NULL, NULL, &timeout);
907 		if (ret <= 0) {
908 			perror("select");
909 			break;
910 		}
911 		if (!FD_ISSET(tun_fd, &fdset))
912 			continue;
913 
914 		len = read(tun_fd, buf, sizeof(buf));
915 		if (len <= 0) {
916 			if (len < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
917 				perror("read");
918 			break;
919 		}
920 
921 		len = parse_udp_tunnel_vnet_packet(buf, len, tunnel_type,
922 						   is_tap);
923 		if (len < 0)
924 			continue;
925 
926 		if (total_len == 0)
927 			memcpy(vnet_hdr, buf, TUN_VNET_TNL_SIZE);
928 
929 		total_len += len;
930 	}
931 
932 	return total_len;
933 }
934 
/*
 * Write one tunnelled packet into the tun device and check what arrives on
 * the UDP socket: the total number of bytes must equal the variant's
 * data_size and the number of datagrams must equal its r_num_mss.
 */
TEST_F(tun_vnet_udptnl, send_gso_packet)
{
	uint8_t pkt[MAX_VNET_TUNNEL_PACKET_SZ];
	int r_num_mss = 0;
	int ret, off;

	memset(pkt, 0, sizeof(pkt));
	off = build_gso_packet_into_tun(variant, pkt);
	/* The whole packet has to be accepted in a single write. */
	ret = write(self->fd, pkt, off);
	ASSERT_EQ(ret, off);

	ret = receive_gso_packet_from_tunnel(self, variant, &r_num_mss);
	ASSERT_EQ(ret, variant->data_size);
	ASSERT_EQ(r_num_mss, variant->r_num_mss);
}
950 
/*
 * Send data from the UDP socket and check what is read from the tun device:
 * the payload bytes must add up to the variant's data_size. Unless the
 * variant sets no_gso, the vnet header of the first packet must also carry
 * the variant's gso_size and a gso_type of VIRTIO_NET_HDR_GSO_UDP_L4
 * combined with the tunnel flag that matches the outer address family.
 */
TEST_F(tun_vnet_udptnl, recv_gso_packet)
{
	struct virtio_net_hdr_v1_hash_tunnel vnet_hdr = { 0 };
	struct virtio_net_hdr_v1 *vh = &vnet_hdr.hash_hdr.hdr;
	int ret, gso_type = VIRTIO_NET_HDR_GSO_UDP_L4;

	ret = send_gso_packet_into_tunnel(self, variant);
	ASSERT_EQ(ret, variant->data_size);

	memset(&vnet_hdr, 0, sizeof(vnet_hdr));
	ret = receive_gso_packet_from_tun(self, variant, &vnet_hdr);
	ASSERT_EQ(ret, variant->data_size);

	if (!variant->no_gso) {
		ASSERT_EQ(vh->gso_size, variant->gso_size);
		gso_type |= (variant->tunnel_type & UDP_TUNNEL_OUTER_IPV4) ?
				    (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4) :
				    (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6);
		ASSERT_EQ(vh->gso_type, gso_type);
	}
}
972 
/*
 * Expected failures: a payload of one MSS plus one byte sent without a GSO
 * size (the variant comment marks this case as "fail").
 */
XFAIL_ADD(tun_vnet_udptnl, 4in4_nogsosz_gtmss, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in4_nogsosz_gtmss, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 4in6_nogsosz_gtmss, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in6_nogsosz_gtmss, recv_gso_packet);

/* Expected failures: ETH_MAX_MTU payload bytes written into the tun device. */
XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, send_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, send_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, send_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, send_gso_packet);

/* Expected failures: ETH_MAX_MTU payload bytes sent from the UDP socket. */
XFAIL_ADD(tun_vnet_udptnl, 4in4_over_maxbytes, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in4_over_maxbytes, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 4in6_over_maxbytes, recv_gso_packet);
XFAIL_ADD(tun_vnet_udptnl, 6in6_over_maxbytes, recv_gso_packet);

/* Entry point supplied by the kselftest harness. */
TEST_HARNESS_MAIN
989