xref: /linux/tools/testing/selftests/net/ipsec.c (revision 37a93dd5c49b5fda807fd204edf2547c3493319c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ipsec.c - Check xfrm on veth inside a net-ns.
4  * Copyright (c) 2018 Dmitry Safonov
5  */
6 
7 #define _GNU_SOURCE
8 
9 #include <arpa/inet.h>
10 #include <asm/types.h>
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <limits.h>
14 #include <linux/limits.h>
15 #include <linux/netlink.h>
16 #include <linux/random.h>
17 #include <linux/rtnetlink.h>
18 #include <linux/veth.h>
19 #include <linux/xfrm.h>
20 #include <netinet/in.h>
21 #include <net/if.h>
22 #include <sched.h>
23 #include <stdbool.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/socket.h>
30 #include <sys/stat.h>
31 #include <sys/syscall.h>
32 #include <sys/types.h>
33 #include <sys/wait.h>
34 #include <time.h>
35 #include <unistd.h>
36 
37 #include "kselftest.h"
38 
/*
 * Log helper: forwards to ksft_print_msg() and prefixes every message with
 * the pid of the calling process and the source line of the call site.
 * A trailing newline is appended to the format.
 */
#define printk(fmt, ...)						\
	ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)

/* printk() with ": %m" appended, i.e. followed by the current errno text. */
#define pr_err(fmt, ...)	printk(fmt ": %m", ##__VA_ARGS__)

/*
 * Compile-time assertion: the array size becomes negative (a compile error)
 * when condition is true; the expression has no runtime effect.
 */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

/* Fallback for environments whose headers did not provide offsetof(). */
#ifndef offsetof
#define offsetof(TYPE, MEMBER)	__builtin_offsetof(TYPE, MEMBER)
#endif

#define IPV4_STR_SZ	16	/* xxx.xxx.xxx.xxx is longest + \0 */
/* Size of the attribute area that follows the fixed header in requests */
#define MAX_PAYLOAD	2048
/* Size of the key buffer placed behind the xfrm algorithm header */
#define XFRM_ALGO_KEY_BUF_SIZE	512
#define MAX_PROCESSES	(1 << 14) /* /16 mask divided by /30 subnets */
#define INADDR_A	((in_addr_t) 0x0a000000) /* 10.0.0.0 */
#define INADDR_B	((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
56 
/* /30 mask for one veth connection */
#define PREFIX_LEN	30
/*
 * Host numbers of the two usable addresses inside the nr-th /30 subnet
 * (four addresses per subnet, offsets 1 and 2).  The argument is
 * parenthesized so that expressions such as child_ip(i + 1) expand
 * correctly.
 */
#define child_ip(nr)	(4 * (nr) + 1)
#define grchild_ip(nr)	(4 * (nr) + 2)
61 
/* printf-style pattern for veth interface names and the buffer size for them */
#define VETH_FMT	"ktst-%d"
#define VETH_LEN	12

/* Number of entries xfrm_fill_key() scans in xfrm_key_entries[] */
#define XFRM_ALGO_NR_KEYS 29

/*
 * File descriptors referring to the network namespaces created by
 * init_namespaces(); -1 until they are opened.
 */
static int nsfd_parent	= -1;
static int nsfd_childa	= -1;
static int nsfd_childb	= -1;
/* Used by do_ping() as the length of every ping packet */
static long page_size;

/*
 * ksft_cnt is static in kselftest, so isn't shared with children.
 * We have to send a test result back to parent and count there.
 * results_fd is a pipe with test feedback from children.
 */
static int results_fd[2];

/* Pause between two pings, passed to nanosleep() as tv_nsec */
const unsigned int ping_delay_nsec	= 50 * 1000 * 1000;
/* Receive timeout of the ping socket, used as tv_usec of SO_RCVTIMEO */
const unsigned int ping_timeout		= 300;
/* Number of pings per do_ping() run */
const unsigned int ping_count		= 100;
/* Minimum number of successful pings for do_ping() to report success */
const unsigned int ping_success		= 80;
83 
/* One row of the algorithm table: algorithm name and the key length to use */
struct xfrm_key_entry {
	char algo_name[35];
	int key_len;
};

/*
 * Key length for every algorithm name known to xfrm_fill_key().
 * Names that are not listed here are left without a key length.
 */
struct xfrm_key_entry xfrm_key_entries[] = {
	{ .algo_name = "digest_null",				.key_len = 0 },
	{ .algo_name = "ecb(cipher_null)",			.key_len = 0 },
	{ .algo_name = "cbc(des)",				.key_len = 64 },
	{ .algo_name = "hmac(md5)",				.key_len = 128 },
	{ .algo_name = "cmac(aes)",				.key_len = 128 },
	{ .algo_name = "xcbc(aes)",				.key_len = 128 },
	{ .algo_name = "cbc(cast5)",				.key_len = 128 },
	{ .algo_name = "cbc(serpent)",				.key_len = 128 },
	{ .algo_name = "hmac(sha1)",				.key_len = 160 },
	{ .algo_name = "hmac(rmd160)",				.key_len = 160 },
	{ .algo_name = "cbc(des3_ede)",				.key_len = 192 },
	{ .algo_name = "hmac(sha256)",				.key_len = 256 },
	{ .algo_name = "cbc(aes)",				.key_len = 256 },
	{ .algo_name = "cbc(camellia)",				.key_len = 256 },
	{ .algo_name = "cbc(twofish)",				.key_len = 256 },
	{ .algo_name = "rfc3686(ctr(aes))",			.key_len = 288 },
	{ .algo_name = "hmac(sha384)",				.key_len = 384 },
	{ .algo_name = "cbc(blowfish)",				.key_len = 448 },
	{ .algo_name = "hmac(sha512)",				.key_len = 512 },
	{ .algo_name = "rfc4106(gcm(aes))-128",			.key_len = 160 },
	{ .algo_name = "rfc4543(gcm(aes))-128",			.key_len = 160 },
	{ .algo_name = "rfc4309(ccm(aes))-128",			.key_len = 152 },
	{ .algo_name = "rfc4106(gcm(aes))-192",			.key_len = 224 },
	{ .algo_name = "rfc4543(gcm(aes))-192",			.key_len = 224 },
	{ .algo_name = "rfc4309(ccm(aes))-192",			.key_len = 216 },
	{ .algo_name = "rfc4106(gcm(aes))-256",			.key_len = 288 },
	{ .algo_name = "rfc4543(gcm(aes))-256",			.key_len = 288 },
	{ .algo_name = "rfc4309(ccm(aes))-256",			.key_len = 280 },
	{ .algo_name = "rfc7539(chacha20,poly1305)-128",	.key_len = 0 },
};
120 
/*
 * Fill buf with buflen bytes taken from rand().
 *
 * Every rand() result is copied with memcpy(), so buf may have any
 * alignment and any effective type; nothing is written past buflen.
 * One rand() call is consumed per sizeof(int) bytes (rounded up), and a
 * zero buflen is a no-op.
 */
static void randomize_buffer(void *buf, size_t buflen)
{
	unsigned char *dst = buf;

	while (buflen) {
		int word = rand();
		size_t chunk = buflen < sizeof(word) ? buflen : sizeof(word);

		memcpy(dst, &word, chunk);
		dst += chunk;
		buflen -= chunk;
	}
}
141 
142 static int unshare_open(void)
143 {
144 	const char *netns_path = "/proc/self/ns/net";
145 	int fd;
146 
147 	if (unshare(CLONE_NEWNET) != 0) {
148 		pr_err("unshare()");
149 		return -1;
150 	}
151 
152 	fd = open(netns_path, O_RDONLY);
153 	if (fd <= 0) {
154 		pr_err("open(%s)", netns_path);
155 		return -1;
156 	}
157 
158 	return fd;
159 }
160 
161 static int switch_ns(int fd)
162 {
163 	if (setns(fd, CLONE_NEWNET)) {
164 		pr_err("setns()");
165 		return -1;
166 	}
167 	return 0;
168 }
169 
170 /*
171  * Running the test inside a new parent net namespace to bother less
172  * about cleanup on error-path.
173  */
174 static int init_namespaces(void)
175 {
176 	nsfd_parent = unshare_open();
177 	if (nsfd_parent <= 0)
178 		return -1;
179 
180 	nsfd_childa = unshare_open();
181 	if (nsfd_childa <= 0)
182 		return -1;
183 
184 	if (switch_ns(nsfd_parent))
185 		return -1;
186 
187 	nsfd_childb = unshare_open();
188 	if (nsfd_childb <= 0)
189 		return -1;
190 
191 	if (switch_ns(nsfd_parent))
192 		return -1;
193 	return 0;
194 }
195 
/*
 * Open a netlink socket for protocol proto, or reuse an already open one.
 *
 * @sock:   in/out descriptor; a value > 0 is treated as "already open"
 * @seq_nr: sequence counter; randomized when a socket is created and
 *          advanced by one when an open socket is reused
 * @proto:  netlink protocol passed to socket()
 *
 * Returns 0 on success, -1 (after logging) when socket() fails.
 */
static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
{
	if (*sock > 0) {
		/* Bump the counter itself, not the local pointer to it */
		(*seq_nr)++;
		return 0;
	}

	*sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
	if (*sock <= 0) {
		pr_err("socket(AF_NETLINK)");
		return -1;
	}

	randomize_buffer(seq_nr, sizeof(*seq_nr));

	return 0;
}
213 
/*
 * Return the position right behind the current end of the message
 * (nlmsg_len rounded up to the attribute alignment), i.e. where the
 * next attribute would be placed.
 */
static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
{
	char *msg_start = (char *)nh;
	size_t aligned_len = RTA_ALIGN(nh->nlmsg_len);

	return (struct rtattr *)(msg_start + aligned_len);
}
218 
/*
 * Append one attribute to the message in nh.
 *
 * @nh:       message header; nlmsg_len is grown to cover the attribute
 * @req_sz:   size of the buffer that holds the whole message
 * @rta_type: attribute type
 * @payload:  attribute data, may be NULL to emit the header only
 * @size:     payload length in bytes
 *
 * Returns 0 on success, -1 (after logging) when the buffer is too small.
 */
static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
		unsigned short rta_type, const void *payload, size_t size)
{
	/* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
	struct rtattr *attr = rtattr_hdr(nh);
	size_t attr_len = RTA_LENGTH(size);
	size_t new_len = RTA_ALIGN(nh->nlmsg_len) + attr_len;

	if (new_len > req_sz) {
		printk("req buf is too small: %zu < %zu", req_sz, new_len);
		return -1;
	}

	attr->rta_type = rta_type;
	attr->rta_len = attr_len;
	nh->nlmsg_len = new_len;

	if (payload)
		memcpy(RTA_DATA(attr), payload, size);

	return 0;
}
239 
/*
 * Append an attribute like rtattr_pack() and hand back a pointer to it,
 * so that the caller can nest further attributes and later fix up the
 * length with rtattr_end().  Returns NULL when packing fails.
 */
static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
		unsigned short rta_type, const void *payload, size_t size)
{
	/* Must be sampled before rtattr_pack() moves nlmsg_len */
	struct rtattr *start = rtattr_hdr(nh);
	int err = rtattr_pack(nh, req_sz, rta_type, payload, size);

	return err ? NULL : start;
}
250 
/*
 * Open a nested attribute that carries no payload of its own.
 * Returns the attribute to pass to rtattr_end(), or NULL on failure.
 */
static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
		unsigned short rta_type)
{
	return _rtattr_begin(nh, req_sz, rta_type, NULL, 0);
}
256 
/*
 * Close a nested attribute: its length becomes the distance from the
 * attribute header to the current end of the message.
 */
static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
{
	char *attr_start = (char *)attr;
	char *msg_tail = (char *)nh + nh->nlmsg_len;

	attr->rta_len = msg_tail - attr_start;
}
263 
/*
 * Pack the VETH_INFO_PEER attribute that describes the second end of a
 * veth pair: an ifinfomsg followed by the peer name and the descriptor
 * of the namespace the peer is created in.
 *
 * Returns 0 on success, -1 when the request buffer is too small.
 */
static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
		const char *peer, int ns)
{
	struct ifinfomsg peer_info;
	struct rtattr *nest;

	memset(&peer_info, 0, sizeof(peer_info));
	peer_info.ifi_change	= 0xFFFFFFFF;
	peer_info.ifi_family	= AF_UNSPEC;

	nest = _rtattr_begin(nh, req_sz, VETH_INFO_PEER,
			&peer_info, sizeof(peer_info));
	if (!nest)
		return -1;

	if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)) ||
	    rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
		return -1;

	rtattr_end(nh, nest);

	return 0;
}
288 
/*
 * Receive the acknowledgement for the request just sent on sock.
 *
 * Returns 0 when the answer is an NLMSG_ERROR message with error == 0,
 * the (negative) error code carried by the answer when it is non-zero,
 * and -1 when recv() fails, the answer is too short to contain the
 * error field, or it is not an NLMSG_ERROR message.
 */
static int netlink_check_answer(int sock)
{
	struct nlmsgerror {
		struct nlmsghdr hdr;
		int error;
		struct nlmsghdr orig_msg;
	} answer;
	ssize_t bytes;

	memset(&answer, 0, sizeof(answer));

	bytes = recv(sock, &answer, sizeof(answer), 0);
	if (bytes < 0) {
		pr_err("recv()");
		return -1;
	}

	/* Don't interpret fields the kernel never filled in */
	if ((size_t)bytes < offsetof(struct nlmsgerror, orig_msg)) {
		printk("netlink answer is too short: %zd bytes", bytes);
		return -1;
	}

	if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
		printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
		return -1;
	}

	if (answer.error) {
		printk("NLMSG_ERROR: %d: %s",
			answer.error, strerror(-answer.error));
		return answer.error;
	}

	return 0;
}
311 
312 static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
313 		const char *peerb, int ns_b)
314 {
315 	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
316 	struct {
317 		struct nlmsghdr		nh;
318 		struct ifinfomsg	info;
319 		char			attrbuf[MAX_PAYLOAD];
320 	} req;
321 	const char veth_type[] = "veth";
322 	struct rtattr *link_info, *info_data;
323 
324 	memset(&req, 0, sizeof(req));
325 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
326 	req.nh.nlmsg_type	= RTM_NEWLINK;
327 	req.nh.nlmsg_flags	= flags;
328 	req.nh.nlmsg_seq	= seq;
329 	req.info.ifi_family	= AF_UNSPEC;
330 	req.info.ifi_change	= 0xFFFFFFFF;
331 
332 	if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
333 		return -1;
334 
335 	if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
336 		return -1;
337 
338 	link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
339 	if (!link_info)
340 		return -1;
341 
342 	if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
343 		return -1;
344 
345 	info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
346 	if (!info_data)
347 		return -1;
348 
349 	if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
350 		return -1;
351 
352 	rtattr_end(&req.nh, info_data);
353 	rtattr_end(&req.nh, link_info);
354 
355 	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
356 		pr_err("send()");
357 		return -1;
358 	}
359 	return netlink_check_answer(sock);
360 }
361 
362 static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
363 		struct in_addr addr, uint8_t prefix)
364 {
365 	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
366 	struct {
367 		struct nlmsghdr		nh;
368 		struct ifaddrmsg	info;
369 		char			attrbuf[MAX_PAYLOAD];
370 	} req;
371 
372 	memset(&req, 0, sizeof(req));
373 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
374 	req.nh.nlmsg_type	= RTM_NEWADDR;
375 	req.nh.nlmsg_flags	= flags;
376 	req.nh.nlmsg_seq	= seq;
377 	req.info.ifa_family	= AF_INET;
378 	req.info.ifa_prefixlen	= prefix;
379 	req.info.ifa_index	= if_nametoindex(intf);
380 
381 #ifdef DEBUG
382 	{
383 		char addr_str[IPV4_STR_SZ] = {};
384 
385 		strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
386 
387 		printk("ip addr set %s", addr_str);
388 	}
389 #endif
390 
391 	if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
392 		return -1;
393 
394 	if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
395 		return -1;
396 
397 	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
398 		pr_err("send()");
399 		return -1;
400 	}
401 	return netlink_check_answer(sock);
402 }
403 
404 static int link_set_up(int sock, uint32_t seq, const char *intf)
405 {
406 	struct {
407 		struct nlmsghdr		nh;
408 		struct ifinfomsg	info;
409 		char			attrbuf[MAX_PAYLOAD];
410 	} req;
411 
412 	memset(&req, 0, sizeof(req));
413 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
414 	req.nh.nlmsg_type	= RTM_NEWLINK;
415 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
416 	req.nh.nlmsg_seq	= seq;
417 	req.info.ifi_family	= AF_UNSPEC;
418 	req.info.ifi_change	= 0xFFFFFFFF;
419 	req.info.ifi_index	= if_nametoindex(intf);
420 	req.info.ifi_flags	= IFF_UP;
421 	req.info.ifi_change	= IFF_UP;
422 
423 	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
424 		pr_err("send()");
425 		return -1;
426 	}
427 	return netlink_check_answer(sock);
428 }
429 
430 static int ip4_route_set(int sock, uint32_t seq, const char *intf,
431 		struct in_addr src, struct in_addr dst)
432 {
433 	struct {
434 		struct nlmsghdr	nh;
435 		struct rtmsg	rt;
436 		char		attrbuf[MAX_PAYLOAD];
437 	} req;
438 	unsigned int index = if_nametoindex(intf);
439 
440 	memset(&req, 0, sizeof(req));
441 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.rt));
442 	req.nh.nlmsg_type	= RTM_NEWROUTE;
443 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
444 	req.nh.nlmsg_seq	= seq;
445 	req.rt.rtm_family	= AF_INET;
446 	req.rt.rtm_dst_len	= 32;
447 	req.rt.rtm_table	= RT_TABLE_MAIN;
448 	req.rt.rtm_protocol	= RTPROT_BOOT;
449 	req.rt.rtm_scope	= RT_SCOPE_LINK;
450 	req.rt.rtm_type		= RTN_UNICAST;
451 
452 	if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
453 		return -1;
454 
455 	if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
456 		return -1;
457 
458 	if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
459 		return -1;
460 
461 	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
462 		pr_err("send()");
463 		return -1;
464 	}
465 
466 	return netlink_check_answer(sock);
467 }
468 
469 static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
470 		struct in_addr tunsrc, struct in_addr tundst)
471 {
472 	if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
473 			tunsrc, PREFIX_LEN)) {
474 		printk("Failed to set ipv4 addr");
475 		return -1;
476 	}
477 
478 	if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
479 		printk("Failed to set ipv4 route");
480 		return -1;
481 	}
482 
483 	return 0;
484 }
485 
486 static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
487 {
488 	struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
489 	struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
490 	struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
491 	int route_sock = -1, ret = -1;
492 	uint32_t route_seq;
493 
494 	if (switch_ns(nsfd))
495 		return -1;
496 
497 	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
498 		printk("Failed to open netlink route socket in child");
499 		return -1;
500 	}
501 
502 	if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
503 		printk("Failed to set ipv4 addr");
504 		goto err;
505 	}
506 
507 	if (link_set_up(route_sock, route_seq++, veth)) {
508 		printk("Failed to bring up %s", veth);
509 		goto err;
510 	}
511 
512 	if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
513 		printk("Failed to add tunnel route on %s", veth);
514 		goto err;
515 	}
516 	ret = 0;
517 
518 err:
519 	close(route_sock);
520 	return ret;
521 }
522 
/* Size of the algorithm name fields in struct xfrm_desc */
#define ALGO_LEN	64
/* Kind of test a struct xfrm_desc describes */
enum desc_type {
	CREATE_TUNNEL	= 0,
	ALLOCATE_SPI,
	MONITOR_ACQUIRE,
	EXPIRE_STATE,
	EXPIRE_POLICY,
	SPDINFO_ATTRS,
};
/*
 * Human readable test names, listed in the same order as enum desc_type
 * (presumably indexed by it - the lookup is not part of this section).
 */
const char *desc_name[] = {
	"create tunnel",
	"alloc spi",
	"monitor acquire",
	"expire state",
	"expire policy",
	"spdinfo attributes",
	""
};
/*
 * Description of one xfrm test.
 *
 * proto selects which algorithm names xfrm_state_pack_algo() expects:
 * IPPROTO_AH uses a_algo, IPPROTO_COMP uses c_algo, IPPROTO_ESP uses
 * either ae_algo (together with icv_len) or the e_algo/a_algo pair.
 * Unused names are empty strings.
 */
struct xfrm_desc {
	enum desc_type	type;
	uint8_t		proto;
	char		a_algo[ALGO_LEN];	/* authentication algorithm */
	char		e_algo[ALGO_LEN];	/* encryption algorithm */
	char		c_algo[ALGO_LEN];	/* compression algorithm */
	char		ae_algo[ALGO_LEN];	/* AEAD algorithm */
	unsigned int	icv_len;		/* copied to alg_icv_len for AEAD */
	/* unsigned key_len; */
};
551 
/* Type tag of the messages exchanged through write_msg()/read_msg() */
enum msg_type {
	MSG_ACK		= 0,
	MSG_EXIT,
	MSG_PING,
	MSG_XFRM_PREPARE,
	MSG_XFRM_ADD,
	MSG_XFRM_DEL,
	MSG_XFRM_CLEANUP,
};

/*
 * One message as transferred by write_msg()/read_msg(); it is always
 * written and read as a whole struct.
 */
struct test_desc {
	enum msg_type type;
	union {
		/* MSG_PING: address and UDP port the sender listens on */
		struct {
			in_addr_t reply_ip;
			unsigned int port;
		} ping;
		struct xfrm_desc xfrm_desc;
	} body;
};

/* Record written to the results pipe by write_test_result() */
struct test_result {
	struct xfrm_desc desc;
	unsigned int res;
};
577 
578 static void write_test_result(unsigned int res, struct xfrm_desc *d)
579 {
580 	struct test_result tr = {};
581 	ssize_t ret;
582 
583 	tr.desc = *d;
584 	tr.res = res;
585 
586 	ret = write(results_fd[1], &tr, sizeof(tr));
587 	if (ret != sizeof(tr))
588 		pr_err("Failed to write the result in pipe %zd", ret);
589 }
590 
591 static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
592 {
593 	ssize_t bytes = write(fd, msg, sizeof(*msg));
594 
595 	/* Make sure that write/read is atomic to a pipe */
596 	BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
597 
598 	if (bytes < 0) {
599 		pr_err("write()");
600 		if (exit_of_fail)
601 			exit(KSFT_FAIL);
602 	}
603 	if (bytes != sizeof(*msg)) {
604 		pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
605 		if (exit_of_fail)
606 			exit(KSFT_FAIL);
607 	}
608 }
609 
610 static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
611 {
612 	ssize_t bytes = read(fd, msg, sizeof(*msg));
613 
614 	if (bytes < 0) {
615 		pr_err("read()");
616 		if (exit_of_fail)
617 			exit(KSFT_FAIL);
618 	}
619 	if (bytes != sizeof(*msg)) {
620 		pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
621 		if (exit_of_fail)
622 			exit(KSFT_FAIL);
623 	}
624 }
625 
/*
 * Create the two UDP sockets used for pinging.
 *
 * sock[0] is bound to listen_ip on a port chosen by the kernel and gets
 * a receive timeout of u_timeout microseconds; the chosen port is
 * stored in *server_port (host byte order).  sock[1] is an unbound
 * socket for sending.
 *
 * Returns 0 on success; on failure logs, closes sock[0] if it was
 * opened and returns -1.
 */
static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
		unsigned int *server_port, int sock[2])
{
	struct timeval rcv_timeout = { .tv_sec = 0, .tv_usec = u_timeout };
	struct sockaddr_in bound;
	socklen_t bound_len = sizeof(bound);

	sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock[0] < 0) {
		pr_err("socket()");
		return -1;
	}

	bound.sin_family	= AF_INET;
	bound.sin_port		= 0;
	bound.sin_addr		= listen_ip;

	if (bind(sock[0], (struct sockaddr *)&bound, bound_len)) {
		pr_err("bind()");
		goto close_listener;
	}

	/* Find out which port the kernel picked */
	if (getsockname(sock[0], (struct sockaddr *)&bound, &bound_len)) {
		pr_err("getsockname()");
		goto close_listener;
	}
	*server_port = ntohs(bound.sin_port);

	if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO,
			(const char *)&rcv_timeout, sizeof(rcv_timeout))) {
		pr_err("setsockopt()");
		goto close_listener;
	}

	sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
	if (sock[1] >= 0)
		return 0;

	pr_err("socket()");

close_listener:
	close(sock[0]);
	return -1;
}
672 
/*
 * Send buf to dest_ip:port through sock[1] and wait on sock[0] for the
 * same bytes to come back.
 *
 * @dest_ip is used as-is for sin_addr.s_addr, @port is converted with
 * htons().
 *
 * Returns 0 when an identical packet of buf_len bytes was received,
 * -1 otherwise (a receive failing with EAGAIN is not logged).
 */
static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
		char *buf, size_t buf_len)
{
	struct sockaddr_in server;
	const struct sockaddr *dest_addr = (struct sockaddr *)&server;
	/* Byte buffer for the echoed packet */
	char sock_buf[buf_len];
	ssize_t r_bytes, s_bytes;

	memset(&server, 0, sizeof(server));
	server.sin_family	= AF_INET;
	server.sin_port		= htons(port);
	server.sin_addr.s_addr	= dest_ip;

	s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
	if (s_bytes < 0) {
		pr_err("sendto()");
		return -1;
	} else if ((size_t)s_bytes != buf_len) {
		printk("send part of the message: %zd/%zu", s_bytes, buf_len);
		return -1;
	}

	r_bytes = recv(sock[0], sock_buf, buf_len, 0);
	if (r_bytes < 0) {
		if (errno != EAGAIN)
			pr_err("recv()");
		return -1;
	} else if (r_bytes == 0) { /* EOF */
		printk("EOF on reply to ping");
		return -1;
	} else if ((size_t)r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
		printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
		return -1;
	}

	return 0;
}
709 
/*
 * Wait on sock[0] for a packet equal to buf and echo buf back to
 * dest_ip:port through sock[1].
 *
 * @dest_ip is used as-is for sin_addr.s_addr, @port is converted with
 * htons().
 *
 * Returns 0 when the expected packet arrived and the echo was sent
 * completely, -1 otherwise (a receive failing with EAGAIN is not
 * logged).
 */
static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
		char *buf, size_t buf_len)
{
	struct sockaddr_in server;
	const struct sockaddr *dest_addr = (struct sockaddr *)&server;
	/* Byte buffer for the incoming packet */
	char sock_buf[buf_len];
	ssize_t r_bytes, s_bytes;

	memset(&server, 0, sizeof(server));
	server.sin_family	= AF_INET;
	server.sin_port		= htons(port);
	server.sin_addr.s_addr	= dest_ip;

	r_bytes = recv(sock[0], sock_buf, buf_len, 0);
	if (r_bytes < 0) {
		if (errno != EAGAIN)
			pr_err("recv()");
		return -1;
	}
	if (r_bytes == 0) { /* EOF */
		printk("EOF on reply to ping");
		return -1;
	}
	if ((size_t)r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
		printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
		return -1;
	}

	s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
	if (s_bytes < 0) {
		pr_err("sendto()");
		return -1;
	} else if ((size_t)s_bytes != buf_len) {
		printk("send part of the message: %zd/%zu", s_bytes, buf_len);
		return -1;
	}

	return 0;
}
748 
749 typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
750 		char *buf, size_t buf_len);
751 static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
752 		bool init_side, int d_port, in_addr_t to, ping_f func)
753 {
754 	struct test_desc msg;
755 	unsigned int s_port, i, ping_succeeded = 0;
756 	int ping_sock[2];
757 	char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
758 
759 	if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
760 		printk("Failed to init ping");
761 		return -1;
762 	}
763 
764 	memset(&msg, 0, sizeof(msg));
765 	msg.type		= MSG_PING;
766 	msg.body.ping.port	= s_port;
767 	memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
768 
769 	write_msg(cmd_fd, &msg, 0);
770 	if (init_side) {
771 		/* The other end sends ip to ping */
772 		read_msg(cmd_fd, &msg, 0);
773 		if (msg.type != MSG_PING)
774 			return -1;
775 		to = msg.body.ping.reply_ip;
776 		d_port = msg.body.ping.port;
777 	}
778 
779 	for (i = 0; i < ping_count ; i++) {
780 		struct timespec sleep_time = {
781 			.tv_sec = 0,
782 			.tv_nsec = ping_delay_nsec,
783 		};
784 
785 		ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
786 		nanosleep(&sleep_time, 0);
787 	}
788 
789 	close(ping_sock[0]);
790 	close(ping_sock[1]);
791 
792 	strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
793 	strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
794 
795 	if (ping_succeeded < ping_success) {
796 		printk("ping (%s) %s->%s failed %u/%u times",
797 			init_side ? "send" : "reply", from_str, to_str,
798 			ping_count - ping_succeeded, ping_count);
799 		return -1;
800 	}
801 
802 #ifdef DEBUG
803 	printk("ping (%s) %s->%s succeeded %u/%u times",
804 		init_side ? "send" : "reply", from_str, to_str,
805 		ping_succeeded, ping_count);
806 #endif
807 
808 	return 0;
809 }
810 
811 static int xfrm_fill_key(char *name, char *buf,
812 		size_t buf_len, unsigned int *key_len)
813 {
814 	int i;
815 
816 	for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) {
817 		if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0)
818 			*key_len = xfrm_key_entries[i].key_len;
819 	}
820 
821 	if (*key_len > buf_len) {
822 		printk("Can't pack a key - too big for buffer");
823 		return -1;
824 	}
825 
826 	randomize_buffer(buf, *key_len);
827 
828 	return 0;
829 }
830 
831 static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
832 		struct xfrm_desc *desc)
833 {
834 	union {
835 		union {
836 			struct xfrm_algo	alg;
837 			struct xfrm_algo_aead	aead;
838 			struct xfrm_algo_auth	auth;
839 		} u;
840 		struct {
841 			unsigned char __offset_to_FAM[offsetof(struct xfrm_algo_auth, alg_key)];
842 			char buf[XFRM_ALGO_KEY_BUF_SIZE];
843 		};
844 	} alg = {};
845 	size_t alen, elen, clen, aelen;
846 	unsigned short type;
847 
848 	alen = strlen(desc->a_algo);
849 	elen = strlen(desc->e_algo);
850 	clen = strlen(desc->c_algo);
851 	aelen = strlen(desc->ae_algo);
852 
853 	/* Verify desc */
854 	switch (desc->proto) {
855 	case IPPROTO_AH:
856 		if (!alen || elen || clen || aelen) {
857 			printk("BUG: buggy ah desc");
858 			return -1;
859 		}
860 		strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
861 		if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
862 				sizeof(alg.buf), &alg.u.alg.alg_key_len))
863 			return -1;
864 		type = XFRMA_ALG_AUTH;
865 		break;
866 	case IPPROTO_COMP:
867 		if (!clen || elen || alen || aelen) {
868 			printk("BUG: buggy comp desc");
869 			return -1;
870 		}
871 		strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
872 		if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
873 				sizeof(alg.buf), &alg.u.alg.alg_key_len))
874 			return -1;
875 		type = XFRMA_ALG_COMP;
876 		break;
877 	case IPPROTO_ESP:
878 		if (!((alen && elen) ^ aelen) || clen) {
879 			printk("BUG: buggy esp desc");
880 			return -1;
881 		}
882 		if (aelen) {
883 			alg.u.aead.alg_icv_len = desc->icv_len;
884 			strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
885 			if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
886 						sizeof(alg.buf), &alg.u.aead.alg_key_len))
887 				return -1;
888 			type = XFRMA_ALG_AEAD;
889 		} else {
890 
891 			strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
892 			type = XFRMA_ALG_CRYPT;
893 			if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
894 						sizeof(alg.buf), &alg.u.alg.alg_key_len))
895 				return -1;
896 			if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
897 				return -1;
898 
899 			strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN);
900 			type = XFRMA_ALG_AUTH;
901 			if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
902 						sizeof(alg.buf), &alg.u.alg.alg_key_len))
903 				return -1;
904 		}
905 		break;
906 	default:
907 		printk("BUG: unknown proto in desc");
908 		return -1;
909 	}
910 
911 	if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
912 		return -1;
913 
914 	return 0;
915 }
916 
/*
 * Derive the SPI for a tunnel from its source address: the local
 * (host) part of src as returned by inet_lnaof(), in network byte
 * order.
 */
static inline uint32_t gen_spi(struct in_addr src)
{
	in_addr_t host_part = inet_lnaof(src);

	return htonl(host_part);
}
921 
922 static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
923 		struct in_addr src, struct in_addr dst,
924 		struct xfrm_desc *desc)
925 {
926 	struct {
927 		struct nlmsghdr		nh;
928 		struct xfrm_usersa_info	info;
929 		char			attrbuf[MAX_PAYLOAD];
930 	} req;
931 
932 	memset(&req, 0, sizeof(req));
933 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
934 	req.nh.nlmsg_type	= XFRM_MSG_NEWSA;
935 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
936 	req.nh.nlmsg_seq	= seq;
937 
938 	/* Fill selector. */
939 	memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
940 	memcpy(&req.info.sel.saddr, &src, sizeof(src));
941 	req.info.sel.family		= AF_INET;
942 	req.info.sel.prefixlen_d	= PREFIX_LEN;
943 	req.info.sel.prefixlen_s	= PREFIX_LEN;
944 
945 	/* Fill id */
946 	memcpy(&req.info.id.daddr, &dst, sizeof(dst));
947 	/* Note: zero-spi cannot be deleted */
948 	req.info.id.spi = spi;
949 	req.info.id.proto	= desc->proto;
950 
951 	memcpy(&req.info.saddr, &src, sizeof(src));
952 
953 	/* Fill lifteme_cfg */
954 	req.info.lft.soft_byte_limit	= XFRM_INF;
955 	req.info.lft.hard_byte_limit	= XFRM_INF;
956 	req.info.lft.soft_packet_limit	= XFRM_INF;
957 	req.info.lft.hard_packet_limit	= XFRM_INF;
958 
959 	req.info.family		= AF_INET;
960 	req.info.mode		= XFRM_MODE_TUNNEL;
961 
962 	if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
963 		return -1;
964 
965 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
966 		pr_err("send()");
967 		return -1;
968 	}
969 
970 	return netlink_check_answer(xfrm_sock);
971 }
972 
973 static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
974 		struct in_addr src, struct in_addr dst,
975 		struct xfrm_desc *desc)
976 {
977 	if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
978 		return false;
979 
980 	if (memcmp(&info->sel.saddr, &src, sizeof(src)))
981 		return false;
982 
983 	if (info->sel.family != AF_INET					||
984 			info->sel.prefixlen_d != PREFIX_LEN		||
985 			info->sel.prefixlen_s != PREFIX_LEN)
986 		return false;
987 
988 	if (info->id.spi != spi || info->id.proto != desc->proto)
989 		return false;
990 
991 	if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
992 		return false;
993 
994 	if (memcmp(&info->saddr, &src, sizeof(src)))
995 		return false;
996 
997 	if (info->lft.soft_byte_limit != XFRM_INF			||
998 			info->lft.hard_byte_limit != XFRM_INF		||
999 			info->lft.soft_packet_limit != XFRM_INF		||
1000 			info->lft.hard_packet_limit != XFRM_INF)
1001 		return false;
1002 
1003 	if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
1004 		return false;
1005 
1006 	/* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
1007 
1008 	return true;
1009 }
1010 
1011 static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
1012 		struct in_addr src, struct in_addr dst,
1013 		struct xfrm_desc *desc)
1014 {
1015 	struct {
1016 		struct nlmsghdr		nh;
1017 		char			attrbuf[MAX_PAYLOAD];
1018 	} req;
1019 	struct {
1020 		struct nlmsghdr		nh;
1021 		union {
1022 			struct xfrm_usersa_info	info;
1023 			int error;
1024 		};
1025 		char			attrbuf[MAX_PAYLOAD];
1026 	} answer;
1027 	struct xfrm_address_filter filter = {};
1028 	bool found = false;
1029 
1030 
1031 	memset(&req, 0, sizeof(req));
1032 	req.nh.nlmsg_len	= NLMSG_LENGTH(0);
1033 	req.nh.nlmsg_type	= XFRM_MSG_GETSA;
1034 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_DUMP;
1035 	req.nh.nlmsg_seq	= seq;
1036 
1037 	/*
1038 	 * Add dump filter by source address as there may be other tunnels
1039 	 * in this netns (if tests run in parallel).
1040 	 */
1041 	filter.family = AF_INET;
1042 	filter.splen = 0x1f;	/* 0xffffffff mask see addr_match() */
1043 	memcpy(&filter.saddr, &src, sizeof(src));
1044 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
1045 				&filter, sizeof(filter)))
1046 		return -1;
1047 
1048 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1049 		pr_err("send()");
1050 		return -1;
1051 	}
1052 
1053 	while (1) {
1054 		if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
1055 			pr_err("recv()");
1056 			return -1;
1057 		}
1058 		if (answer.nh.nlmsg_type == NLMSG_ERROR) {
1059 			printk("NLMSG_ERROR: %d: %s",
1060 				answer.error, strerror(-answer.error));
1061 			return -1;
1062 		} else if (answer.nh.nlmsg_type == NLMSG_DONE) {
1063 			if (found)
1064 				return 0;
1065 			printk("didn't find allocated xfrm state in dump");
1066 			return -1;
1067 		} else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
1068 			if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
1069 				found = true;
1070 		}
1071 	}
1072 }
1073 
/*
 * Install the xfrm states for both directions between src and dst and
 * verify that both show up in a state dump.  The SPI for both
 * directions is derived from src.  Each request consumes one sequence
 * number from *seq.
 *
 * Returns 0 on success, -1 (after logging) on failure.
 */
static int xfrm_set(int xfrm_sock, uint32_t *seq,
		struct in_addr src, struct in_addr dst,
		struct in_addr tunsrc, struct in_addr tundst,
		struct xfrm_desc *desc)
{
	const uint32_t spi = gen_spi(src);
	int fwd, rev;

	if (xfrm_state_add(xfrm_sock, (*seq)++, spi, src, dst, desc)) {
		printk("Failed to add xfrm state");
		return -1;
	}

	if (xfrm_state_add(xfrm_sock, (*seq)++, spi, dst, src, desc)) {
		printk("Failed to add xfrm state");
		return -1;
	}

	/* Check dumps for XFRM_MSG_GETSA; both directions are always checked */
	fwd = xfrm_state_check(xfrm_sock, (*seq)++, spi, src, dst, desc);
	rev = xfrm_state_check(xfrm_sock, (*seq)++, spi, dst, src, desc);
	if (fwd || rev) {
		printk("Failed to check xfrm state");
		return -1;
	}

	return 0;
}
1103 
1104 static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
1105 		struct in_addr src, struct in_addr dst, uint8_t dir,
1106 		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
1107 {
1108 	struct {
1109 		struct nlmsghdr			nh;
1110 		struct xfrm_userpolicy_info	info;
1111 		char				attrbuf[MAX_PAYLOAD];
1112 	} req;
1113 	struct xfrm_user_tmpl tmpl;
1114 
1115 	memset(&req, 0, sizeof(req));
1116 	memset(&tmpl, 0, sizeof(tmpl));
1117 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
1118 	req.nh.nlmsg_type	= XFRM_MSG_NEWPOLICY;
1119 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1120 	req.nh.nlmsg_seq	= seq;
1121 
1122 	/* Fill selector. */
1123 	memcpy(&req.info.sel.daddr, &dst, sizeof(tundst));
1124 	memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc));
1125 	req.info.sel.family		= AF_INET;
1126 	req.info.sel.prefixlen_d	= PREFIX_LEN;
1127 	req.info.sel.prefixlen_s	= PREFIX_LEN;
1128 
1129 	/* Fill lifteme_cfg */
1130 	req.info.lft.soft_byte_limit	= XFRM_INF;
1131 	req.info.lft.hard_byte_limit	= XFRM_INF;
1132 	req.info.lft.soft_packet_limit	= XFRM_INF;
1133 	req.info.lft.hard_packet_limit	= XFRM_INF;
1134 
1135 	req.info.dir = dir;
1136 
1137 	/* Fill tmpl */
1138 	memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
1139 	/* Note: zero-spi cannot be deleted */
1140 	tmpl.id.spi = spi;
1141 	tmpl.id.proto	= proto;
1142 	tmpl.family	= AF_INET;
1143 	memcpy(&tmpl.saddr, &src, sizeof(src));
1144 	tmpl.mode	= XFRM_MODE_TUNNEL;
1145 	tmpl.aalgos = (~(uint32_t)0);
1146 	tmpl.ealgos = (~(uint32_t)0);
1147 	tmpl.calgos = (~(uint32_t)0);
1148 
1149 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
1150 		return -1;
1151 
1152 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1153 		pr_err("send()");
1154 		return -1;
1155 	}
1156 
1157 	return netlink_check_answer(xfrm_sock);
1158 }
1159 
/*
 * Add the pair of policies for one tunnel: an outgoing policy for
 * @src -> @dst and an incoming one for @dst -> @src. Each request takes a
 * fresh sequence number from *@seq; the second request is not sent when the
 * first one fails.
 *
 * Returns 0 on success, -1 if either policy could not be added.
 */
static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
		struct in_addr src, struct in_addr dst,
		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
{
	int err;

	err = xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
			      XFRM_POLICY_OUT, tunsrc, tundst, proto);
	if (!err)
		err = xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
				      XFRM_POLICY_IN, tunsrc, tundst, proto);

	if (err) {
		printk("Failed to add xfrm policy");
		return -1;
	}

	return 0;
}
1178 
1179 static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
1180 		struct in_addr src, struct in_addr dst, uint8_t dir,
1181 		struct in_addr tunsrc, struct in_addr tundst)
1182 {
1183 	struct {
1184 		struct nlmsghdr			nh;
1185 		struct xfrm_userpolicy_id	id;
1186 		char				attrbuf[MAX_PAYLOAD];
1187 	} req;
1188 
1189 	memset(&req, 0, sizeof(req));
1190 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.id));
1191 	req.nh.nlmsg_type	= XFRM_MSG_DELPOLICY;
1192 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1193 	req.nh.nlmsg_seq	= seq;
1194 
1195 	/* Fill id */
1196 	memcpy(&req.id.sel.daddr, &dst, sizeof(tundst));
1197 	memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc));
1198 	req.id.sel.family		= AF_INET;
1199 	req.id.sel.prefixlen_d		= PREFIX_LEN;
1200 	req.id.sel.prefixlen_s		= PREFIX_LEN;
1201 	req.id.dir = dir;
1202 
1203 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1204 		pr_err("send()");
1205 		return -1;
1206 	}
1207 
1208 	return netlink_check_answer(xfrm_sock);
1209 }
1210 
/*
 * Remove the pair of policies installed by xfrm_prepare(): the outgoing
 * policy for @src -> @dst and the incoming one for @dst -> @src. Each
 * request takes a fresh sequence number from *@seq.
 *
 * Returns 0 on success, -1 as soon as one deletion fails (the remaining
 * deletion is then not attempted).
 */
static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
		struct in_addr src, struct in_addr dst,
		struct in_addr tunsrc, struct in_addr tundst)
{
	if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
				XFRM_POLICY_OUT, tunsrc, tundst)) {
		/* This path deletes a policy; say so in the log. */
		printk("Failed to delete xfrm policy");
		return -1;
	}

	if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
				XFRM_POLICY_IN, tunsrc, tundst)) {
		printk("Failed to delete xfrm policy");
		return -1;
	}

	return 0;
}
1229 
1230 static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
1231 		struct in_addr src, struct in_addr dst, uint8_t proto)
1232 {
1233 	struct {
1234 		struct nlmsghdr		nh;
1235 		struct xfrm_usersa_id	id;
1236 		char			attrbuf[MAX_PAYLOAD];
1237 	} req;
1238 	xfrm_address_t saddr = {};
1239 
1240 	memset(&req, 0, sizeof(req));
1241 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.id));
1242 	req.nh.nlmsg_type	= XFRM_MSG_DELSA;
1243 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1244 	req.nh.nlmsg_seq	= seq;
1245 
1246 	memcpy(&req.id.daddr, &dst, sizeof(dst));
1247 	req.id.family		= AF_INET;
1248 	req.id.proto		= proto;
1249 	/* Note: zero-spi cannot be deleted */
1250 	req.id.spi = spi;
1251 
1252 	memcpy(&saddr, &src, sizeof(src));
1253 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
1254 		return -1;
1255 
1256 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1257 		pr_err("send()");
1258 		return -1;
1259 	}
1260 
1261 	return netlink_check_answer(xfrm_sock);
1262 }
1263 
/*
 * Remove both xfrm states of a tunnel: first the one for @src -> @dst, then
 * the one for @dst -> @src. Both use the SPI generated from @src. The second
 * deletion is not attempted when the first one fails.
 *
 * @tunsrc and @tundst are accepted but not used here.
 *
 * Returns 0 on success, -1 if a state could not be removed.
 */
static int xfrm_delete(int xfrm_sock, uint32_t *seq,
		struct in_addr src, struct in_addr dst,
		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
{
	int err;

	err = xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto);
	if (!err)
		err = xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto);

	if (err) {
		printk("Failed to remove xfrm state");
		return -1;
	}

	return 0;
}
1280 
1281 static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
1282 		uint32_t spi, uint8_t proto)
1283 {
1284 	struct {
1285 		struct nlmsghdr			nh;
1286 		struct xfrm_userspi_info	spi;
1287 	} req;
1288 	struct {
1289 		struct nlmsghdr			nh;
1290 		union {
1291 			struct xfrm_usersa_info	info;
1292 			int error;
1293 		};
1294 	} answer;
1295 
1296 	memset(&req, 0, sizeof(req));
1297 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.spi));
1298 	req.nh.nlmsg_type	= XFRM_MSG_ALLOCSPI;
1299 	req.nh.nlmsg_flags	= NLM_F_REQUEST;
1300 	req.nh.nlmsg_seq	= (*seq)++;
1301 
1302 	req.spi.info.family	= AF_INET;
1303 	req.spi.min		= spi;
1304 	req.spi.max		= spi;
1305 	req.spi.info.id.proto	= proto;
1306 
1307 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1308 		pr_err("send()");
1309 		return KSFT_FAIL;
1310 	}
1311 
1312 	if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
1313 		pr_err("recv()");
1314 		return KSFT_FAIL;
1315 	} else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
1316 		uint32_t new_spi = htonl(answer.info.id.spi);
1317 
1318 		if (new_spi != spi) {
1319 			printk("allocated spi is different from requested: %#x != %#x",
1320 					new_spi, spi);
1321 			return KSFT_FAIL;
1322 		}
1323 		return KSFT_PASS;
1324 	} else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
1325 		printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
1326 		return KSFT_FAIL;
1327 	}
1328 
1329 	printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
1330 	return (answer.error) ? KSFT_FAIL : KSFT_PASS;
1331 }
1332 
/*
 * Open a netlink socket for @proto via netlink_sock() and bind it to the
 * multicast @groups mask, then verify with getsockname() that the bound
 * address is a full sockaddr_nl of family AF_NETLINK.
 *
 * On success *@sock holds the bound socket and 0 is returned. On any failure
 * after the socket was opened it is closed again and -1 is returned.
 */
static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
{
	struct sockaddr_nl snl = {
		.nl_family = AF_NETLINK,
		.nl_groups = groups,
	};
	socklen_t addr_len = sizeof(snl);

	if (netlink_sock(sock, seq, proto)) {
		printk("Failed to open xfrm netlink socket");
		return -1;
	}

	if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
		pr_err("bind()");
		goto err_close;
	}

	if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
		pr_err("getsockname()");
		goto err_close;
	}

	if (addr_len != sizeof(snl)) {
		printk("Wrong address length %d", addr_len);
		goto err_close;
	}

	if (snl.nl_family != AF_NETLINK) {
		printk("Wrong address family %d", snl.nl_family);
		goto err_close;
	}

	return 0;

err_close:
	close(*sock);
	return -1;
}
1371 
1372 static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
1373 {
1374 	struct {
1375 		struct nlmsghdr nh;
1376 		union {
1377 			struct xfrm_user_acquire acq;
1378 			int error;
1379 		};
1380 		char attrbuf[MAX_PAYLOAD];
1381 	} req;
1382 	struct xfrm_user_tmpl xfrm_tmpl = {};
1383 	int xfrm_listen = -1, ret = KSFT_FAIL;
1384 	uint32_t seq_listen;
1385 
1386 	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
1387 		return KSFT_FAIL;
1388 
1389 	memset(&req, 0, sizeof(req));
1390 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.acq));
1391 	req.nh.nlmsg_type	= XFRM_MSG_ACQUIRE;
1392 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1393 	req.nh.nlmsg_seq	= (*seq)++;
1394 
1395 	req.acq.policy.sel.family	= AF_INET;
1396 	req.acq.aalgos	= 0xfeed;
1397 	req.acq.ealgos	= 0xbaad;
1398 	req.acq.calgos	= 0xbabe;
1399 
1400 	xfrm_tmpl.family = AF_INET;
1401 	xfrm_tmpl.id.proto = IPPROTO_ESP;
1402 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
1403 		goto out_close;
1404 
1405 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1406 		pr_err("send()");
1407 		goto out_close;
1408 	}
1409 
1410 	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
1411 		pr_err("recv()");
1412 		goto out_close;
1413 	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
1414 		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
1415 		goto out_close;
1416 	}
1417 
1418 	if (req.error) {
1419 		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
1420 		ret = req.error;
1421 		goto out_close;
1422 	}
1423 
1424 	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
1425 		pr_err("recv()");
1426 		goto out_close;
1427 	}
1428 
1429 	if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
1430 			|| req.acq.calgos != 0xbabe) {
1431 		printk("xfrm_user_acquire has changed  %x %x %x",
1432 				req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
1433 		goto out_close;
1434 	}
1435 
1436 	ret = KSFT_PASS;
1437 out_close:
1438 	close(xfrm_listen);
1439 	return ret;
1440 }
1441 
1442 static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
1443 		unsigned int nr, struct xfrm_desc *desc)
1444 {
1445 	struct {
1446 		struct nlmsghdr nh;
1447 		union {
1448 			struct xfrm_user_expire expire;
1449 			int error;
1450 		};
1451 	} req;
1452 	struct in_addr src, dst;
1453 	int xfrm_listen = -1, ret = KSFT_FAIL;
1454 	uint32_t seq_listen;
1455 
1456 	src = inet_makeaddr(INADDR_B, child_ip(nr));
1457 	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
1458 
1459 	if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
1460 		printk("Failed to add xfrm state");
1461 		return KSFT_FAIL;
1462 	}
1463 
1464 	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
1465 		return KSFT_FAIL;
1466 
1467 	memset(&req, 0, sizeof(req));
1468 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.expire));
1469 	req.nh.nlmsg_type	= XFRM_MSG_EXPIRE;
1470 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1471 	req.nh.nlmsg_seq	= (*seq)++;
1472 
1473 	memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
1474 	req.expire.state.id.spi		= gen_spi(src);
1475 	req.expire.state.id.proto	= desc->proto;
1476 	req.expire.state.family		= AF_INET;
1477 	req.expire.hard			= 0xff;
1478 
1479 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1480 		pr_err("send()");
1481 		goto out_close;
1482 	}
1483 
1484 	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
1485 		pr_err("recv()");
1486 		goto out_close;
1487 	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
1488 		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
1489 		goto out_close;
1490 	}
1491 
1492 	if (req.error) {
1493 		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
1494 		ret = req.error;
1495 		goto out_close;
1496 	}
1497 
1498 	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
1499 		pr_err("recv()");
1500 		goto out_close;
1501 	}
1502 
1503 	if (req.expire.hard != 0x1) {
1504 		printk("expire.hard is not set: %x", req.expire.hard);
1505 		goto out_close;
1506 	}
1507 
1508 	ret = KSFT_PASS;
1509 out_close:
1510 	close(xfrm_listen);
1511 	return ret;
1512 }
1513 
1514 static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
1515 		unsigned int nr, struct xfrm_desc *desc)
1516 {
1517 	struct {
1518 		struct nlmsghdr nh;
1519 		union {
1520 			struct xfrm_user_polexpire expire;
1521 			int error;
1522 		};
1523 	} req;
1524 	struct in_addr src, dst, tunsrc, tundst;
1525 	int xfrm_listen = -1, ret = KSFT_FAIL;
1526 	uint32_t seq_listen;
1527 
1528 	src = inet_makeaddr(INADDR_B, child_ip(nr));
1529 	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
1530 	tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
1531 	tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
1532 
1533 	if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
1534 				XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
1535 		printk("Failed to add xfrm policy");
1536 		return KSFT_FAIL;
1537 	}
1538 
1539 	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
1540 		return KSFT_FAIL;
1541 
1542 	memset(&req, 0, sizeof(req));
1543 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.expire));
1544 	req.nh.nlmsg_type	= XFRM_MSG_POLEXPIRE;
1545 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1546 	req.nh.nlmsg_seq	= (*seq)++;
1547 
1548 	/* Fill selector. */
1549 	memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst));
1550 	memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc));
1551 	req.expire.pol.sel.family	= AF_INET;
1552 	req.expire.pol.sel.prefixlen_d	= PREFIX_LEN;
1553 	req.expire.pol.sel.prefixlen_s	= PREFIX_LEN;
1554 	req.expire.pol.dir		= XFRM_POLICY_OUT;
1555 	req.expire.hard			= 0xff;
1556 
1557 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1558 		pr_err("send()");
1559 		goto out_close;
1560 	}
1561 
1562 	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
1563 		pr_err("recv()");
1564 		goto out_close;
1565 	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
1566 		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
1567 		goto out_close;
1568 	}
1569 
1570 	if (req.error) {
1571 		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
1572 		ret = req.error;
1573 		goto out_close;
1574 	}
1575 
1576 	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
1577 		pr_err("recv()");
1578 		goto out_close;
1579 	}
1580 
1581 	if (req.expire.hard != 0x1) {
1582 		printk("expire.hard is not set: %x", req.expire.hard);
1583 		goto out_close;
1584 	}
1585 
1586 	ret = KSFT_PASS;
1587 out_close:
1588 	close(xfrm_listen);
1589 	return ret;
1590 }
1591 
1592 static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
1593 		unsigned thresh4_l, unsigned thresh4_r,
1594 		unsigned thresh6_l, unsigned thresh6_r,
1595 		bool add_bad_attr)
1596 
1597 {
1598 	struct {
1599 		struct nlmsghdr		nh;
1600 		union {
1601 			uint32_t	unused;
1602 			int		error;
1603 		};
1604 		char			attrbuf[MAX_PAYLOAD];
1605 	} req;
1606 	struct xfrmu_spdhthresh thresh;
1607 
1608 	memset(&req, 0, sizeof(req));
1609 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
1610 	req.nh.nlmsg_type	= XFRM_MSG_NEWSPDINFO;
1611 	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
1612 	req.nh.nlmsg_seq	= (*seq)++;
1613 
1614 	thresh.lbits = thresh4_l;
1615 	thresh.rbits = thresh4_r;
1616 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
1617 		return -1;
1618 
1619 	thresh.lbits = thresh6_l;
1620 	thresh.rbits = thresh6_r;
1621 	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
1622 		return -1;
1623 
1624 	if (add_bad_attr) {
1625 		BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
1626 		if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
1627 			pr_err("adding attribute failed: no space");
1628 			return -1;
1629 		}
1630 	}
1631 
1632 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1633 		pr_err("send()");
1634 		return -1;
1635 	}
1636 
1637 	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
1638 		pr_err("recv()");
1639 		return -1;
1640 	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
1641 		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
1642 		return -1;
1643 	}
1644 
1645 	if (req.error) {
1646 		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
1647 		return -1;
1648 	}
1649 
1650 	return 0;
1651 }
1652 
1653 static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
1654 {
1655 	struct {
1656 		struct nlmsghdr			nh;
1657 		union {
1658 			uint32_t	unused;
1659 			int		error;
1660 		};
1661 		char			attrbuf[MAX_PAYLOAD];
1662 	} req;
1663 
1664 	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
1665 		pr_err("Can't set SPD HTHRESH");
1666 		return KSFT_FAIL;
1667 	}
1668 
1669 	memset(&req, 0, sizeof(req));
1670 
1671 	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.unused));
1672 	req.nh.nlmsg_type	= XFRM_MSG_GETSPDINFO;
1673 	req.nh.nlmsg_flags	= NLM_F_REQUEST;
1674 	req.nh.nlmsg_seq	= (*seq)++;
1675 	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
1676 		pr_err("send()");
1677 		return KSFT_FAIL;
1678 	}
1679 
1680 	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
1681 		pr_err("recv()");
1682 		return KSFT_FAIL;
1683 	} else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
1684 		size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
1685 		struct rtattr *attr = (void *)req.attrbuf;
1686 		int got_thresh = 0;
1687 
1688 		for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
1689 			if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
1690 				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
1691 
1692 				got_thresh++;
1693 				if (t->lbits != 32 || t->rbits != 31) {
1694 					pr_err("thresh differ: %u, %u",
1695 							t->lbits, t->rbits);
1696 					return KSFT_FAIL;
1697 				}
1698 			}
1699 			if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
1700 				struct xfrmu_spdhthresh *t = RTA_DATA(attr);
1701 
1702 				got_thresh++;
1703 				if (t->lbits != 120 || t->rbits != 16) {
1704 					pr_err("thresh differ: %u, %u",
1705 							t->lbits, t->rbits);
1706 					return KSFT_FAIL;
1707 				}
1708 			}
1709 		}
1710 		if (got_thresh != 2) {
1711 			pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
1712 			return KSFT_FAIL;
1713 		}
1714 	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
1715 		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
1716 		return KSFT_FAIL;
1717 	} else {
1718 		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
1719 		return -1;
1720 	}
1721 
1722 	/* Restore the default */
1723 	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
1724 		pr_err("Can't restore SPD HTHRESH");
1725 		return KSFT_FAIL;
1726 	}
1727 
1728 	/*
1729 	 * At this moment xfrm uses nlmsg_parse_deprecated(), which
1730 	 * implies NL_VALIDATE_LIBERAL - ignoring attributes with
1731 	 * (type > maxtype). nla_parse_depricated_strict() would enforce
1732 	 * it. Or even stricter nla_parse().
1733 	 * Right now it's not expected to fail, but to be ignored.
1734 	 */
1735 	if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
1736 		return KSFT_PASS;
1737 
1738 	return KSFT_PASS;
1739 }
1740 
1741 static int child_serv(int xfrm_sock, uint32_t *seq,
1742 		unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
1743 {
1744 	struct in_addr src, dst, tunsrc, tundst;
1745 	struct test_desc msg;
1746 	int ret = KSFT_FAIL;
1747 
1748 	src = inet_makeaddr(INADDR_B, child_ip(nr));
1749 	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
1750 	tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
1751 	tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
1752 
1753 	/* UDP pinging without xfrm */
1754 	if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
1755 		printk("ping failed before setting xfrm");
1756 		return KSFT_FAIL;
1757 	}
1758 
1759 	memset(&msg, 0, sizeof(msg));
1760 	msg.type = MSG_XFRM_PREPARE;
1761 	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
1762 	write_msg(cmd_fd, &msg, 1);
1763 
1764 	if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
1765 		printk("failed to prepare xfrm");
1766 		goto cleanup;
1767 	}
1768 
1769 	memset(&msg, 0, sizeof(msg));
1770 	msg.type = MSG_XFRM_ADD;
1771 	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
1772 	write_msg(cmd_fd, &msg, 1);
1773 	if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
1774 		printk("failed to set xfrm");
1775 		goto delete;
1776 	}
1777 
1778 	/* UDP pinging with xfrm tunnel */
1779 	if (do_ping(cmd_fd, buf, page_size, tunsrc,
1780 				true, 0, 0, udp_ping_send)) {
1781 		printk("ping failed for xfrm");
1782 		goto delete;
1783 	}
1784 
1785 	ret = KSFT_PASS;
1786 delete:
1787 	/* xfrm delete */
1788 	memset(&msg, 0, sizeof(msg));
1789 	msg.type = MSG_XFRM_DEL;
1790 	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
1791 	write_msg(cmd_fd, &msg, 1);
1792 
1793 	if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
1794 		printk("failed ping to remove xfrm");
1795 		ret = KSFT_FAIL;
1796 	}
1797 
1798 cleanup:
1799 	memset(&msg, 0, sizeof(msg));
1800 	msg.type = MSG_XFRM_CLEANUP;
1801 	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
1802 	write_msg(cmd_fd, &msg, 1);
1803 	if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
1804 		printk("failed ping to cleanup xfrm");
1805 		ret = KSFT_FAIL;
1806 	}
1807 	return ret;
1808 }
1809 
1810 static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
1811 {
1812 	struct xfrm_desc desc;
1813 	struct test_desc msg;
1814 	int xfrm_sock = -1;
1815 	uint32_t seq;
1816 
1817 	if (switch_ns(nsfd_childa))
1818 		exit(KSFT_FAIL);
1819 
1820 	if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
1821 		printk("Failed to open xfrm netlink socket");
1822 		exit(KSFT_FAIL);
1823 	}
1824 
1825 	/* Check that seq sock is ready, just for sure. */
1826 	memset(&msg, 0, sizeof(msg));
1827 	msg.type = MSG_ACK;
1828 	write_msg(cmd_fd, &msg, 1);
1829 	read_msg(cmd_fd, &msg, 1);
1830 	if (msg.type != MSG_ACK) {
1831 		printk("Ack failed");
1832 		exit(KSFT_FAIL);
1833 	}
1834 
1835 	for (;;) {
1836 		ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
1837 		int ret;
1838 
1839 		if (received == 0) /* EOF */
1840 			break;
1841 
1842 		if (received != sizeof(desc)) {
1843 			pr_err("read() returned %zd", received);
1844 			exit(KSFT_FAIL);
1845 		}
1846 
1847 		switch (desc.type) {
1848 		case CREATE_TUNNEL:
1849 			ret = child_serv(xfrm_sock, &seq, nr,
1850 					 cmd_fd, buf, &desc);
1851 			break;
1852 		case ALLOCATE_SPI:
1853 			ret = xfrm_state_allocspi(xfrm_sock, &seq,
1854 						  -1, desc.proto);
1855 			break;
1856 		case MONITOR_ACQUIRE:
1857 			ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
1858 			break;
1859 		case EXPIRE_STATE:
1860 			ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
1861 			break;
1862 		case EXPIRE_POLICY:
1863 			ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
1864 			break;
1865 		case SPDINFO_ATTRS:
1866 			ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
1867 			break;
1868 		default:
1869 			printk("Unknown desc type %d", desc.type);
1870 			exit(KSFT_FAIL);
1871 		}
1872 		write_test_result(ret, &desc);
1873 	}
1874 
1875 	close(xfrm_sock);
1876 
1877 	msg.type = MSG_EXIT;
1878 	write_msg(cmd_fd, &msg, 1);
1879 	exit(KSFT_PASS);
1880 }
1881 
1882 static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
1883 		struct test_desc *msg, int xfrm_sock, uint32_t *seq)
1884 {
1885 	struct in_addr src, dst, tunsrc, tundst;
1886 	bool tun_reply;
1887 	struct xfrm_desc *desc = &msg->body.xfrm_desc;
1888 
1889 	src = inet_makeaddr(INADDR_B, grchild_ip(nr));
1890 	dst = inet_makeaddr(INADDR_B, child_ip(nr));
1891 	tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
1892 	tundst = inet_makeaddr(INADDR_A, child_ip(nr));
1893 
1894 	switch (msg->type) {
1895 	case MSG_EXIT:
1896 		exit(KSFT_PASS);
1897 	case MSG_ACK:
1898 		write_msg(cmd_fd, msg, 1);
1899 		break;
1900 	case MSG_PING:
1901 		tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
1902 		/* UDP pinging without xfrm */
1903 		if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
1904 				false, msg->body.ping.port,
1905 				msg->body.ping.reply_ip, udp_ping_reply)) {
1906 			printk("ping failed before setting xfrm");
1907 		}
1908 		break;
1909 	case MSG_XFRM_PREPARE:
1910 		if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
1911 					desc->proto)) {
1912 			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
1913 			printk("failed to prepare xfrm");
1914 		}
1915 		break;
1916 	case MSG_XFRM_ADD:
1917 		if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
1918 			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
1919 			printk("failed to set xfrm");
1920 		}
1921 		break;
1922 	case MSG_XFRM_DEL:
1923 		if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
1924 					desc->proto)) {
1925 			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
1926 			printk("failed to remove xfrm");
1927 		}
1928 		break;
1929 	case MSG_XFRM_CLEANUP:
1930 		if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
1931 			printk("failed to cleanup xfrm");
1932 		}
1933 		break;
1934 	default:
1935 		printk("got unknown msg type %d", msg->type);
1936 	}
1937 }
1938 
1939 static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
1940 {
1941 	struct test_desc msg;
1942 	int xfrm_sock = -1;
1943 	uint32_t seq;
1944 
1945 	if (switch_ns(nsfd_childb))
1946 		exit(KSFT_FAIL);
1947 
1948 	if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
1949 		printk("Failed to open xfrm netlink socket");
1950 		exit(KSFT_FAIL);
1951 	}
1952 
1953 	do {
1954 		read_msg(cmd_fd, &msg, 1);
1955 		grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
1956 	} while (1);
1957 
1958 	close(xfrm_sock);
1959 	exit(KSFT_FAIL);
1960 }
1961 
1962 static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
1963 {
1964 	int cmd_sock[2];
1965 	void *data_map;
1966 	pid_t child;
1967 
1968 	if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
1969 		return -1;
1970 
1971 	if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
1972 		return -1;
1973 
1974 	child = fork();
1975 	if (child < 0) {
1976 		pr_err("fork()");
1977 		return -1;
1978 	} else if (child) {
1979 		/* in parent - selftest */
1980 		return switch_ns(nsfd_parent);
1981 	}
1982 
1983 	if (close(test_desc_fd[1])) {
1984 		pr_err("close()");
1985 		return -1;
1986 	}
1987 
1988 	/* child */
1989 	data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
1990 			MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1991 	if (data_map == MAP_FAILED) {
1992 		pr_err("mmap()");
1993 		return -1;
1994 	}
1995 
1996 	randomize_buffer(data_map, page_size);
1997 
1998 	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
1999 		pr_err("socketpair()");
2000 		return -1;
2001 	}
2002 
2003 	child = fork();
2004 	if (child < 0) {
2005 		pr_err("fork()");
2006 		return -1;
2007 	} else if (child) {
2008 		if (close(cmd_sock[0])) {
2009 			pr_err("close()");
2010 			return -1;
2011 		}
2012 		return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
2013 	}
2014 	if (close(cmd_sock[1])) {
2015 		pr_err("close()");
2016 		return -1;
2017 	}
2018 	return grand_child_f(nr, cmd_sock[0], data_map);
2019 }
2020 
2021 static void exit_usage(char **argv)
2022 {
2023 	printk("Usage: %s [nr_process]", argv[0]);
2024 	exit(KSFT_FAIL);
2025 }
2026 
2027 static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
2028 {
2029 	ssize_t ret;
2030 
2031 	ret = write(test_desc_fd, desc, sizeof(*desc));
2032 
2033 	if (ret == sizeof(*desc))
2034 		return 0;
2035 
2036 	pr_err("Writing test's desc failed %ld", ret);
2037 
2038 	return -1;
2039 }
2040 
2041 static int write_desc(int proto, int test_desc_fd,
2042 		char *a, char *e, char *c, char *ae)
2043 {
2044 	struct xfrm_desc desc = {};
2045 
2046 	desc.type = CREATE_TUNNEL;
2047 	desc.proto = proto;
2048 
2049 	if (a)
2050 		strncpy(desc.a_algo, a, ALGO_LEN - 1);
2051 	if (e)
2052 		strncpy(desc.e_algo, e, ALGO_LEN - 1);
2053 	if (c)
2054 		strncpy(desc.c_algo, c, ALGO_LEN - 1);
2055 	if (ae)
2056 		strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
2057 
2058 	return __write_desc(test_desc_fd, &desc);
2059 }
2060 
2061 int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
2062 char *ah_list[] = {
2063 	"digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
2064 	"hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
2065 	"xcbc(aes)", "cmac(aes)"
2066 };
2067 char *comp_list[] = {
2068 	"deflate",
2069 #if 0
2070 	/* No compression backend realization */
2071 	"lzs", "lzjh"
2072 #endif
2073 };
2074 char *e_list[] = {
2075 	"ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
2076 	"cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
2077 	"cbc(twofish)", "rfc3686(ctr(aes))"
2078 };
2079 char *ae_list[] = {
2080 #if 0
2081 	/* not implemented */
2082 	"rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
2083 	"rfc7539esp(chacha20,poly1305)"
2084 #endif
2085 };
2086 
2087 const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
2088 				+ (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
2089 				+ ARRAY_SIZE(ae_list);
2090 
2091 static int write_proto_plan(int fd, int proto)
2092 {
2093 	unsigned int i;
2094 
2095 	switch (proto) {
2096 	case IPPROTO_AH:
2097 		for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
2098 			if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
2099 				return -1;
2100 		}
2101 		break;
2102 	case IPPROTO_COMP:
2103 		for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
2104 			if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
2105 				return -1;
2106 		}
2107 		break;
2108 	case IPPROTO_ESP:
2109 		for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
2110 			int j;
2111 
2112 			for (j = 0; j < ARRAY_SIZE(e_list); j++) {
2113 				if (write_desc(proto, fd, ah_list[i],
2114 							e_list[j], 0, 0))
2115 					return -1;
2116 			}
2117 		}
2118 		for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
2119 			if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
2120 				return -1;
2121 		}
2122 		break;
2123 	default:
2124 		printk("BUG: Specified unknown proto %d", proto);
2125 		return -1;
2126 	}
2127 
2128 	return 0;
2129 }
2130 
2131 /*
2132  * Some structures in xfrm uapi header differ in size between
2133  * 64-bit and 32-bit ABI:
2134  *
2135  *             32-bit UABI               |            64-bit UABI
2136  *  -------------------------------------|-------------------------------------
2137  *   sizeof(xfrm_usersa_info)     = 220  |  sizeof(xfrm_usersa_info)     = 224
2138  *   sizeof(xfrm_userpolicy_info) = 164  |  sizeof(xfrm_userpolicy_info) = 168
2139  *   sizeof(xfrm_userspi_info)    = 228  |  sizeof(xfrm_userspi_info)    = 232
2140  *   sizeof(xfrm_user_acquire)    = 276  |  sizeof(xfrm_user_acquire)    = 280
2141  *   sizeof(xfrm_user_expire)     = 224  |  sizeof(xfrm_user_expire)     = 232
2142  *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
2143  *
2144  * Check the affected by the UABI difference structures.
2145  * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
2146  * which needs to be correctly copied, but not translated.
2147  */
2148 const unsigned int compat_plan = 5;
2149 static int write_compat_struct_tests(int test_desc_fd)
2150 {
2151 	struct xfrm_desc desc = {};
2152 
2153 	desc.type = ALLOCATE_SPI;
2154 	desc.proto = IPPROTO_AH;
2155 	strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
2156 
2157 	if (__write_desc(test_desc_fd, &desc))
2158 		return -1;
2159 
2160 	desc.type = MONITOR_ACQUIRE;
2161 	if (__write_desc(test_desc_fd, &desc))
2162 		return -1;
2163 
2164 	desc.type = EXPIRE_STATE;
2165 	if (__write_desc(test_desc_fd, &desc))
2166 		return -1;
2167 
2168 	desc.type = EXPIRE_POLICY;
2169 	if (__write_desc(test_desc_fd, &desc))
2170 		return -1;
2171 
2172 	desc.type = SPDINFO_ATTRS;
2173 	if (__write_desc(test_desc_fd, &desc))
2174 		return -1;
2175 
2176 	return 0;
2177 }
2178 
2179 static int write_test_plan(int test_desc_fd)
2180 {
2181 	unsigned int i;
2182 	pid_t child;
2183 
2184 	child = fork();
2185 	if (child < 0) {
2186 		pr_err("fork()");
2187 		return -1;
2188 	}
2189 	if (child) {
2190 		if (close(test_desc_fd))
2191 			printk("close(): %m");
2192 		return 0;
2193 	}
2194 
2195 	if (write_compat_struct_tests(test_desc_fd))
2196 		exit(KSFT_FAIL);
2197 
2198 	for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
2199 		if (write_proto_plan(test_desc_fd, proto_list[i]))
2200 			exit(KSFT_FAIL);
2201 	}
2202 
2203 	exit(KSFT_PASS);
2204 }
2205 
2206 static int children_cleanup(void)
2207 {
2208 	unsigned ret = KSFT_PASS;
2209 
2210 	while (1) {
2211 		int status;
2212 		pid_t p = wait(&status);
2213 
2214 		if ((p < 0) && errno == ECHILD)
2215 			break;
2216 
2217 		if (p < 0) {
2218 			pr_err("wait()");
2219 			return KSFT_FAIL;
2220 		}
2221 
2222 		if (!WIFEXITED(status)) {
2223 			ret = KSFT_FAIL;
2224 			continue;
2225 		}
2226 
2227 		if (WEXITSTATUS(status) == KSFT_FAIL)
2228 			ret = KSFT_FAIL;
2229 	}
2230 
2231 	return ret;
2232 }
2233 
2234 typedef void (*print_res)(const char *, ...);
2235 
2236 static int check_results(void)
2237 {
2238 	struct test_result tr = {};
2239 	struct xfrm_desc *d = &tr.desc;
2240 	int ret = KSFT_PASS;
2241 
2242 	while (1) {
2243 		ssize_t received = read(results_fd[0], &tr, sizeof(tr));
2244 		print_res result;
2245 
2246 		if (received == 0) /* EOF */
2247 			break;
2248 
2249 		if (received != sizeof(tr)) {
2250 			pr_err("read() returned %zd", received);
2251 			return KSFT_FAIL;
2252 		}
2253 
2254 		switch (tr.res) {
2255 		case KSFT_PASS:
2256 			result = ksft_test_result_pass;
2257 			break;
2258 		case KSFT_FAIL:
2259 		default:
2260 			result = ksft_test_result_fail;
2261 			ret = KSFT_FAIL;
2262 		}
2263 
2264 		result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
2265 		       desc_name[d->type], (unsigned int)d->proto, d->a_algo,
2266 		       d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
2267 	}
2268 
2269 	return ret;
2270 }
2271 
2272 int main(int argc, char **argv)
2273 {
2274 	long nr_process = 1;
2275 	int route_sock = -1, ret = KSFT_SKIP;
2276 	int test_desc_fd[2];
2277 	uint32_t route_seq;
2278 	unsigned int i;
2279 
2280 	if (argc > 2)
2281 		exit_usage(argv);
2282 
2283 	if (argc > 1) {
2284 		char *endptr;
2285 
2286 		errno = 0;
2287 		nr_process = strtol(argv[1], &endptr, 10);
2288 		if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN))
2289 				|| (errno != 0 && nr_process == 0)
2290 				|| (endptr == argv[1]) || (*endptr != '\0')) {
2291 			printk("Failed to parse [nr_process]");
2292 			exit_usage(argv);
2293 		}
2294 
2295 		if (nr_process > MAX_PROCESSES || nr_process < 1) {
2296 			printk("nr_process should be between [1; %u]",
2297 					MAX_PROCESSES);
2298 			exit_usage(argv);
2299 		}
2300 	}
2301 
2302 	srand(time(NULL));
2303 	page_size = sysconf(_SC_PAGESIZE);
2304 	if (page_size < 1)
2305 		ksft_exit_skip("sysconf(): %m\n");
2306 
2307 	if (pipe2(test_desc_fd, O_DIRECT) < 0)
2308 		ksft_exit_skip("pipe(): %m\n");
2309 
2310 	if (pipe2(results_fd, O_DIRECT) < 0)
2311 		ksft_exit_skip("pipe(): %m\n");
2312 
2313 	if (init_namespaces())
2314 		ksft_exit_skip("Failed to create namespaces\n");
2315 
2316 	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
2317 		ksft_exit_skip("Failed to open netlink route socket\n");
2318 
2319 	for (i = 0; i < nr_process; i++) {
2320 		char veth[VETH_LEN];
2321 
2322 		snprintf(veth, VETH_LEN, VETH_FMT, i);
2323 
2324 		if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
2325 			close(route_sock);
2326 			ksft_exit_fail_msg("Failed to create veth device");
2327 		}
2328 
2329 		if (start_child(i, veth, test_desc_fd)) {
2330 			close(route_sock);
2331 			ksft_exit_fail_msg("Child %u failed to start", i);
2332 		}
2333 	}
2334 
2335 	if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
2336 		ksft_exit_fail_msg("close(): %m");
2337 
2338 	ksft_set_plan(proto_plan + compat_plan);
2339 
2340 	if (write_test_plan(test_desc_fd[1]))
2341 		ksft_exit_fail_msg("Failed to write test plan to pipe");
2342 
2343 	ret = check_results();
2344 
2345 	if (children_cleanup() == KSFT_FAIL)
2346 		exit(KSFT_FAIL);
2347 
2348 	exit(ret);
2349 }
2350