xref: /linux/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c (revision 0fc8f6200d2313278fbf4539bbab74677c685531)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Check if we can migrate child sockets.
4  *
5  *   1. call listen() for 4 server sockets.
6  *   2. call connect() for 25 client sockets.
7  *   3. call listen() for 1 server socket. (migration target)
8  *   4. update a map to migrate all child sockets
9  *        to the last server socket (migrate_map[cookie] = 4)
10  *   5. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll
11  *        that the last server socket is not ready before migration.
12  *   6. call shutdown() for first 4 server sockets
13  *        and migrate the requests in the accept queue
14  *        to the last server socket.
15  *   7. for TCP_ESTABLISHED and TCP_SYN_RECV cases, verify via epoll
16  *        that the last server socket is ready after migration.
17  *   8. call listen() for the second server socket.
18  *   9. call shutdown() for the last server
19  *        and migrate the requests in the accept queue
20  *        to the second server socket.
21  *  10. call listen() for the last server.
22  *  11. call shutdown() for the second server
23  *        and migrate the requests in the accept queue
24  *        to the last server socket.
25  *  12. call accept() for the last server socket.
26  *
27  * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
28  */
29 
30 #include <bpf/bpf.h>
31 #include <bpf/libbpf.h>
32 #include <sys/epoll.h>
33 
34 #include "test_progs.h"
35 #include "test_migrate_reuseport.skel.h"
36 #include "network_helpers.h"
37 
/* Fallback definition for toolchains whose headers do not provide
 * TCP_FASTOPEN_CONNECT.
 */
#ifndef TCP_FASTOPEN_CONNECT
#define TCP_FASTOPEN_CONNECT 30
#endif

/* Interface index passed to bpf_program__attach_xdp() in drop_ack().
 * NOTE(review): presumably the loopback device; assumes "lo" has
 * ifindex 1 -- confirm for the environment the test runs in.
 */
#define IFINDEX_LO 1

/* NR_SERVERS listeners are created, NR_CLIENTS clients connect to them,
 * and MIGRATED_TO is the index of the last listener, which starts
 * listening only after the clients have connected.
 */
#define NR_SERVERS 5
#define NR_CLIENTS (NR_SERVERS * 5)
#define MIGRATED_TO (NR_SERVERS - 1)

/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
#define QLEN (NR_CLIENTS * 5)

/* Payload written by every client. MSGLEN covers the 11 visible
 * characters plus the explicit trailing NUL, so the bytes on the wire
 * already form a terminated string.
 */
#define MSG "Hello World\0"
#define MSGLEN 12
53 
/* Per-subtest configuration and runtime state.
 *
 * The designated initializers below set only the configuration fields;
 * the descriptor arrays, address and link are filled in while the test
 * case runs.
 */
static struct migrate_reuseport_test_case {
	const char *name;		/* subtest name */
	__s64 servers[NR_SERVERS];	/* listener fds, -1 when not open */
	__s64 clients[NR_CLIENTS];	/* client fds, -1 when not open */
	struct sockaddr_storage addr;	/* address shared by all listeners */
	socklen_t addrlen;		/* valid length of addr */
	int family;			/* AF_INET or AF_INET6 */
	int state;			/* BPF_TCP_* state of the requests when migrated */
	bool drop_ack;			/* attach the XDP program before connecting */
	bool expire_synack_timer;	/* sleep before detaching the XDP program */
	bool fastopen;			/* enable TCP Fast Open on sysctl and sockets */
	struct bpf_link *link;		/* XDP link set by drop_ack(), NULL otherwise */
} test_cases[] = {
	{
		.name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
		.family = AF_INET,
		.state = BPF_TCP_ESTABLISHED,
		.drop_ack = false,
		.expire_synack_timer = false,
		.fastopen = false,
	},
	{
		.name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
		.family = AF_INET,
		.state = BPF_TCP_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = false,
		.fastopen = true,
	},
	{
		.name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
		.family = AF_INET,
		.state = BPF_TCP_NEW_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = true,
		.fastopen = false,
	},
	{
		.name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
		.family = AF_INET,
		.state = BPF_TCP_NEW_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = false,
		.fastopen = false,
	},
	{
		.name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
		.family = AF_INET6,
		.state = BPF_TCP_ESTABLISHED,
		.drop_ack = false,
		.expire_synack_timer = false,
		.fastopen = false,
	},
	{
		.name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
		.family = AF_INET6,
		.state = BPF_TCP_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = false,
		.fastopen = true,
	},
	{
		.name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
		.family = AF_INET6,
		.state = BPF_TCP_NEW_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = true,
		.fastopen = false,
	},
	{
		.name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
		.family = AF_INET6,
		.state = BPF_TCP_NEW_SYN_RECV,
		.drop_ack = true,
		.expire_synack_timer = false,
		.fastopen = false,
	}
};
132 
/* Mark the first @len slots of @fds as "no descriptor" (-1). */
static void init_fds(__s64 fds[], int len)
{
	int idx = 0;

	while (idx < len)
		fds[idx++] = -1;
}
140 
/* Close every open descriptor among the first @len slots of @fds and
 * reset the slot to -1; slots already holding -1 are left untouched.
 */
static void close_fds(__s64 fds[], int len)
{
	int idx;

	for (idx = 0; idx < len; idx++) {
		if (fds[idx] == -1)
			continue;

		close(fds[idx]);
		fds[idx] = -1;
	}
}
152 
/* Save-and-set or restore /proc/sys/net/ipv4/tcp_fastopen.
 *
 * @buf:       holds the saved sysctl text
 * @size:      capacity of @buf (used only when saving)
 * @saved_len: number of bytes saved in @buf; written when saving,
 *             read when restoring
 * @restore:   false - read the current value into @buf, then write "519";
 *             true  - write the previously saved @buf back
 *
 * Returns 0 on success and a non-zero value on failure.
 */
static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
{
	int ret = 0, sysctl_fd, written;

	sysctl_fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
	if (!ASSERT_NEQ(sysctl_fd, -1, "open"))
		return -1;

	if (restore) {
		written = write(sysctl_fd, buf, *saved_len);
		if (!ASSERT_EQ(written, *saved_len, "write - restore"))
			ret = -1;
		goto out;
	}

	/* Remember the current setting so that it can be restored later */
	*saved_len = read(sysctl_fd, buf, size);
	if (!ASSERT_GE(*saved_len, 1, "read")) {
		ret = -1;
		goto out;
	}

	/* Rewind before overwriting the value that was just read */
	ret = lseek(sysctl_fd, 0, SEEK_SET);
	if (!ASSERT_OK(ret, "lseek"))
		goto out;

	/* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
	 *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
	 */
	written = write(sysctl_fd, "519", 3);
	if (!ASSERT_EQ(written, 3, "write - setup"))
		ret = -1;

out:
	close(sysctl_fd);

	return ret;
}
189 
190 static int drop_ack(struct migrate_reuseport_test_case *test_case,
191 		    struct test_migrate_reuseport *skel)
192 {
193 	if (test_case->family == AF_INET)
194 		skel->bss->server_port = ((struct sockaddr_in *)
195 					  &test_case->addr)->sin_port;
196 	else
197 		skel->bss->server_port = ((struct sockaddr_in6 *)
198 					  &test_case->addr)->sin6_port;
199 
200 	test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
201 						  IFINDEX_LO);
202 	if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
203 		return -1;
204 
205 	return 0;
206 }
207 
208 static int pass_ack(struct migrate_reuseport_test_case *test_case)
209 {
210 	int err;
211 
212 	err = bpf_link__destroy(test_case->link);
213 	if (!ASSERT_OK(err, "bpf_link__destroy"))
214 		return -1;
215 
216 	test_case->link = NULL;
217 
218 	return 0;
219 }
220 
221 static int start_servers(struct migrate_reuseport_test_case *test_case,
222 			 struct test_migrate_reuseport *skel)
223 {
224 	int i, err, prog_fd, reuseport = 1, qlen = QLEN;
225 
226 	prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
227 
228 	make_sockaddr(test_case->family,
229 		      test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
230 		      &test_case->addr, &test_case->addrlen);
231 
232 	for (i = 0; i < NR_SERVERS; i++) {
233 		test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
234 					       IPPROTO_TCP);
235 		if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
236 			return -1;
237 
238 		err = setsockopt(test_case->servers[i], SOL_SOCKET,
239 				 SO_REUSEPORT, &reuseport, sizeof(reuseport));
240 		if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
241 			return -1;
242 
243 		err = bind(test_case->servers[i],
244 			   (struct sockaddr *)&test_case->addr,
245 			   test_case->addrlen);
246 		if (!ASSERT_OK(err, "bind"))
247 			return -1;
248 
249 		if (i == 0) {
250 			err = setsockopt(test_case->servers[i], SOL_SOCKET,
251 					 SO_ATTACH_REUSEPORT_EBPF,
252 					 &prog_fd, sizeof(prog_fd));
253 			if (!ASSERT_OK(err,
254 				       "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
255 				return -1;
256 
257 			err = getsockname(test_case->servers[i],
258 					  (struct sockaddr *)&test_case->addr,
259 					  &test_case->addrlen);
260 			if (!ASSERT_OK(err, "getsockname"))
261 				return -1;
262 		}
263 
264 		if (test_case->fastopen) {
265 			err = setsockopt(test_case->servers[i],
266 					 SOL_TCP, TCP_FASTOPEN,
267 					 &qlen, sizeof(qlen));
268 			if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
269 				return -1;
270 		}
271 
272 		/* All requests will be tied to the first four listeners */
273 		if (i != MIGRATED_TO) {
274 			err = listen(test_case->servers[i], qlen);
275 			if (!ASSERT_OK(err, "listen"))
276 				return -1;
277 		}
278 	}
279 
280 	return 0;
281 }
282 
283 static int start_clients(struct migrate_reuseport_test_case *test_case)
284 {
285 	char buf[MSGLEN] = MSG;
286 	int i, err;
287 
288 	for (i = 0; i < NR_CLIENTS; i++) {
289 		test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
290 					       IPPROTO_TCP);
291 		if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
292 			return -1;
293 
294 		/* The attached XDP program drops only the final ACK, so
295 		 * clients will transition to TCP_ESTABLISHED immediately.
296 		 */
297 		err = settimeo(test_case->clients[i], 100);
298 		if (!ASSERT_OK(err, "settimeo"))
299 			return -1;
300 
301 		if (test_case->fastopen) {
302 			int fastopen = 1;
303 
304 			err = setsockopt(test_case->clients[i], IPPROTO_TCP,
305 					 TCP_FASTOPEN_CONNECT, &fastopen,
306 					 sizeof(fastopen));
307 			if (!ASSERT_OK(err,
308 				       "setsockopt - TCP_FASTOPEN_CONNECT"))
309 				return -1;
310 		}
311 
312 		err = connect(test_case->clients[i],
313 			      (struct sockaddr *)&test_case->addr,
314 			      test_case->addrlen);
315 		if (!ASSERT_OK(err, "connect"))
316 			return -1;
317 
318 		err = write(test_case->clients[i], buf, MSGLEN);
319 		if (!ASSERT_EQ(err, MSGLEN, "write"))
320 			return -1;
321 	}
322 
323 	return 0;
324 }
325 
326 static int update_maps(struct migrate_reuseport_test_case *test_case,
327 		       struct test_migrate_reuseport *skel)
328 {
329 	int i, err, migrated_to = MIGRATED_TO;
330 	int reuseport_map_fd, migrate_map_fd;
331 	__u64 value;
332 
333 	reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
334 	migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
335 
336 	for (i = 0; i < NR_SERVERS; i++) {
337 		value = (__u64)test_case->servers[i];
338 		err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
339 					  BPF_NOEXIST);
340 		if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
341 			return -1;
342 
343 		err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
344 		if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
345 			return -1;
346 
347 		err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
348 					  BPF_NOEXIST);
349 		if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
350 			return -1;
351 	}
352 
353 	return 0;
354 }
355 
356 static int migrate_dance(struct migrate_reuseport_test_case *test_case)
357 {
358 	struct epoll_event ev = {
359 		.events = EPOLLIN,
360 	};
361 	int epoll = -1, nfds;
362 	int i, err;
363 
364 	if (test_case->state != BPF_TCP_NEW_SYN_RECV) {
365 		epoll = epoll_create1(0);
366 		if (!ASSERT_NEQ(epoll, -1, "epoll_create1"))
367 			return -1;
368 
369 		ev.data.fd = test_case->servers[MIGRATED_TO];
370 		if (!ASSERT_OK(epoll_ctl(epoll, EPOLL_CTL_ADD,
371 					 test_case->servers[MIGRATED_TO], &ev),
372 			       "epoll_ctl"))
373 			goto close_epoll;
374 
375 		nfds = epoll_wait(epoll, &ev, 1, 0);
376 		if (!ASSERT_EQ(nfds, 0, "epoll_wait 1"))
377 			goto close_epoll;
378 	}
379 
380 	/* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
381 	 * to the last listener based on eBPF.
382 	 */
383 	for (i = 0; i < MIGRATED_TO; i++) {
384 		err = shutdown(test_case->servers[i], SHUT_RDWR);
385 		if (!ASSERT_OK(err, "shutdown"))
386 			goto close_epoll;
387 	}
388 
389 	/* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
390 	if (test_case->state == BPF_TCP_NEW_SYN_RECV)
391 		return 0;
392 
393 	nfds = epoll_wait(epoll, &ev, 1, 0);
394 	if (!ASSERT_EQ(nfds, 1, "epoll_wait 2")) {
395 close_epoll:
396 		if (epoll >= 0)
397 			close(epoll);
398 		return -1;
399 	}
400 
401 	close(epoll);
402 
403 	/* Note that we use the second listener instead of the
404 	 * first one here.
405 	 *
406 	 * The fist listener is bind()ed with port 0 and,
407 	 * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
408 	 * calling listen() again will bind() the first listener
409 	 * on a new ephemeral port and detach it from the existing
410 	 * reuseport group.  (See: __inet_bind(), tcp_set_state())
411 	 *
412 	 * OTOH, the second one is bind()ed with a specific port,
413 	 * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
414 	 * resurrect the listener on the existing reuseport group.
415 	 */
416 	err = listen(test_case->servers[1], QLEN);
417 	if (!ASSERT_OK(err, "listen"))
418 		return -1;
419 
420 	/* Migrate from the last listener to the second one.
421 	 *
422 	 * All listeners were detached out of the reuseport_map,
423 	 * so migration will be done by kernel random pick from here.
424 	 */
425 	err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
426 	if (!ASSERT_OK(err, "shutdown"))
427 		return -1;
428 
429 	/* Back to the existing reuseport group */
430 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
431 	if (!ASSERT_OK(err, "listen"))
432 		return -1;
433 
434 	/* Migrate back to the last one from the second one */
435 	err = shutdown(test_case->servers[1], SHUT_RDWR);
436 	if (!ASSERT_OK(err, "shutdown"))
437 		return -1;
438 
439 	return 0;
440 }
441 
442 static void count_requests(struct migrate_reuseport_test_case *test_case,
443 			   struct test_migrate_reuseport *skel)
444 {
445 	struct sockaddr_storage addr;
446 	socklen_t len = sizeof(addr);
447 	int err, cnt = 0, client;
448 	char buf[MSGLEN];
449 
450 	err = settimeo(test_case->servers[MIGRATED_TO], 4000);
451 	if (!ASSERT_OK(err, "settimeo"))
452 		goto out;
453 
454 	for (; cnt < NR_CLIENTS; cnt++) {
455 		client = accept(test_case->servers[MIGRATED_TO],
456 				(struct sockaddr *)&addr, &len);
457 		if (!ASSERT_NEQ(client, -1, "accept"))
458 			goto out;
459 
460 		memset(buf, 0, MSGLEN);
461 		read(client, &buf, MSGLEN);
462 		close(client);
463 
464 		if (!ASSERT_STREQ(buf, MSG, "read"))
465 			goto out;
466 	}
467 
468 out:
469 	ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
470 
471 	switch (test_case->state) {
472 	case BPF_TCP_ESTABLISHED:
473 		cnt = skel->bss->migrated_at_close;
474 		break;
475 	case BPF_TCP_SYN_RECV:
476 		cnt = skel->bss->migrated_at_close_fastopen;
477 		break;
478 	case BPF_TCP_NEW_SYN_RECV:
479 		if (test_case->expire_synack_timer)
480 			cnt = skel->bss->migrated_at_send_synack;
481 		else
482 			cnt = skel->bss->migrated_at_recv_ack;
483 		break;
484 	default:
485 		cnt = 0;
486 	}
487 
488 	ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
489 }
490 
491 static void run_test(struct migrate_reuseport_test_case *test_case,
492 		     struct test_migrate_reuseport *skel)
493 {
494 	int err, saved_len;
495 	char buf[16];
496 
497 	skel->bss->migrated_at_close = 0;
498 	skel->bss->migrated_at_close_fastopen = 0;
499 	skel->bss->migrated_at_send_synack = 0;
500 	skel->bss->migrated_at_recv_ack = 0;
501 
502 	init_fds(test_case->servers, NR_SERVERS);
503 	init_fds(test_case->clients, NR_CLIENTS);
504 
505 	if (test_case->fastopen) {
506 		memset(buf, 0, sizeof(buf));
507 
508 		err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
509 		if (!ASSERT_OK(err, "setup_fastopen - setup"))
510 			return;
511 	}
512 
513 	err = start_servers(test_case, skel);
514 	if (!ASSERT_OK(err, "start_servers"))
515 		goto close_servers;
516 
517 	if (test_case->drop_ack) {
518 		/* Drop the final ACK of the 3-way handshake and stick the
519 		 * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
520 		 */
521 		err = drop_ack(test_case, skel);
522 		if (!ASSERT_OK(err, "drop_ack"))
523 			goto close_servers;
524 	}
525 
526 	/* Tie requests to the first four listeners */
527 	err = start_clients(test_case);
528 	if (!ASSERT_OK(err, "start_clients"))
529 		goto close_clients;
530 
531 	err = listen(test_case->servers[MIGRATED_TO], QLEN);
532 	if (!ASSERT_OK(err, "listen"))
533 		goto close_clients;
534 
535 	err = update_maps(test_case, skel);
536 	if (!ASSERT_OK(err, "fill_maps"))
537 		goto close_clients;
538 
539 	/* Migrate the requests in the accept queue only.
540 	 * TCP_NEW_SYN_RECV requests are not migrated at this point.
541 	 */
542 	err = migrate_dance(test_case);
543 	if (!ASSERT_OK(err, "migrate_dance"))
544 		goto close_clients;
545 
546 	if (test_case->expire_synack_timer) {
547 		/* Wait for SYN+ACK timers to expire so that
548 		 * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
549 		 */
550 		sleep(1);
551 	}
552 
553 	if (test_case->link) {
554 		/* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
555 		err = pass_ack(test_case);
556 		if (!ASSERT_OK(err, "pass_ack"))
557 			goto close_clients;
558 	}
559 
560 	count_requests(test_case, skel);
561 
562 close_clients:
563 	close_fds(test_case->clients, NR_CLIENTS);
564 
565 	if (test_case->link) {
566 		err = pass_ack(test_case);
567 		ASSERT_OK(err, "pass_ack - clean up");
568 	}
569 
570 close_servers:
571 	close_fds(test_case->servers, NR_SERVERS);
572 
573 	if (test_case->fastopen) {
574 		err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
575 		ASSERT_OK(err, "setup_fastopen - restore");
576 	}
577 }
578 
579 void serial_test_migrate_reuseport(void)
580 {
581 	struct test_migrate_reuseport *skel;
582 	int i;
583 
584 	skel = test_migrate_reuseport__open_and_load();
585 	if (!ASSERT_OK_PTR(skel, "open_and_load"))
586 		return;
587 
588 	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
589 		test__start_subtest(test_cases[i].name);
590 		run_test(&test_cases[i], skel);
591 	}
592 
593 	test_migrate_reuseport__destroy(skel);
594 }
595