xref: /linux/tools/testing/selftests/bpf/network_helpers.c (revision eb71ab2bf72260054677e348498ba995a057c463)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #define _GNU_SOURCE
3 
4 #include <errno.h>
5 #include <stdbool.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sched.h>
10 
11 #include <arpa/inet.h>
12 #include <sys/mount.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/un.h>
16 #include <sys/eventfd.h>
17 
18 #include <linux/err.h>
19 #include <linux/in.h>
20 #include <linux/in6.h>
21 #include <linux/limits.h>
22 
23 #include <linux/ip.h>
24 #include <netinet/udp.h>
25 #include <netinet/tcp.h>
26 #include <net/if.h>
27 
28 #include "bpf_util.h"
29 #include "network_helpers.h"
30 #include "test_progs.h"
31 
32 #ifdef TRAFFIC_MONITOR
33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
35 #include <pcap/pcap.h>
36 #include <pcap/dlt.h>
37 #endif
38 
39 #ifndef IPPROTO_MPTCP
40 #define IPPROTO_MPTCP 262
41 #endif
42 
/* Text for the current errno value, or "None" when errno is 0. */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))

/* Print a printf-style message to stderr, prefixed with the source location
 * and the current errno text and terminated by a newline. errno is saved
 * before printing and restored afterwards, so callers can still inspect the
 * original error after logging.
 */
#define log_err(MSG, ...) ({						\
	int __save = errno;						\
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n",			\
		__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__);	\
	errno = __save;							\
})
51 
52 struct ipv4_packet pkt_v4 = {
53 	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
54 	.iph.ihl = 5,
55 	.iph.protocol = IPPROTO_TCP,
56 	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
57 	.tcp.urg_ptr = 123,
58 	.tcp.doff = 5,
59 };
60 
61 struct ipv6_packet pkt_v6 = {
62 	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
63 	.iph.nexthdr = IPPROTO_TCP,
64 	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
65 	.tcp.urg_ptr = 123,
66 	.tcp.doff = 5,
67 };
68 
69 static const struct network_helper_opts default_opts;
70 
/* Apply the same receive and send timeout to socket @fd.
 *
 * @timeout_ms: timeout in milliseconds; any value <= 0 selects the default
 *              of 3 seconds.
 *
 * Returns 0 on success, or -1 (after logging) if either setsockopt() call
 * fails.
 */
int settimeo(int fd, int timeout_ms)
{
	struct timeval tv = {
		.tv_sec  = timeout_ms > 0 ? timeout_ms / 1000 : 3,
		.tv_usec = timeout_ms > 0 ? (timeout_ms % 1000) * 1000 : 0,
	};
	int rc;

	rc = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
	if (rc) {
		log_err("Failed to set SO_RCVTIMEO");
		return -1;
	}

	rc = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv));
	if (rc) {
		log_err("Failed to set SO_SNDTIMEO");
		return -1;
	}

	return 0;
}
94 
/* close(@fd) without disturbing errno, so that an error path can close the
 * descriptor and still report the original failure.
 */
#define save_errno_close(fd) ({		\
	int __save = errno;		\
	close(fd);			\
	errno = __save;			\
})
96 
start_server_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
98 		      const struct network_helper_opts *opts)
99 {
100 	int on = 1, fd;
101 
102 	if (!opts)
103 		opts = &default_opts;
104 
105 	fd = socket(addr->ss_family, type, opts->proto);
106 	if (fd < 0) {
107 		log_err("Failed to create server socket");
108 		return -1;
109 	}
110 
111 	if (settimeo(fd, opts->timeout_ms))
112 		goto error_close;
113 
114 	if (type == SOCK_STREAM &&
115 	    setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
116 		log_err("Failed to enable SO_REUSEADDR");
117 		goto error_close;
118 	}
119 
120 	if (opts->post_socket_cb &&
121 	    opts->post_socket_cb(fd, opts->cb_opts)) {
122 		log_err("Failed to call post_socket_cb");
123 		goto error_close;
124 	}
125 
126 	if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
127 		log_err("Failed to bind socket");
128 		goto error_close;
129 	}
130 
131 	if (type == SOCK_STREAM) {
132 		if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
133 			log_err("Failed to listed on socket");
134 			goto error_close;
135 		}
136 	}
137 
138 	return fd;
139 
140 error_close:
141 	save_errno_close(fd);
142 	return -1;
143 }
144 
/* Build a socket address from @family, @addr_str and @port with
 * make_sockaddr() and start a server on it via start_server_addr().
 *
 * @opts may be NULL, in which case default_opts is used.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server_str(int family, int type, const char *addr_str, __u16 port,
		     const struct network_helper_opts *opts)
{
	struct sockaddr_storage ss;
	socklen_t ss_len;
	int err;

	err = make_sockaddr(family, addr_str, port, &ss, &ss_len);
	if (err)
		return -1;

	return start_server_addr(type, &ss, ss_len,
				 opts ? opts : &default_opts);
}
159 
/* Convenience wrapper around start_server_str() for callers that only need
 * to choose a timeout; every other option keeps its zero default.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server(int family, int type, const char *addr_str, __u16 port,
		 int timeout_ms)
{
	struct network_helper_opts srv_opts = {
		.timeout_ms = timeout_ms,
	};
	int fd;

	fd = start_server_str(family, type, addr_str, port, &srv_opts);
	return fd;
}
169 
reuseport_cb(int fd,void * opts)170 static int reuseport_cb(int fd, void *opts)
171 {
172 	int on = 1;
173 
174 	return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
175 }
176 
/* Start @nr_listens server sockets that share one address via SO_REUSEPORT.
 *
 * The first socket is bound to the address built from @family, @addr_str
 * and @port. Its address is then read back with getsockname() and reused
 * for the remaining sockets, so they all bind to whatever the first one
 * actually got.
 *
 * Returns a heap-allocated array of @nr_listens fds that the caller releases
 * with free_fds(), or NULL on failure (including @nr_listens == 0). On
 * failure, sockets created so far are closed.
 */
int *start_reuseport_server(int family, int type, const char *addr_str,
			    __u16 port, int timeout_ms, unsigned int nr_listens)
{
	struct network_helper_opts opts = {
		.timeout_ms = timeout_ms,
		.post_socket_cb = reuseport_cb,
	};
	struct sockaddr_storage addr;
	unsigned int nr_fds = 0;
	socklen_t addrlen;
	int *fds;

	if (!nr_listens)
		return NULL;

	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
		return NULL;

	/* calloc() checks the count * size multiplication for overflow. */
	fds = calloc(nr_listens, sizeof(*fds));
	if (!fds)
		return NULL;

	fds[0] = start_server_addr(type, &addr, addrlen, &opts);
	if (fds[0] == -1)
		goto close_fds;
	nr_fds = 1;

	/* Pick up the address the first socket was really bound to. */
	if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
		goto close_fds;

	for (; nr_fds < nr_listens; nr_fds++) {
		fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts);
		if (fds[nr_fds] == -1)
			goto close_fds;
	}

	return fds;

close_fds:
	free_fds(fds, nr_fds);
	return NULL;
}
219 
/* Close the first @nr_close_fds descriptors stored in @fds (last to first)
 * and free the array itself. A NULL @fds is a no-op.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int i;

	if (!fds)
		return;

	for (i = nr_close_fds; i > 0; i--)
		close(fds[i - 1]);

	free(fds);
}
228 
/* Connect to the address @server_fd is bound to with TCP Fast Open, sending
 * @data_len bytes of @data in the same sendto(MSG_FASTOPEN) call.
 *
 * @timeout_ms is applied to the new client socket through settimeo().
 *
 * Returns the connected client fd, or -1 on failure (including a short
 * send). The client socket is closed on failure without clobbering errno.
 */
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
		     int timeout_ms)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);
	ssize_t ret;
	int fd;

	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
		log_err("Failed to get server addr");
		return -1;
	}

	fd = socket(addr.ss_family, SOCK_STREAM, 0);
	if (fd < 0) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (settimeo(fd, timeout_ms))
		goto error_close;

	ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
		     addrlen);
	/* Check for failure explicitly before the unsigned length comparison. */
	if (ret < 0 || (size_t)ret != data_len) {
		log_err("sendto(data, %u) != %zd", data_len, ret);
		goto error_close;
	}

	return fd;

error_close:
	save_errno_close(fd);
	return -1;
}
265 
client_socket(int family,int type,const struct network_helper_opts * opts)266 int client_socket(int family, int type,
267 		  const struct network_helper_opts *opts)
268 {
269 	int fd;
270 
271 	if (!opts)
272 		opts = &default_opts;
273 
274 	fd = socket(family, type, opts->proto);
275 	if (fd < 0) {
276 		log_err("Failed to create client socket");
277 		return -1;
278 	}
279 
280 	if (settimeo(fd, opts->timeout_ms))
281 		goto error_close;
282 
283 	if (opts->post_socket_cb &&
284 	    opts->post_socket_cb(fd, opts->cb_opts))
285 		goto error_close;
286 
287 	return fd;
288 
289 error_close:
290 	save_errno_close(fd);
291 	return -1;
292 }
293 
connect_to_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)294 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
295 		    const struct network_helper_opts *opts)
296 {
297 	int fd;
298 
299 	if (!opts)
300 		opts = &default_opts;
301 
302 	fd = client_socket(addr->ss_family, type, opts);
303 	if (fd < 0) {
304 		log_err("Failed to create client socket");
305 		return -1;
306 	}
307 
308 	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
309 		log_err("Failed to connect to server");
310 		save_errno_close(fd);
311 		return -1;
312 	}
313 
314 	return fd;
315 }
316 
/* Build a socket address from @family, @addr_str and @port with
 * make_sockaddr() and connect to it via connect_to_addr().
 *
 * A NULL @opts selects default_opts.
 *
 * Returns the connected fd, or -1 on failure.
 */
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
			const struct network_helper_opts *opts)
{
	struct sockaddr_storage ss;
	socklen_t ss_len;
	int err;

	err = make_sockaddr(family, addr_str, port, &ss, &ss_len);
	if (err)
		return -1;

	return connect_to_addr(type, &ss, ss_len,
			       opts ? opts : &default_opts);
}
331 
connect_to_fd_opts(int server_fd,const struct network_helper_opts * opts)332 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
333 {
334 	struct sockaddr_storage addr;
335 	socklen_t addrlen, optlen;
336 	int type;
337 
338 	if (!opts)
339 		opts = &default_opts;
340 
341 	optlen = sizeof(type);
342 	if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
343 		log_err("getsockopt(SOL_TYPE)");
344 		return -1;
345 	}
346 
347 	addrlen = sizeof(addr);
348 	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
349 		log_err("Failed to get server addr");
350 		return -1;
351 	}
352 
353 	return connect_to_addr(type, &addr, addrlen, opts);
354 }
355 
connect_to_fd(int server_fd,int timeout_ms)356 int connect_to_fd(int server_fd, int timeout_ms)
357 {
358 	struct network_helper_opts opts = {
359 		.timeout_ms = timeout_ms,
360 	};
361 	socklen_t optlen;
362 	int protocol;
363 
364 	optlen = sizeof(protocol);
365 	if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
366 		log_err("getsockopt(SOL_PROTOCOL)");
367 		return -1;
368 	}
369 	opts.proto = protocol;
370 
371 	return connect_to_fd_opts(server_fd, &opts);
372 }
373 
/* Connect the existing socket @client_fd to the address @server_fd is bound
 * to, after applying @timeout_ms to @client_fd.
 *
 * Returns 0 on success, -1 on failure. @client_fd is never closed here.
 */
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	int rc;

	rc = settimeo(client_fd, timeout_ms);
	if (rc)
		return -1;

	rc = getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len);
	if (rc) {
		log_err("Failed to get server addr");
		return -1;
	}

	rc = connect(client_fd, (const struct sockaddr *)&srv_addr, srv_len);
	if (rc) {
		log_err("Failed to connect to server");
		return -1;
	}

	return 0;
}
394 
/* Fill @addr with a socket address for @family.
 *
 * @family:   AF_INET, AF_INET6 or AF_UNIX; anything else fails.
 * @addr_str: textual address. For AF_INET/AF_INET6 it may be NULL, leaving
 *            the address all-zero. For AF_UNIX it is the name of an abstract
 *            socket and must not be NULL.
 * @port:     port in host byte order (ignored for AF_UNIX).
 * @addr:     output storage.
 * @len:      optional output for the length of the address written.
 *
 * Returns 0 on success, -1 on failure.
 */
int make_sockaddr(int family, const char *addr_str, __u16 port,
		  struct sockaddr_storage *addr, socklen_t *len)
{
	switch (family) {
	case AF_INET: {
		struct sockaddr_in *sin = (void *)addr;

		memset(addr, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
			log_err("inet_pton(AF_INET, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin);
		return 0;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (void *)addr;

		memset(addr, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
			log_err("inet_pton(AF_INET6, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin6);
		return 0;
	}
	case AF_UNIX: {
		/* Note that we always use abstract unix sockets to avoid having
		 * to clean up leftover files.
		 */
		struct sockaddr_un *sun = (void *)addr;

		/* An abstract socket needs a name; NULL would crash below. */
		if (!addr_str) {
			log_err("AF_UNIX address requires a name");
			return -1;
		}

		memset(addr, 0, sizeof(*sun));
		sun->sun_family = family;
		sun->sun_path[0] = 0;
		strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1);
		/* Report the length of what was actually stored, so an
		 * over-long name can never yield a length beyond sun_path.
		 */
		if (len)
			*len = offsetof(struct sockaddr_un, sun_path) + 1 +
			       strlen(sun->sun_path + 1);
		return 0;
	}
	default:
		return -1;
	}
}
442 
/* Return the ping command line prefix to use for @family.
 *
 * For AF_INET6 this is "ping6" when `which ping6` succeeds, otherwise
 * "ping -6". Every other family gets plain "ping". The returned string is
 * a literal and must not be freed or modified.
 */
char *ping_command(int family)
{
	int ping6_missing;

	if (family != AF_INET6)
		return "ping";

	/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
	ping6_missing = system("which ping6 >/dev/null 2>&1");

	return ping6_missing ? "ping -6" : "ping6";
}
454 
/* Append the calling thread's id to the string in @str as a zero-padded
 * decimal number that is 7 characters wide.
 *
 * @str: NUL-terminated string to extend in place.
 * @sz:  total size of the buffer behind @str.
 *
 * Returns 0 on success, -1 if @str is NULL or the buffer has no room for
 * 7 more characters plus the terminating NUL.
 */
int append_tid(char *str, size_t sz)
{
	size_t end;

	if (!str)
		return -1;

	end = strlen(str);
	if (end + 8 > sz)
		return -1;

	/* Bounded write: never more than 7 characters and the NUL, even if
	 * the id would need more digits.
	 */
	snprintf(&str[end], 8, "%07ld", sys_gettid());

	return 0;
}
471 
/* Delete the network namespace @name by running `ip netns del`, with the
 * command's output discarded.
 *
 * Returns the system() result, or -1 if the command string could not be
 * allocated.
 */
int remove_netns(const char *name)
{
	char *cmdline;
	int rc;

	if (asprintf(&cmdline, "ip netns del %s >/dev/null 2>&1", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	rc = system(cmdline);
	free(cmdline);

	return rc;
}
487 
/* Create the network namespace @name with `ip netns add` and bring its
 * loopback interface up.
 *
 * Returns 0 on success. If a command fails, its system() result is
 * returned. If a command string cannot be allocated, -1 is returned; when
 * that happens for the second command, the namespace is removed again.
 */
int make_netns(const char *name)
{
	char *cmdline;
	int rc;

	if (asprintf(&cmdline, "ip netns add %s", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}
	rc = system(cmdline);
	free(cmdline);
	if (rc != 0)
		return rc;

	if (asprintf(&cmdline, "ip -n %s link set lo up", name) < 0) {
		log_err("Failed to malloc cmd for setting up lo");
		remove_netns(name);
		return -1;
	}
	rc = system(cmdline);
	free(cmdline);

	return rc;
}
517 
518 struct nstoken {
519 	int orig_netns_fd;
520 };
521 
open_netns(const char * name)522 struct nstoken *open_netns(const char *name)
523 {
524 	int nsfd;
525 	char nspath[PATH_MAX];
526 	int err;
527 	struct nstoken *token;
528 
529 	token = calloc(1, sizeof(struct nstoken));
530 	if (!token) {
531 		log_err("Failed to malloc token");
532 		return NULL;
533 	}
534 
535 	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
536 	if (token->orig_netns_fd == -1) {
537 		log_err("Failed to open(/proc/self/ns/net)");
538 		goto fail;
539 	}
540 
541 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
542 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
543 	if (nsfd == -1) {
544 		log_err("Failed to open(%s)", nspath);
545 		goto fail;
546 	}
547 
548 	err = setns(nsfd, CLONE_NEWNET);
549 	close(nsfd);
550 	if (err) {
551 		log_err("Failed to setns(nsfd)");
552 		goto fail;
553 	}
554 
555 	return token;
556 fail:
557 	if (token->orig_netns_fd != -1)
558 		close(token->orig_netns_fd);
559 	free(token);
560 	return NULL;
561 }
562 
close_netns(struct nstoken * token)563 void close_netns(struct nstoken *token)
564 {
565 	if (!token)
566 		return;
567 
568 	if (setns(token->orig_netns_fd, CLONE_NEWNET))
569 		log_err("Failed to setns(orig_netns_fd)");
570 	close(token->orig_netns_fd);
571 	free(token);
572 }
573 
open_tuntap(const char * dev_name,bool need_mac)574 int open_tuntap(const char *dev_name, bool need_mac)
575 {
576 	int err = 0;
577 	struct ifreq ifr;
578 	int fd = open("/dev/net/tun", O_RDWR);
579 
580 	if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
581 		return -1;
582 
583 	ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
584 	strscpy(ifr.ifr_name, dev_name);
585 
586 	err = ioctl(fd, TUNSETIFF, &ifr);
587 	if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
588 		close(fd);
589 		return -1;
590 	}
591 
592 	err = fcntl(fd, F_SETFL, O_NONBLOCK);
593 	if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
594 		close(fd);
595 		return -1;
596 	}
597 
598 	return fd;
599 }
600 
/* Return the local port of @sock_fd exactly as stored in its socket address
 * (sin_port / sin6_port, no byte-order conversion is applied).
 *
 * Returns the getsockname() result if that call fails, and -1 for sockets
 * that are neither AF_INET nor AF_INET6.
 */
int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage ss;
	socklen_t ss_len = sizeof(ss);
	int rc;

	rc = getsockname(sock_fd, (struct sockaddr *)&ss, &ss_len);
	if (rc < 0)
		return rc;

	switch (ss.ss_family) {
	case AF_INET:
		return ((struct sockaddr_in *)&ss)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *)&ss)->sin6_port;
	default:
		return -1;
	}
}
623 
get_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)624 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
625 {
626 	struct ifreq ifr = {0};
627 	int sockfd, err;
628 
629 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
630 	if (sockfd < 0)
631 		return -errno;
632 
633 	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
634 
635 	ring_param->cmd = ETHTOOL_GRINGPARAM;
636 	ifr.ifr_data = (char *)ring_param;
637 
638 	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
639 		err = errno;
640 		close(sockfd);
641 		return -err;
642 	}
643 
644 	close(sockfd);
645 	return 0;
646 }
647 
set_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)648 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
649 {
650 	struct ifreq ifr = {0};
651 	int sockfd, err;
652 
653 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
654 	if (sockfd < 0)
655 		return -errno;
656 
657 	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
658 
659 	ring_param->cmd = ETHTOOL_SRINGPARAM;
660 	ifr.ifr_data = (char *)ring_param;
661 
662 	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
663 		err = errno;
664 		close(sockfd);
665 		return -err;
666 	}
667 
668 	close(sockfd);
669 	return 0;
670 }
671 
/* Arguments shared between send_recv_data() and its server thread. */
struct send_recv_arg {
	int		fd;	/* listening socket the thread accepts on */
	uint32_t	bytes;	/* number of bytes the thread has to send */
	int		stop;	/* set to non-zero to make the other side stop */
};

/* Thread body: accept one connection on a->fd and send a->bytes bytes over
 * it in chunks of at most 1500 bytes.
 *
 * Returns NULL on success. On failure a->stop is set and ERR_PTR() of a
 * negative errno value is returned.
 */
static void *send_recv_server(void *arg)
{
	struct send_recv_arg *a = (struct send_recv_arg *)arg;
	ssize_t nr_sent = 0, bytes = 0;
	/* Payload content is irrelevant; zero it so no stack garbage is sent. */
	char batch[1500] = {0};
	int err = 0, fd;

	/* Call accept() again after an interrupting signal; merely testing
	 * the old result again would spin forever.
	 */
	do {
		fd = accept(a->fd, NULL, NULL);
	} while (fd == -1 && errno == EINTR);
	if (fd == -1) {
		err = -errno;
		goto done;
	}

	if (settimeo(fd, 0)) {
		err = -errno;
		goto done;
	}

	while (bytes < a->bytes && !READ_ONCE(a->stop)) {
		nr_sent = send(fd, batch,
			       MIN(a->bytes - bytes, sizeof(batch)), 0);
		if (nr_sent == -1 && errno == EINTR)
			continue;
		if (nr_sent == -1) {
			err = -errno;
			break;
		}
		bytes += nr_sent;
	}

	if (bytes != a->bytes) {
		log_err("send %zd expected %u", bytes, a->bytes);
		if (!err)
			err = bytes > a->bytes ? -E2BIG : -EINTR;
	}

done:
	if (fd >= 0)
		close(fd);
	if (err) {
		/* Tell the receiving side to give up as well. */
		WRITE_ONCE(a->stop, 1);
		return ERR_PTR(err);
	}
	return NULL;
}
725 
send_recv_data(int lfd,int fd,uint32_t total_bytes)726 int send_recv_data(int lfd, int fd, uint32_t total_bytes)
727 {
728 	ssize_t nr_recv = 0, bytes = 0;
729 	struct send_recv_arg arg = {
730 		.fd	= lfd,
731 		.bytes	= total_bytes,
732 		.stop	= 0,
733 	};
734 	pthread_t srv_thread;
735 	void *thread_ret;
736 	char batch[1500];
737 	int err = 0;
738 
739 	err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
740 	if (err) {
741 		log_err("Failed to pthread_create");
742 		return err;
743 	}
744 
745 	/* recv total_bytes */
746 	while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
747 		nr_recv = recv(fd, &batch,
748 			       MIN(total_bytes - bytes, sizeof(batch)), 0);
749 		if (nr_recv == -1 && errno == EINTR)
750 			continue;
751 		if (nr_recv == -1) {
752 			err = -errno;
753 			break;
754 		}
755 		bytes += nr_recv;
756 	}
757 
758 	if (bytes != total_bytes) {
759 		log_err("recv %zd expected %u", bytes, total_bytes);
760 		if (!err)
761 			err = bytes > total_bytes ? -E2BIG : -EINTR;
762 	}
763 
764 	WRITE_ONCE(arg.stop, 1);
765 	pthread_join(srv_thread, &thread_ret);
766 	if (IS_ERR(thread_ret)) {
767 		log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
768 		err = err ? : PTR_ERR(thread_ret);
769 	}
770 
771 	return err;
772 }
773 
tc_prog_attach(const char * dev,int ingress_fd,int egress_fd)774 int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
775 {
776 	int ifindex, ret;
777 
778 	if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
779 			 "at least one program fd is valid"))
780 		return -1;
781 
782 	ifindex = if_nametoindex(dev);
783 	if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
784 		return -1;
785 
786 	DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
787 			    .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
788 	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
789 			    .priority = 1, .prog_fd = ingress_fd);
790 	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
791 			    .priority = 1, .prog_fd = egress_fd);
792 
793 	ret = bpf_tc_hook_create(&hook);
794 	if (!ASSERT_OK(ret, "create tc hook"))
795 		return ret;
796 
797 	if (ingress_fd >= 0) {
798 		hook.attach_point = BPF_TC_INGRESS;
799 		ret = bpf_tc_attach(&hook, &opts1);
800 		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
801 			bpf_tc_hook_destroy(&hook);
802 			return ret;
803 		}
804 	}
805 
806 	if (egress_fd >= 0) {
807 		hook.attach_point = BPF_TC_EGRESS;
808 		ret = bpf_tc_attach(&hook, &opts2);
809 		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
810 			bpf_tc_hook_destroy(&hook);
811 			return ret;
812 		}
813 	}
814 
815 	return 0;
816 }
817 
818 #ifdef TRAFFIC_MONITOR
/* State of one traffic monitor instance, shared with its capture thread. */
struct tmonitor_ctx {
	pcap_t *pcap;			/* capture handle packets are read from */
	pcap_dumper_t *dumper;		/* dumper every captured packet is written to */
	pthread_t thread;
	int wake_fd;			/* second fd the capture thread select()s on */

	volatile bool done;		/* the capture thread loops until this is set */
	char pkt_fname[PATH_MAX];	/* path of the packet file under PCAP_DIR */
	int pcap_fd;			/* fd the capture thread select()s on for packets */
};
829 
__base_pr(const char * format,va_list args)830 static int __base_pr(const char *format, va_list args)
831 {
832 	return vfprintf(stdout, format, args);
833 }
834 
835 static tm_print_fn_t __tm_pr = __base_pr;
836 
/* Install @fn as the printer used by tm_print() and return the previous one.
 * Passing NULL makes tm_print() discard its output.
 */
tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
{
	return __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
}
845 
tm_print(const char * format,...)846 void tm_print(const char *format, ...)
847 {
848 	tm_print_fn_t print_fn;
849 	va_list args;
850 
851 	print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
852 	if (!print_fn)
853 		return;
854 
855 	va_start(args, format);
856 	print_fn(format, args);
857 	va_end(args);
858 }
859 
860 /* Is this packet captured with a Ethernet protocol type? */
is_ethernet(const u_char * packet)861 static bool is_ethernet(const u_char *packet)
862 {
863 	u16 arphdr_type;
864 
865 	memcpy(&arphdr_type, packet + 8, 2);
866 	arphdr_type = ntohs(arphdr_type);
867 
868 	/* Except the following cases, the protocol type contains the
869 	 * Ethernet protocol type for the packet.
870 	 *
871 	 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
872 	 */
873 	switch (arphdr_type) {
874 	case 770: /* ARPHRD_FRAD */
875 	case 778: /* ARPHDR_IPGRE */
876 	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
877 		tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
878 		return false;
879 	}
880 	return true;
881 }
882 
883 static const char * const pkt_types[] = {
884 	"In",
885 	"B",			/* Broadcast */
886 	"M",			/* Multicast */
887 	"C",			/* Captured with the promiscuous mode */
888 	"Out",
889 };
890 
pkt_type_str(u16 pkt_type)891 static const char *pkt_type_str(u16 pkt_type)
892 {
893 	if (pkt_type < ARRAY_SIZE(pkt_types))
894 		return pkt_types[pkt_type];
895 	return "Unknown";
896 }
897 
898 #define MAX_FLAGS_STRLEN 21
899 /* Show the information of the transport layer in the packet */
show_transport(const u_char * packet,u16 len,u32 ifindex,const char * src_addr,const char * dst_addr,u16 proto,bool ipv6,u8 pkt_type)900 static void show_transport(const u_char *packet, u16 len, u32 ifindex,
901 			   const char *src_addr, const char *dst_addr,
902 			   u16 proto, bool ipv6, u8 pkt_type)
903 {
904 	char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
905 	const char *transport_str;
906 	u16 src_port, dst_port;
907 	struct udphdr *udp;
908 	struct tcphdr *tcp;
909 
910 	ifname = if_indextoname(ifindex, _ifname);
911 	if (!ifname) {
912 		snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
913 		ifname = _ifname;
914 	}
915 
916 	if (proto == IPPROTO_UDP) {
917 		udp = (struct udphdr *)packet;
918 		src_port = ntohs(udp->source);
919 		dst_port = ntohs(udp->dest);
920 		transport_str = "UDP";
921 	} else if (proto == IPPROTO_TCP) {
922 		tcp = (struct tcphdr *)packet;
923 		src_port = ntohs(tcp->source);
924 		dst_port = ntohs(tcp->dest);
925 		transport_str = "TCP";
926 	} else if (proto == IPPROTO_ICMP) {
927 		tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
928 			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
929 			 packet[0], packet[1]);
930 		return;
931 	} else if (proto == IPPROTO_ICMPV6) {
932 		tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
933 			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
934 			 packet[0], packet[1]);
935 		return;
936 	} else {
937 		tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
938 			 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
939 			 src_addr, dst_addr, proto);
940 		return;
941 	}
942 
943 	/* TCP or UDP*/
944 
945 	if (proto == IPPROTO_TCP)
946 		snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
947 			 tcp->fin ? ", FIN" : "",
948 			 tcp->syn ? ", SYN" : "",
949 			 tcp->rst ? ", RST" : "",
950 			 tcp->ack ? ", ACK" : "");
951 
952 	if (ipv6)
953 		tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
954 			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
955 			 dst_addr, dst_port, transport_str, len, flags);
956 	else
957 		tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
958 			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
959 			 dst_addr, dst_port, transport_str, len, flags);
960 }
961 
/* Print a captured IPv6 packet.
 *
 * @packet points at the IPv6 header. The addresses are converted to text
 * ("<invalid>" if that fails) and the bytes following the fixed IPv6 header
 * are handed to show_transport() together with the payload length and the
 * next-header value.
 */
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	struct ipv6hdr *ip6h = (struct ipv6hdr *)packet;
	char saddr_str[INET6_ADDRSTRLEN];
	char daddr_str[INET6_ADDRSTRLEN];
	const char *src, *dst;
	u_char nexthdr;

	src = inet_ntop(AF_INET6, &ip6h->saddr, saddr_str, sizeof(saddr_str));
	dst = inet_ntop(AF_INET6, &ip6h->daddr, daddr_str, sizeof(daddr_str));
	nexthdr = ip6h->nexthdr;

	show_transport(packet + sizeof(struct ipv6hdr),
		       ntohs(ip6h->payload_len), ifindex,
		       src ? src : "<invalid>", dst ? dst : "<invalid>",
		       nexthdr, true, pkt_type);
}
980 
/* Print a captured IPv4 packet.
 *
 * @packet points at the IPv4 header. The addresses are converted to text
 * ("<invalid>" if that fails) and the transport header is handed to
 * show_transport() together with the total length and protocol taken from
 * the IP header.
 */
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
	struct iphdr *pkt = (struct iphdr *)packet;
	const char *src, *dst;
	size_t hdr_len;
	u_char proto;

	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
	if (!src)
		src = "<invalid>";
	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
	if (!dst)
		dst = "<invalid>";
	proto = pkt->protocol;

	/* The transport header follows the IP options, if any: ihl counts
	 * the header in 32-bit words. Fall back to the minimal header size
	 * for bogus values.
	 */
	hdr_len = (size_t)pkt->ihl * 4;
	if (hdr_len < sizeof(struct iphdr))
		hdr_len = sizeof(struct iphdr);

	show_transport(packet + hdr_len,
		       ntohs(pkt->tot_len),
		       ifindex, src, dst, proto, false, pkt_type);
}
999 
traffic_monitor_thread(void * arg)1000 static void *traffic_monitor_thread(void *arg)
1001 {
1002 	char *ifname, _ifname[IF_NAMESIZE];
1003 	const u_char *packet, *payload;
1004 	struct tmonitor_ctx *ctx = arg;
1005 	pcap_dumper_t *dumper = ctx->dumper;
1006 	int fd = ctx->pcap_fd, nfds, r;
1007 	int wake_fd = ctx->wake_fd;
1008 	struct pcap_pkthdr header;
1009 	pcap_t *pcap = ctx->pcap;
1010 	u32 ifindex;
1011 	fd_set fds;
1012 	u16 proto;
1013 	u8 ptype;
1014 
1015 	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
1016 	FD_ZERO(&fds);
1017 
1018 	while (!ctx->done) {
1019 		FD_SET(fd, &fds);
1020 		FD_SET(wake_fd, &fds);
1021 		r = select(nfds, &fds, NULL, NULL, NULL);
1022 		if (!r)
1023 			continue;
1024 		if (r < 0) {
1025 			if (errno == EINTR)
1026 				continue;
1027 			log_err("Fail to select on pcap fd and wake fd");
1028 			break;
1029 		}
1030 
1031 		/* This instance of pcap is non-blocking */
1032 		packet = pcap_next(pcap, &header);
1033 		if (!packet)
1034 			continue;
1035 
1036 		/* According to the man page of pcap_dump(), first argument
1037 		 * is the pcap_dumper_t pointer even it's argument type is
1038 		 * u_char *.
1039 		 */
1040 		pcap_dump((u_char *)dumper, &header, packet);
1041 
1042 		/* Not sure what other types of packets look like. Here, we
1043 		 * parse only Ethernet and compatible packets.
1044 		 */
1045 		if (!is_ethernet(packet))
1046 			continue;
1047 
1048 		/* Skip SLL2 header
1049 		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
1050 		 *
1051 		 * Although the document doesn't mention that, the payload
1052 		 * doesn't include the Ethernet header. The payload starts
1053 		 * from the first byte of the network layer header.
1054 		 */
1055 		payload = packet + 20;
1056 
1057 		memcpy(&proto, packet, 2);
1058 		proto = ntohs(proto);
1059 		memcpy(&ifindex, packet + 4, 4);
1060 		ifindex = ntohl(ifindex);
1061 		ptype = packet[10];
1062 
1063 		if (proto == ETH_P_IPV6) {
1064 			show_ipv6_packet(payload, ifindex, ptype);
1065 		} else if (proto == ETH_P_IP) {
1066 			show_ipv4_packet(payload, ifindex, ptype);
1067 		} else {
1068 			ifname = if_indextoname(ifindex, _ifname);
1069 			if (!ifname) {
1070 				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
1071 				ifname = _ifname;
1072 			}
1073 
1074 			tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
1075 				 ifname, pkt_type_str(ptype), proto);
1076 		}
1077 	}
1078 
1079 	return NULL;
1080 }
1081 
1082 /* Prepare the pcap handle to capture packets.
1083  *
1084  * This pcap is non-blocking and immediate mode is enabled to receive
1085  * captured packets as soon as possible.  The snaplen is set to 1024 bytes
1086  * to limit the size of captured content. The format of the link-layer
1087  * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
1088  * technologies.
1089  */
traffic_monitor_prepare_pcap(void)1090 static pcap_t *traffic_monitor_prepare_pcap(void)
1091 {
1092 	char errbuf[PCAP_ERRBUF_SIZE];
1093 	pcap_t *pcap;
1094 	int r;
1095 
1096 	/* Listen on all NICs in the namespace */
1097 	pcap = pcap_create("any", errbuf);
1098 	if (!pcap) {
1099 		log_err("Failed to open pcap: %s", errbuf);
1100 		return NULL;
1101 	}
1102 	/* Limit the size of the packet (first N bytes) */
1103 	r = pcap_set_snaplen(pcap, 1024);
1104 	if (r) {
1105 		log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
1106 		goto error;
1107 	}
1108 	/* To receive packets as fast as possible */
1109 	r = pcap_set_immediate_mode(pcap, 1);
1110 	if (r) {
1111 		log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
1112 		goto error;
1113 	}
1114 	r = pcap_setnonblock(pcap, 1, errbuf);
1115 	if (r) {
1116 		log_err("Failed to set nonblock: %s", errbuf);
1117 		goto error;
1118 	}
1119 	r = pcap_activate(pcap);
1120 	if (r) {
1121 		log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
1122 		goto error;
1123 	}
1124 	/* Determine the format of the link-layer header */
1125 	r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
1126 	if (r) {
1127 		log_err("Failed to set datalink: %s", pcap_geterr(pcap));
1128 		goto error;
1129 	}
1130 
1131 	return pcap;
1132 error:
1133 	pcap_close(pcap);
1134 	return NULL;
1135 }
1136 
/* Build a file-name-safe tag for a test in buf.
 *
 * buf: the destination buffer.
 * len: the size of buf in bytes. The result is truncated to fit and is
 *      always NUL-terminated. Nothing is read or written when len is 0.
 * test_name: the name of the test.
 * subtest_name: the name of the subtest, appended after a "__" separator.
 *               NULL if there is no subtest.
 *
 * Every '/' and ' ' in the result is replaced with '_'.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *p;

	/* snprintf() stores nothing in a zero-sized buffer, so there would
	 * be no terminated string to sanitize below.
	 */
	if (!len)
		return;

	if (subtest_name)
		snprintf(buf, len, "%s__%s", test_name, subtest_name);
	else
		snprintf(buf, len, "%s", test_name);

	/* Single pass instead of rescanning from the start per replacement */
	for (p = buf; *p; p++) {
		if (*p == '/' || *p == ' ')
			*p = '_';
	}
}
1150 
1151 #define PCAP_DIR "/tmp/tmon_pcap"
1152 
1153 /* Start to monitor the network traffic in the given network namespace.
1154  *
1155  * netns: the name of the network namespace to monitor. If NULL, the
1156  *        current network namespace is monitored.
1157  * test_name: the name of the running test.
1158  * subtest_name: the name of the running subtest if there is. It should be
1159  *               NULL if it is not a subtest.
1160  *
1161  * This function will start a thread to capture packets going through NICs
1162  * in the give network namespace.
1163  */
traffic_monitor_start(const char * netns,const char * test_name,const char * subtest_name)1164 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
1165 					   const char *subtest_name)
1166 {
1167 	struct nstoken *nstoken = NULL;
1168 	struct tmonitor_ctx *ctx;
1169 	char test_name_buf[64];
1170 	static int tmon_seq;
1171 	int r;
1172 
1173 	if (netns) {
1174 		nstoken = open_netns(netns);
1175 		if (!nstoken)
1176 			return NULL;
1177 	}
1178 	ctx = malloc(sizeof(*ctx));
1179 	if (!ctx) {
1180 		log_err("Failed to malloc ctx");
1181 		goto fail_ctx;
1182 	}
1183 	memset(ctx, 0, sizeof(*ctx));
1184 
1185 	encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
1186 	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
1187 		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
1188 		 test_name_buf, netns ? netns : "unknown");
1189 
1190 	r = mkdir(PCAP_DIR, 0755);
1191 	if (r && errno != EEXIST) {
1192 		log_err("Failed to create " PCAP_DIR);
1193 		goto fail_pcap;
1194 	}
1195 
1196 	ctx->pcap = traffic_monitor_prepare_pcap();
1197 	if (!ctx->pcap)
1198 		goto fail_pcap;
1199 	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
1200 	if (ctx->pcap_fd < 0) {
1201 		log_err("Failed to get pcap fd");
1202 		goto fail_dumper;
1203 	}
1204 
1205 	/* Create a packet file */
1206 	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
1207 	if (!ctx->dumper) {
1208 		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
1209 		goto fail_dumper;
1210 	}
1211 
1212 	/* Create an eventfd to wake up the monitor thread */
1213 	ctx->wake_fd = eventfd(0, 0);
1214 	if (ctx->wake_fd < 0) {
1215 		log_err("Failed to create eventfd");
1216 		goto fail_eventfd;
1217 	}
1218 
1219 	r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
1220 	if (r) {
1221 		log_err("Failed to create thread");
1222 		goto fail;
1223 	}
1224 
1225 	close_netns(nstoken);
1226 
1227 	return ctx;
1228 
1229 fail:
1230 	close(ctx->wake_fd);
1231 
1232 fail_eventfd:
1233 	pcap_dump_close(ctx->dumper);
1234 	unlink(ctx->pkt_fname);
1235 
1236 fail_dumper:
1237 	pcap_close(ctx->pcap);
1238 
1239 fail_pcap:
1240 	free(ctx);
1241 
1242 fail_ctx:
1243 	close_netns(nstoken);
1244 
1245 	return NULL;
1246 }
1247 
traffic_monitor_release(struct tmonitor_ctx * ctx)1248 static void traffic_monitor_release(struct tmonitor_ctx *ctx)
1249 {
1250 	pcap_close(ctx->pcap);
1251 	pcap_dump_close(ctx->dumper);
1252 
1253 	close(ctx->wake_fd);
1254 
1255 	free(ctx);
1256 }
1257 
1258 /* Stop the network traffic monitor.
1259  *
1260  * ctx: the context returned by traffic_monitor_start()
1261  */
traffic_monitor_stop(struct tmonitor_ctx * ctx)1262 void traffic_monitor_stop(struct tmonitor_ctx *ctx)
1263 {
1264 	__u64 w = 1;
1265 
1266 	if (!ctx)
1267 		return;
1268 
1269 	/* Stop the monitor thread */
1270 	ctx->done = true;
1271 	/* Wake up the background thread. */
1272 	write(ctx->wake_fd, &w, sizeof(w));
1273 	pthread_join(ctx->thread, NULL);
1274 
1275 	tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
1276 
1277 	traffic_monitor_release(ctx);
1278 }
1279 
1280 #endif /* TRAFFIC_MONITOR */
1281