xref: /linux/tools/testing/selftests/bpf/network_helpers.c (revision 38c6104e0bc7c8af20ab4897cb0504e3339e4fe4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #define _GNU_SOURCE
3 
4 #include <errno.h>
5 #include <stdbool.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sched.h>
10 
11 #include <arpa/inet.h>
12 #include <sys/mount.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/un.h>
16 #include <sys/eventfd.h>
17 
18 #include <linux/err.h>
19 #include <linux/in.h>
20 #include <linux/in6.h>
21 #include <linux/limits.h>
22 
23 #include <linux/ip.h>
24 #include <netinet/udp.h>
25 #include <netinet/tcp.h>
26 #include <net/if.h>
27 
28 #include "bpf_util.h"
29 #include "network_helpers.h"
30 #include "test_progs.h"
31 
32 #ifdef TRAFFIC_MONITOR
33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
35 #include <pcap/pcap.h>
36 #include <pcap/dlt.h>
37 #endif
38 
/* Fallback definition of IPPROTO_MPTCP, used only when none of the headers
 * included above already defines it.
 */
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
42 
/* Text describing the current errno: "None" when errno is 0, otherwise the
 * strerror() message.
 */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
/* Print a printf-style message to stderr, prefixed with the source file,
 * line number and the current errno description, and terminated with a
 * newline.  errno is saved before printing and restored afterwards so that
 * the caller still observes the value set by the failing call.
 */
#define log_err(MSG, ...) ({						\
			int __save = errno;				\
			fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
				__FILE__, __LINE__, clean_errno(),	\
				##__VA_ARGS__);				\
			errno = __save;					\
})
51 
/* Global IPv4 packet image: Ethernet type ETH_P_IP, an IP header of 5
 * 32-bit words carrying TCP with total length MAGIC_BYTES, and a TCP header
 * with urg_ptr 123 and a data offset of 5 words.  All other fields are
 * zero.  Presumably used as canned input by tests -- confirm with callers.
 */
struct ipv4_packet pkt_v4 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
	.iph.ihl = 5,
	.iph.protocol = IPPROTO_TCP,
	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
60 
/* Global IPv6 packet image: Ethernet type ETH_P_IPV6, next header TCP with
 * payload length MAGIC_BYTES, and a TCP header with urg_ptr 123 and a data
 * offset of 5 words.  All other fields are zero.  Presumably used as canned
 * input by tests -- confirm with callers.
 */
struct ipv6_packet pkt_v6 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
	.iph.nexthdr = IPPROTO_TCP,
	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
68 
/* Zero-initialized options substituted whenever a helper is called with
 * opts == NULL.
 */
static const struct network_helper_opts default_opts;
70 
/* Apply the same timeout to both the receive and the send side of @fd.
 *
 * A positive @timeout_ms is split into seconds and microseconds; any other
 * value selects the 3 second default.
 *
 * Returns 0 on success and -1 when either setsockopt() call fails.
 */
int settimeo(int fd, int timeout_ms)
{
	struct timeval tv = { .tv_sec = 3 };

	if (timeout_ms > 0) {
		tv.tv_sec = timeout_ms / 1000;
		tv.tv_usec = (timeout_ms % 1000) * 1000;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_RCVTIMEO");
		return -1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_SNDTIMEO");
		return -1;
	}

	return 0;
}
94 
/* Close @fd without disturbing errno: the value is saved before close() and
 * restored afterwards, so error paths can still report the original failure.
 */
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
96 
97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
98 		      const struct network_helper_opts *opts)
99 {
100 	int fd;
101 
102 	if (!opts)
103 		opts = &default_opts;
104 
105 	fd = socket(addr->ss_family, type, opts->proto);
106 	if (fd < 0) {
107 		log_err("Failed to create server socket");
108 		return -1;
109 	}
110 
111 	if (settimeo(fd, opts->timeout_ms))
112 		goto error_close;
113 
114 	if (opts->post_socket_cb &&
115 	    opts->post_socket_cb(fd, opts->cb_opts)) {
116 		log_err("Failed to call post_socket_cb");
117 		goto error_close;
118 	}
119 
120 	if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
121 		log_err("Failed to bind socket");
122 		goto error_close;
123 	}
124 
125 	if (type == SOCK_STREAM) {
126 		if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
127 			log_err("Failed to listed on socket");
128 			goto error_close;
129 		}
130 	}
131 
132 	return fd;
133 
134 error_close:
135 	save_errno_close(fd);
136 	return -1;
137 }
138 
139 int start_server_str(int family, int type, const char *addr_str, __u16 port,
140 		     const struct network_helper_opts *opts)
141 {
142 	struct sockaddr_storage addr;
143 	socklen_t addrlen;
144 
145 	if (!opts)
146 		opts = &default_opts;
147 
148 	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
149 		return -1;
150 
151 	return start_server_addr(type, &addr, addrlen, opts);
152 }
153 
154 int start_server(int family, int type, const char *addr_str, __u16 port,
155 		 int timeout_ms)
156 {
157 	struct network_helper_opts opts = {
158 		.timeout_ms	= timeout_ms,
159 	};
160 
161 	return start_server_str(family, type, addr_str, port, &opts);
162 }
163 
164 static int reuseport_cb(int fd, void *opts)
165 {
166 	int on = 1;
167 
168 	return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
169 }
170 
171 int *start_reuseport_server(int family, int type, const char *addr_str,
172 			    __u16 port, int timeout_ms, unsigned int nr_listens)
173 {
174 	struct network_helper_opts opts = {
175 		.timeout_ms = timeout_ms,
176 		.post_socket_cb = reuseport_cb,
177 	};
178 	struct sockaddr_storage addr;
179 	unsigned int nr_fds = 0;
180 	socklen_t addrlen;
181 	int *fds;
182 
183 	if (!nr_listens)
184 		return NULL;
185 
186 	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
187 		return NULL;
188 
189 	fds = malloc(sizeof(*fds) * nr_listens);
190 	if (!fds)
191 		return NULL;
192 
193 	fds[0] = start_server_addr(type, &addr, addrlen, &opts);
194 	if (fds[0] == -1)
195 		goto close_fds;
196 	nr_fds = 1;
197 
198 	if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
199 		goto close_fds;
200 
201 	for (; nr_fds < nr_listens; nr_fds++) {
202 		fds[nr_fds] = start_server_addr(type, &addr, addrlen, &opts);
203 		if (fds[nr_fds] == -1)
204 			goto close_fds;
205 	}
206 
207 	return fds;
208 
209 close_fds:
210 	free_fds(fds, nr_fds);
211 	return NULL;
212 }
213 
/* Close the first @nr_close_fds descriptors stored in @fds, last one first,
 * then free the array itself.  A NULL @fds is ignored.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int remaining;

	if (fds == NULL)
		return;

	for (remaining = nr_close_fds; remaining > 0; remaining--)
		close(fds[remaining - 1]);

	free(fds);
}
222 
/* Open a TCP connection to the address @server_fd is bound to, sending
 * @data in the same call by means of MSG_FASTOPEN.
 *
 * @server_fd:  socket whose local address is the connection target.
 * @data:       payload handed to sendto().
 * @data_len:   number of bytes in @data.
 * @timeout_ms: timeout applied through settimeo().
 *
 * Returns the connected client fd, or -1 on failure.  Anything other than
 * sendto() accepting exactly @data_len bytes is treated as a failure.
 */
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
		     int timeout_ms)
{
	struct sockaddr_storage addr;
	socklen_t addrlen = sizeof(addr);
	ssize_t ret;
	int fd;

	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
		log_err("Failed to get server addr");
		return -1;
	}

	fd = socket(addr.ss_family, SOCK_STREAM, 0);
	if (fd < 0) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (settimeo(fd, timeout_ms))
		goto error_close;

	ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
		     addrlen);
	/* Keep the full ssize_t result and test the sign explicitly, so that
	 * -1 is never compared against the unsigned length.
	 */
	if (ret < 0 || (size_t)ret != data_len) {
		log_err("sendto(data, %u) != %zd", data_len, ret);
		goto error_close;
	}

	return fd;

error_close:
	save_errno_close(fd);
	return -1;
}
259 
260 int client_socket(int family, int type,
261 		  const struct network_helper_opts *opts)
262 {
263 	int fd;
264 
265 	if (!opts)
266 		opts = &default_opts;
267 
268 	fd = socket(family, type, opts->proto);
269 	if (fd < 0) {
270 		log_err("Failed to create client socket");
271 		return -1;
272 	}
273 
274 	if (settimeo(fd, opts->timeout_ms))
275 		goto error_close;
276 
277 	if (opts->post_socket_cb &&
278 	    opts->post_socket_cb(fd, opts->cb_opts))
279 		goto error_close;
280 
281 	return fd;
282 
283 error_close:
284 	save_errno_close(fd);
285 	return -1;
286 }
287 
288 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
289 		    const struct network_helper_opts *opts)
290 {
291 	int fd;
292 
293 	if (!opts)
294 		opts = &default_opts;
295 
296 	fd = client_socket(addr->ss_family, type, opts);
297 	if (fd < 0) {
298 		log_err("Failed to create client socket");
299 		return -1;
300 	}
301 
302 	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
303 		log_err("Failed to connect to server");
304 		save_errno_close(fd);
305 		return -1;
306 	}
307 
308 	return fd;
309 }
310 
311 int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
312 			const struct network_helper_opts *opts)
313 {
314 	struct sockaddr_storage addr;
315 	socklen_t addrlen;
316 
317 	if (!opts)
318 		opts = &default_opts;
319 
320 	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
321 		return -1;
322 
323 	return connect_to_addr(type, &addr, addrlen, opts);
324 }
325 
326 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
327 {
328 	struct sockaddr_storage addr;
329 	socklen_t addrlen, optlen;
330 	int type;
331 
332 	if (!opts)
333 		opts = &default_opts;
334 
335 	optlen = sizeof(type);
336 	if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
337 		log_err("getsockopt(SOL_TYPE)");
338 		return -1;
339 	}
340 
341 	addrlen = sizeof(addr);
342 	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
343 		log_err("Failed to get server addr");
344 		return -1;
345 	}
346 
347 	return connect_to_addr(type, &addr, addrlen, opts);
348 }
349 
350 int connect_to_fd(int server_fd, int timeout_ms)
351 {
352 	struct network_helper_opts opts = {
353 		.timeout_ms = timeout_ms,
354 	};
355 	socklen_t optlen;
356 	int protocol;
357 
358 	optlen = sizeof(protocol);
359 	if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
360 		log_err("getsockopt(SOL_PROTOCOL)");
361 		return -1;
362 	}
363 	opts.proto = protocol;
364 
365 	return connect_to_fd_opts(server_fd, &opts);
366 }
367 
/* Connect the already created @client_fd to the address @server_fd is bound
 * to, after applying @timeout_ms to @client_fd.
 *
 * Returns 0 on success and -1 on failure.  @client_fd is never closed here.
 */
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);

	if (settimeo(client_fd, timeout_ms) != 0)
		return -1;

	if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len) != 0) {
		log_err("Failed to get server addr");
		return -1;
	}

	if (connect(client_fd, (const struct sockaddr *)&srv_addr, srv_len) != 0) {
		log_err("Failed to connect to server");
		return -1;
	}

	return 0;
}
388 
389 int make_sockaddr(int family, const char *addr_str, __u16 port,
390 		  struct sockaddr_storage *addr, socklen_t *len)
391 {
392 	if (family == AF_INET) {
393 		struct sockaddr_in *sin = (void *)addr;
394 
395 		memset(addr, 0, sizeof(*sin));
396 		sin->sin_family = AF_INET;
397 		sin->sin_port = htons(port);
398 		if (addr_str &&
399 		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
400 			log_err("inet_pton(AF_INET, %s)", addr_str);
401 			return -1;
402 		}
403 		if (len)
404 			*len = sizeof(*sin);
405 		return 0;
406 	} else if (family == AF_INET6) {
407 		struct sockaddr_in6 *sin6 = (void *)addr;
408 
409 		memset(addr, 0, sizeof(*sin6));
410 		sin6->sin6_family = AF_INET6;
411 		sin6->sin6_port = htons(port);
412 		if (addr_str &&
413 		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
414 			log_err("inet_pton(AF_INET6, %s)", addr_str);
415 			return -1;
416 		}
417 		if (len)
418 			*len = sizeof(*sin6);
419 		return 0;
420 	} else if (family == AF_UNIX) {
421 		/* Note that we always use abstract unix sockets to avoid having
422 		 * to clean up leftover files.
423 		 */
424 		struct sockaddr_un *sun = (void *)addr;
425 
426 		memset(addr, 0, sizeof(*sun));
427 		sun->sun_family = family;
428 		sun->sun_path[0] = 0;
429 		strcpy(sun->sun_path + 1, addr_str);
430 		if (len)
431 			*len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
432 		return 0;
433 	}
434 	return -1;
435 }
436 
/* Return the ping command line to use for @family.
 *
 * For AF_INET6 this is "ping6" when `which ping6` succeeds and "ping -6"
 * otherwise, because on some systems 'ping' does not support IPv6.  Every
 * other family gets plain "ping".  The returned string is a literal and
 * must not be modified or freed.
 */
char *ping_command(int family)
{
	if (family != AF_INET6)
		return "ping";

	if (system("which ping6 >/dev/null 2>&1") == 0)
		return "ping6";

	return "ping -6";
}
448 
/* Append the calling thread's id, zero padded to 7 digits, to the string
 * in @str.
 *
 * @str: NUL-terminated string to extend in place.
 * @sz:  total size of the buffer behind @str.
 *
 * Returns 0 on success, -1 when @str is NULL or the buffer cannot hold the
 * existing text plus 7 digits and the terminator.
 */
int append_tid(char *str, size_t sz)
{
	size_t used;
	char *tail;

	if (str == NULL)
		return -1;

	used = strlen(str);
	if (used + 8 > sz)
		return -1;

	tail = str + used;
	sprintf(tail, "%07d", gettid());
	/* Cut the suffix off after exactly 7 characters. */
	tail[7] = '\0';

	return 0;
}
465 
/* Delete the network namespace @name by running `ip netns del`, with the
 * command's output discarded.
 *
 * Returns the system() status, or -1 when the command string cannot be
 * allocated.
 */
int remove_netns(const char *name)
{
	char *del_cmd;
	int status;

	if (asprintf(&del_cmd, "ip netns del %s >/dev/null 2>&1", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(del_cmd);
	free(del_cmd);

	return status;
}
481 
/* Create the network namespace @name and bring its loopback device up.
 *
 * Runs `ip netns add` followed by `ip -n <name> link set lo up`.
 *
 * Returns 0 on success, the non-zero system() status of whichever command
 * failed, or -1 when a command string cannot be allocated.  The namespace is
 * removed again only when allocating the second command fails.
 */
int make_netns(const char *name)
{
	char *cmd;
	int status;

	if (asprintf(&cmd, "ip netns add %s", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(cmd);
	free(cmd);
	if (status != 0)
		return status;

	if (asprintf(&cmd, "ip -n %s link set lo up", name) < 0) {
		log_err("Failed to malloc cmd for setting up lo");
		remove_netns(name);
		return -1;
	}

	status = system(cmd);
	free(cmd);

	return status;
}
511 
/* Handle returned by open_netns() and consumed by close_netns(). */
struct nstoken {
	/* fd of /proc/self/ns/net opened before switching namespaces; used
	 * by close_netns() to switch back
	 */
	int orig_netns_fd;
};
515 
516 struct nstoken *open_netns(const char *name)
517 {
518 	int nsfd;
519 	char nspath[PATH_MAX];
520 	int err;
521 	struct nstoken *token;
522 
523 	token = calloc(1, sizeof(struct nstoken));
524 	if (!token) {
525 		log_err("Failed to malloc token");
526 		return NULL;
527 	}
528 
529 	token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
530 	if (token->orig_netns_fd == -1) {
531 		log_err("Failed to open(/proc/self/ns/net)");
532 		goto fail;
533 	}
534 
535 	snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
536 	nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
537 	if (nsfd == -1) {
538 		log_err("Failed to open(%s)", nspath);
539 		goto fail;
540 	}
541 
542 	err = setns(nsfd, CLONE_NEWNET);
543 	close(nsfd);
544 	if (err) {
545 		log_err("Failed to setns(nsfd)");
546 		goto fail;
547 	}
548 
549 	return token;
550 fail:
551 	if (token->orig_netns_fd != -1)
552 		close(token->orig_netns_fd);
553 	free(token);
554 	return NULL;
555 }
556 
557 void close_netns(struct nstoken *token)
558 {
559 	if (!token)
560 		return;
561 
562 	if (setns(token->orig_netns_fd, CLONE_NEWNET))
563 		log_err("Failed to setns(orig_netns_fd)");
564 	close(token->orig_netns_fd);
565 	free(token);
566 }
567 
/* Return the local port of @sock_fd exactly as stored in its socket address
 * (sin_port / sin6_port, no byte-order conversion is applied here).
 *
 * Returns the getsockname() result when that call fails, and -1 when the
 * socket is neither AF_INET nor AF_INET6.
 */
int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage local;
	socklen_t local_len = sizeof(local);
	int rc;

	rc = getsockname(sock_fd, (struct sockaddr *)&local, &local_len);
	if (rc < 0)
		return rc;

	switch (local.ss_family) {
	case AF_INET:
		return ((struct sockaddr_in *)&local)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *)&local)->sin6_port;
	default:
		return -1;
	}
}
590 
591 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
592 {
593 	struct ifreq ifr = {0};
594 	int sockfd, err;
595 
596 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
597 	if (sockfd < 0)
598 		return -errno;
599 
600 	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
601 
602 	ring_param->cmd = ETHTOOL_GRINGPARAM;
603 	ifr.ifr_data = (char *)ring_param;
604 
605 	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
606 		err = errno;
607 		close(sockfd);
608 		return -err;
609 	}
610 
611 	close(sockfd);
612 	return 0;
613 }
614 
615 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
616 {
617 	struct ifreq ifr = {0};
618 	int sockfd, err;
619 
620 	sockfd = socket(AF_INET, SOCK_DGRAM, 0);
621 	if (sockfd < 0)
622 		return -errno;
623 
624 	memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
625 
626 	ring_param->cmd = ETHTOOL_SRINGPARAM;
627 	ifr.ifr_data = (char *)ring_param;
628 
629 	if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
630 		err = errno;
631 		close(sockfd);
632 		return -err;
633 	}
634 
635 	close(sockfd);
636 	return 0;
637 }
638 
/* Arguments shared between send_recv_data() and its server thread. */
struct send_recv_arg {
	int		fd;	/* listening socket the thread accepts on */
	uint32_t	bytes;	/* number of bytes the thread has to send */
	int		stop;	/* set to non-zero by either side to end the transfer */
};
644 
645 static void *send_recv_server(void *arg)
646 {
647 	struct send_recv_arg *a = (struct send_recv_arg *)arg;
648 	ssize_t nr_sent = 0, bytes = 0;
649 	char batch[1500];
650 	int err = 0, fd;
651 
652 	fd = accept(a->fd, NULL, NULL);
653 	while (fd == -1) {
654 		if (errno == EINTR)
655 			continue;
656 		err = -errno;
657 		goto done;
658 	}
659 
660 	if (settimeo(fd, 0)) {
661 		err = -errno;
662 		goto done;
663 	}
664 
665 	while (bytes < a->bytes && !READ_ONCE(a->stop)) {
666 		nr_sent = send(fd, &batch,
667 			       MIN(a->bytes - bytes, sizeof(batch)), 0);
668 		if (nr_sent == -1 && errno == EINTR)
669 			continue;
670 		if (nr_sent == -1) {
671 			err = -errno;
672 			break;
673 		}
674 		bytes += nr_sent;
675 	}
676 
677 	if (bytes != a->bytes) {
678 		log_err("send %zd expected %u", bytes, a->bytes);
679 		if (!err)
680 			err = bytes > a->bytes ? -E2BIG : -EINTR;
681 	}
682 
683 done:
684 	if (fd >= 0)
685 		close(fd);
686 	if (err) {
687 		WRITE_ONCE(a->stop, 1);
688 		return ERR_PTR(err);
689 	}
690 	return NULL;
691 }
692 
693 int send_recv_data(int lfd, int fd, uint32_t total_bytes)
694 {
695 	ssize_t nr_recv = 0, bytes = 0;
696 	struct send_recv_arg arg = {
697 		.fd	= lfd,
698 		.bytes	= total_bytes,
699 		.stop	= 0,
700 	};
701 	pthread_t srv_thread;
702 	void *thread_ret;
703 	char batch[1500];
704 	int err = 0;
705 
706 	err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
707 	if (err) {
708 		log_err("Failed to pthread_create");
709 		return err;
710 	}
711 
712 	/* recv total_bytes */
713 	while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
714 		nr_recv = recv(fd, &batch,
715 			       MIN(total_bytes - bytes, sizeof(batch)), 0);
716 		if (nr_recv == -1 && errno == EINTR)
717 			continue;
718 		if (nr_recv == -1) {
719 			err = -errno;
720 			break;
721 		}
722 		bytes += nr_recv;
723 	}
724 
725 	if (bytes != total_bytes) {
726 		log_err("recv %zd expected %u", bytes, total_bytes);
727 		if (!err)
728 			err = bytes > total_bytes ? -E2BIG : -EINTR;
729 	}
730 
731 	WRITE_ONCE(arg.stop, 1);
732 	pthread_join(srv_thread, &thread_ret);
733 	if (IS_ERR(thread_ret)) {
734 		log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
735 		err = err ? : PTR_ERR(thread_ret);
736 	}
737 
738 	return err;
739 }
740 
741 #ifdef TRAFFIC_MONITOR
/* State of one traffic monitor, created by traffic_monitor_start() and
 * destroyed by traffic_monitor_stop().
 */
struct tmonitor_ctx {
	pcap_t *pcap;		/* capture handle from traffic_monitor_prepare_pcap() */
	pcap_dumper_t *dumper;	/* writer for the packet file pkt_fname */
	pthread_t thread;	/* thread running traffic_monitor_thread() */
	int wake_fd;		/* eventfd written by traffic_monitor_stop() to wake the thread */

	volatile bool done;	/* set to true to make the thread leave its loop */
	char pkt_fname[PATH_MAX];	/* path of the packet file */
	int pcap_fd;		/* selectable fd of pcap, polled by the thread */
};
752 
753 static int __base_pr(const char *format, va_list args)
754 {
755 	return vfprintf(stdout, format, args);
756 }
757 
758 static tm_print_fn_t __tm_pr = __base_pr;
759 
760 tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
761 {
762 	tm_print_fn_t old_print_fn;
763 
764 	old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
765 
766 	return old_print_fn;
767 }
768 
769 void tm_print(const char *format, ...)
770 {
771 	tm_print_fn_t print_fn;
772 	va_list args;
773 
774 	print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
775 	if (!print_fn)
776 		return;
777 
778 	va_start(args, format);
779 	print_fn(format, args);
780 	va_end(args);
781 }
782 
783 /* Is this packet captured with a Ethernet protocol type? */
784 static bool is_ethernet(const u_char *packet)
785 {
786 	u16 arphdr_type;
787 
788 	memcpy(&arphdr_type, packet + 8, 2);
789 	arphdr_type = ntohs(arphdr_type);
790 
791 	/* Except the following cases, the protocol type contains the
792 	 * Ethernet protocol type for the packet.
793 	 *
794 	 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
795 	 */
796 	switch (arphdr_type) {
797 	case 770: /* ARPHRD_FRAD */
798 	case 778: /* ARPHDR_IPGRE */
799 	case 803: /* ARPHRD_IEEE80211_RADIOTAP */
800 		tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
801 		return false;
802 	}
803 	return true;
804 }
805 
806 static const char * const pkt_types[] = {
807 	"In",
808 	"B",			/* Broadcast */
809 	"M",			/* Multicast */
810 	"C",			/* Captured with the promiscuous mode */
811 	"Out",
812 };
813 
814 static const char *pkt_type_str(u16 pkt_type)
815 {
816 	if (pkt_type < ARRAY_SIZE(pkt_types))
817 		return pkt_types[pkt_type];
818 	return "Unknown";
819 }
820 
821 #define MAX_FLAGS_STRLEN 21
822 /* Show the information of the transport layer in the packet */
823 static void show_transport(const u_char *packet, u16 len, u32 ifindex,
824 			   const char *src_addr, const char *dst_addr,
825 			   u16 proto, bool ipv6, u8 pkt_type)
826 {
827 	char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
828 	const char *transport_str;
829 	u16 src_port, dst_port;
830 	struct udphdr *udp;
831 	struct tcphdr *tcp;
832 
833 	ifname = if_indextoname(ifindex, _ifname);
834 	if (!ifname) {
835 		snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
836 		ifname = _ifname;
837 	}
838 
839 	if (proto == IPPROTO_UDP) {
840 		udp = (struct udphdr *)packet;
841 		src_port = ntohs(udp->source);
842 		dst_port = ntohs(udp->dest);
843 		transport_str = "UDP";
844 	} else if (proto == IPPROTO_TCP) {
845 		tcp = (struct tcphdr *)packet;
846 		src_port = ntohs(tcp->source);
847 		dst_port = ntohs(tcp->dest);
848 		transport_str = "TCP";
849 	} else if (proto == IPPROTO_ICMP) {
850 		tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
851 			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
852 			 packet[0], packet[1]);
853 		return;
854 	} else if (proto == IPPROTO_ICMPV6) {
855 		tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
856 			 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
857 			 packet[0], packet[1]);
858 		return;
859 	} else {
860 		tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
861 			 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
862 			 src_addr, dst_addr, proto);
863 		return;
864 	}
865 
866 	/* TCP or UDP*/
867 
868 	if (proto == IPPROTO_TCP)
869 		snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
870 			 tcp->fin ? ", FIN" : "",
871 			 tcp->syn ? ", SYN" : "",
872 			 tcp->rst ? ", RST" : "",
873 			 tcp->ack ? ", ACK" : "");
874 
875 	if (ipv6)
876 		tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
877 			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
878 			 dst_addr, dst_port, transport_str, len, flags);
879 	else
880 		tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
881 			 ifname, pkt_type_str(pkt_type), src_addr, src_port,
882 			 dst_addr, dst_port, transport_str, len, flags);
883 }
884 
885 static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
886 {
887 	char src_buf[INET6_ADDRSTRLEN], dst_buf[INET6_ADDRSTRLEN];
888 	struct ipv6hdr *pkt = (struct ipv6hdr *)packet;
889 	const char *src, *dst;
890 	u_char proto;
891 
892 	src = inet_ntop(AF_INET6, &pkt->saddr, src_buf, sizeof(src_buf));
893 	if (!src)
894 		src = "<invalid>";
895 	dst = inet_ntop(AF_INET6, &pkt->daddr, dst_buf, sizeof(dst_buf));
896 	if (!dst)
897 		dst = "<invalid>";
898 	proto = pkt->nexthdr;
899 	show_transport(packet + sizeof(struct ipv6hdr),
900 		       ntohs(pkt->payload_len),
901 		       ifindex, src, dst, proto, true, pkt_type);
902 }
903 
904 static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
905 {
906 	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
907 	struct iphdr *pkt = (struct iphdr *)packet;
908 	const char *src, *dst;
909 	u_char proto;
910 
911 	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
912 	if (!src)
913 		src = "<invalid>";
914 	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
915 	if (!dst)
916 		dst = "<invalid>";
917 	proto = pkt->protocol;
918 	show_transport(packet + sizeof(struct iphdr),
919 		       ntohs(pkt->tot_len),
920 		       ifindex, src, dst, proto, false, pkt_type);
921 }
922 
923 static void *traffic_monitor_thread(void *arg)
924 {
925 	char *ifname, _ifname[IF_NAMESIZE];
926 	const u_char *packet, *payload;
927 	struct tmonitor_ctx *ctx = arg;
928 	pcap_dumper_t *dumper = ctx->dumper;
929 	int fd = ctx->pcap_fd, nfds, r;
930 	int wake_fd = ctx->wake_fd;
931 	struct pcap_pkthdr header;
932 	pcap_t *pcap = ctx->pcap;
933 	u32 ifindex;
934 	fd_set fds;
935 	u16 proto;
936 	u8 ptype;
937 
938 	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
939 	FD_ZERO(&fds);
940 
941 	while (!ctx->done) {
942 		FD_SET(fd, &fds);
943 		FD_SET(wake_fd, &fds);
944 		r = select(nfds, &fds, NULL, NULL, NULL);
945 		if (!r)
946 			continue;
947 		if (r < 0) {
948 			if (errno == EINTR)
949 				continue;
950 			log_err("Fail to select on pcap fd and wake fd");
951 			break;
952 		}
953 
954 		/* This instance of pcap is non-blocking */
955 		packet = pcap_next(pcap, &header);
956 		if (!packet)
957 			continue;
958 
959 		/* According to the man page of pcap_dump(), first argument
960 		 * is the pcap_dumper_t pointer even it's argument type is
961 		 * u_char *.
962 		 */
963 		pcap_dump((u_char *)dumper, &header, packet);
964 
965 		/* Not sure what other types of packets look like. Here, we
966 		 * parse only Ethernet and compatible packets.
967 		 */
968 		if (!is_ethernet(packet))
969 			continue;
970 
971 		/* Skip SLL2 header
972 		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
973 		 *
974 		 * Although the document doesn't mention that, the payload
975 		 * doesn't include the Ethernet header. The payload starts
976 		 * from the first byte of the network layer header.
977 		 */
978 		payload = packet + 20;
979 
980 		memcpy(&proto, packet, 2);
981 		proto = ntohs(proto);
982 		memcpy(&ifindex, packet + 4, 4);
983 		ifindex = ntohl(ifindex);
984 		ptype = packet[10];
985 
986 		if (proto == ETH_P_IPV6) {
987 			show_ipv6_packet(payload, ifindex, ptype);
988 		} else if (proto == ETH_P_IP) {
989 			show_ipv4_packet(payload, ifindex, ptype);
990 		} else {
991 			ifname = if_indextoname(ifindex, _ifname);
992 			if (!ifname) {
993 				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
994 				ifname = _ifname;
995 			}
996 
997 			tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
998 				 ifname, pkt_type_str(ptype), proto);
999 		}
1000 	}
1001 
1002 	return NULL;
1003 }
1004 
1005 /* Prepare the pcap handle to capture packets.
1006  *
1007  * This pcap is non-blocking and immediate mode is enabled to receive
1008  * captured packets as soon as possible.  The snaplen is set to 1024 bytes
1009  * to limit the size of captured content. The format of the link-layer
1010  * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
1011  * technologies.
1012  */
1013 static pcap_t *traffic_monitor_prepare_pcap(void)
1014 {
1015 	char errbuf[PCAP_ERRBUF_SIZE];
1016 	pcap_t *pcap;
1017 	int r;
1018 
1019 	/* Listen on all NICs in the namespace */
1020 	pcap = pcap_create("any", errbuf);
1021 	if (!pcap) {
1022 		log_err("Failed to open pcap: %s", errbuf);
1023 		return NULL;
1024 	}
1025 	/* Limit the size of the packet (first N bytes) */
1026 	r = pcap_set_snaplen(pcap, 1024);
1027 	if (r) {
1028 		log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
1029 		goto error;
1030 	}
1031 	/* To receive packets as fast as possible */
1032 	r = pcap_set_immediate_mode(pcap, 1);
1033 	if (r) {
1034 		log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
1035 		goto error;
1036 	}
1037 	r = pcap_setnonblock(pcap, 1, errbuf);
1038 	if (r) {
1039 		log_err("Failed to set nonblock: %s", errbuf);
1040 		goto error;
1041 	}
1042 	r = pcap_activate(pcap);
1043 	if (r) {
1044 		log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
1045 		goto error;
1046 	}
1047 	/* Determine the format of the link-layer header */
1048 	r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
1049 	if (r) {
1050 		log_err("Failed to set datalink: %s", pcap_geterr(pcap));
1051 		goto error;
1052 	}
1053 
1054 	return pcap;
1055 error:
1056 	pcap_close(pcap);
1057 	return NULL;
1058 }
1059 
/* Build a file-name friendly test name in @buf (of size @len).
 *
 * The result is "<test_name>__<subtest_name>", or just "<test_name>" when
 * @subtest_name is NULL, with every '/' and ' ' replaced by '_'.  The text
 * is truncated to fit @len.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *c;

	if (!subtest_name)
		snprintf(buf, len, "%s", test_name);
	else
		snprintf(buf, len, "%s__%s", test_name, subtest_name);

	for (c = buf; *c; c++) {
		if (*c == '/' || *c == ' ')
			*c = '_';
	}
}
1073 
1074 #define PCAP_DIR "/tmp/tmon_pcap"
1075 
1076 /* Start to monitor the network traffic in the given network namespace.
1077  *
1078  * netns: the name of the network namespace to monitor. If NULL, the
1079  *        current network namespace is monitored.
1080  * test_name: the name of the running test.
1081  * subtest_name: the name of the running subtest if there is. It should be
1082  *               NULL if it is not a subtest.
1083  *
1084  * This function will start a thread to capture packets going through NICs
1085  * in the give network namespace.
1086  */
1087 struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
1088 					   const char *subtest_name)
1089 {
1090 	struct nstoken *nstoken = NULL;
1091 	struct tmonitor_ctx *ctx;
1092 	char test_name_buf[64];
1093 	static int tmon_seq;
1094 	int r;
1095 
1096 	if (netns) {
1097 		nstoken = open_netns(netns);
1098 		if (!nstoken)
1099 			return NULL;
1100 	}
1101 	ctx = malloc(sizeof(*ctx));
1102 	if (!ctx) {
1103 		log_err("Failed to malloc ctx");
1104 		goto fail_ctx;
1105 	}
1106 	memset(ctx, 0, sizeof(*ctx));
1107 
1108 	encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
1109 	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
1110 		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
1111 		 test_name_buf, netns ? netns : "unknown");
1112 
1113 	r = mkdir(PCAP_DIR, 0755);
1114 	if (r && errno != EEXIST) {
1115 		log_err("Failed to create " PCAP_DIR);
1116 		goto fail_pcap;
1117 	}
1118 
1119 	ctx->pcap = traffic_monitor_prepare_pcap();
1120 	if (!ctx->pcap)
1121 		goto fail_pcap;
1122 	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
1123 	if (ctx->pcap_fd < 0) {
1124 		log_err("Failed to get pcap fd");
1125 		goto fail_dumper;
1126 	}
1127 
1128 	/* Create a packet file */
1129 	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
1130 	if (!ctx->dumper) {
1131 		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
1132 		goto fail_dumper;
1133 	}
1134 
1135 	/* Create an eventfd to wake up the monitor thread */
1136 	ctx->wake_fd = eventfd(0, 0);
1137 	if (ctx->wake_fd < 0) {
1138 		log_err("Failed to create eventfd");
1139 		goto fail_eventfd;
1140 	}
1141 
1142 	r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
1143 	if (r) {
1144 		log_err("Failed to create thread");
1145 		goto fail;
1146 	}
1147 
1148 	close_netns(nstoken);
1149 
1150 	return ctx;
1151 
1152 fail:
1153 	close(ctx->wake_fd);
1154 
1155 fail_eventfd:
1156 	pcap_dump_close(ctx->dumper);
1157 	unlink(ctx->pkt_fname);
1158 
1159 fail_dumper:
1160 	pcap_close(ctx->pcap);
1161 
1162 fail_pcap:
1163 	free(ctx);
1164 
1165 fail_ctx:
1166 	close_netns(nstoken);
1167 
1168 	return NULL;
1169 }
1170 
1171 static void traffic_monitor_release(struct tmonitor_ctx *ctx)
1172 {
1173 	pcap_close(ctx->pcap);
1174 	pcap_dump_close(ctx->dumper);
1175 
1176 	close(ctx->wake_fd);
1177 
1178 	free(ctx);
1179 }
1180 
1181 /* Stop the network traffic monitor.
1182  *
1183  * ctx: the context returned by traffic_monitor_start()
1184  */
1185 void traffic_monitor_stop(struct tmonitor_ctx *ctx)
1186 {
1187 	__u64 w = 1;
1188 
1189 	if (!ctx)
1190 		return;
1191 
1192 	/* Stop the monitor thread */
1193 	ctx->done = true;
1194 	/* Wake up the background thread. */
1195 	write(ctx->wake_fd, &w, sizeof(w));
1196 	pthread_join(ctx->thread, NULL);
1197 
1198 	tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
1199 
1200 	traffic_monitor_release(ctx);
1201 }
1202 
1203 #endif /* TRAFFIC_MONITOR */
1204