1 // SPDX-License-Identifier: GPL-2.0-only
2 #define _GNU_SOURCE
3
4 #include <errno.h>
5 #include <stdbool.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <unistd.h>
9 #include <sched.h>
10
11 #include <arpa/inet.h>
12 #include <sys/mount.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/un.h>
16 #include <sys/eventfd.h>
17
18 #include <linux/err.h>
19 #include <linux/in.h>
20 #include <linux/in6.h>
21 #include <linux/limits.h>
22
23 #include <linux/ip.h>
24 #include <netinet/udp.h>
25 #include <netinet/tcp.h>
26 #include <net/if.h>
27
28 #include "bpf_util.h"
29 #include "network_helpers.h"
30 #include "test_progs.h"
31
32 #ifdef TRAFFIC_MONITOR
33 /* Prevent pcap.h from including pcap/bpf.h and causing conflicts */
34 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
35 #include <pcap/pcap.h>
36 #include <pcap/dlt.h>
37 #endif
38
39 #ifndef IPPROTO_MPTCP
40 #define IPPROTO_MPTCP 262
41 #endif
42
/* Human-readable description of the current errno; "None" when errno is 0. */
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
/* Print a printf-style message to stderr, prefixed with the source file,
 * line and the errno description, followed by a newline. errno is saved on
 * entry and restored afterwards, so logging does not clobber it for the
 * caller.
 */
#define log_err(MSG, ...) ({						\
	int __save = errno;						\
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n",			\
		__FILE__, __LINE__, clean_errno(),			\
		##__VA_ARGS__);						\
	errno = __save;							\
})
51
/* IPv4/TCP packet template: Ethernet protocol ETH_P_IP, IP header length
 * field (ihl) of 5, IP protocol TCP, total length MAGIC_BYTES, TCP data
 * offset of 5 and the TCP urgent pointer set to 123. Every field not listed
 * here is zero-initialized.
 */
struct ipv4_packet pkt_v4 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
	.iph.ihl = 5,
	.iph.protocol = IPPROTO_TCP,
	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
60
/* IPv6/TCP packet template: Ethernet protocol ETH_P_IPV6, next header TCP,
 * payload length MAGIC_BYTES, TCP data offset of 5 and the TCP urgent
 * pointer set to 123. Every field not listed here is zero-initialized.
 */
struct ipv6_packet pkt_v6 = {
	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
	.iph.nexthdr = IPPROTO_TCP,
	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
	.tcp.urg_ptr = 123,
	.tcp.doff = 5,
};
68
/* All-zero option set substituted whenever a helper is called with NULL opts. */
static const struct network_helper_opts default_opts;
70
/* Apply the same receive and send timeout to socket @fd.
 *
 * A positive @timeout_ms is used as given (milliseconds); any other value
 * selects the default of 3 seconds.
 *
 * Returns 0 on success, or -1 (after logging) if either setsockopt() fails.
 */
int settimeo(int fd, int timeout_ms)
{
	struct timeval tv;

	if (timeout_ms > 0) {
		tv.tv_sec = timeout_ms / 1000;
		tv.tv_usec = (timeout_ms % 1000) * 1000;
	} else {
		tv.tv_sec = 3;
		tv.tv_usec = 0;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_RCVTIMEO");
		return -1;
	}

	if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) != 0) {
		log_err("Failed to set SO_SNDTIMEO");
		return -1;
	}

	return 0;
}
94
/* close(fd) while preserving the errno value that was set before the call. */
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
96
start_server_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)97 int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
98 const struct network_helper_opts *opts)
99 {
100 int on = 1, fd;
101
102 if (!opts)
103 opts = &default_opts;
104
105 fd = socket(addr->ss_family, type, opts->proto);
106 if (fd < 0) {
107 log_err("Failed to create server socket");
108 return -1;
109 }
110
111 if (settimeo(fd, opts->timeout_ms))
112 goto error_close;
113
114 if (type == SOCK_STREAM &&
115 setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
116 log_err("Failed to enable SO_REUSEADDR");
117 goto error_close;
118 }
119
120 if (opts->post_socket_cb &&
121 opts->post_socket_cb(fd, opts->cb_opts)) {
122 log_err("Failed to call post_socket_cb");
123 goto error_close;
124 }
125
126 if (bind(fd, (struct sockaddr *)addr, addrlen) < 0) {
127 log_err("Failed to bind socket");
128 goto error_close;
129 }
130
131 if (type == SOCK_STREAM) {
132 if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
133 log_err("Failed to listed on socket");
134 goto error_close;
135 }
136 }
137
138 return fd;
139
140 error_close:
141 save_errno_close(fd);
142 return -1;
143 }
144
/* Build a socket address from @family, @addr_str and @port with
 * make_sockaddr(), then start a server on it via start_server_addr().
 *
 * A NULL @opts is replaced by default_opts.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server_str(int family, int type, const char *addr_str, __u16 port,
		     const struct network_helper_opts *opts)
{
	const struct network_helper_opts *eff_opts = opts ? opts : &default_opts;
	struct sockaddr_storage ss;
	socklen_t ss_len;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return -1;

	return start_server_addr(type, &ss, ss_len, eff_opts);
}
159
/* Convenience wrapper around start_server_str() for callers that only need
 * to choose a timeout; every other option is left zeroed.
 *
 * Returns the server fd, or -1 on failure.
 */
int start_server(int family, int type, const char *addr_str, __u16 port,
		 int timeout_ms)
{
	struct network_helper_opts timeout_only = {
		.timeout_ms = timeout_ms,
	};
	int server_fd;

	server_fd = start_server_str(family, type, addr_str, port, &timeout_only);

	return server_fd;
}
169
reuseport_cb(int fd,void * opts)170 static int reuseport_cb(int fd, void *opts)
171 {
172 int on = 1;
173
174 return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
175 }
176
/* Start @nr_listens servers that share one address using SO_REUSEPORT.
 *
 * The first server is started on the address built from @family,
 * @addr_str and @port. Its bound address is then read back with
 * getsockname() and reused for the remaining servers.
 *
 * Returns a malloc()ed array of @nr_listens fds (release it with
 * free_fds()), or NULL if @nr_listens is 0 or any step fails. On failure
 * every fd opened so far is closed.
 */
int *start_reuseport_server(int family, int type, const char *addr_str,
			    __u16 port, int timeout_ms, unsigned int nr_listens)
{
	struct network_helper_opts opts = {
		.timeout_ms = timeout_ms,
		.post_socket_cb = reuseport_cb,
	};
	struct sockaddr_storage addr;
	unsigned int nr_open = 0;
	socklen_t addrlen;
	int *fds;

	if (nr_listens == 0)
		return NULL;

	if (make_sockaddr(family, addr_str, port, &addr, &addrlen) != 0)
		return NULL;

	fds = malloc(sizeof(*fds) * nr_listens);
	if (fds == NULL)
		return NULL;

	fds[0] = start_server_addr(type, &addr, addrlen, &opts);
	if (fds[0] == -1)
		goto err_free;
	nr_open++;

	/* Pick up the address actually bound by the first server. */
	if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen) != 0)
		goto err_free;

	while (nr_open < nr_listens) {
		fds[nr_open] = start_server_addr(type, &addr, addrlen, &opts);
		if (fds[nr_open] == -1)
			goto err_free;
		nr_open++;
	}

	return fds;

err_free:
	free_fds(fds, nr_open);
	return NULL;
}
219
/* Close the first @nr_close_fds descriptors in @fds (last to first) and
 * free the array. A NULL @fds is a no-op.
 */
void free_fds(int *fds, unsigned int nr_close_fds)
{
	unsigned int i;

	if (fds == NULL)
		return;

	for (i = nr_close_fds; i > 0; i--)
		close(fds[i - 1]);

	free(fds);
}
228
/* Open a TCP client socket towards the address @server_fd is bound to and
 * send @data_len bytes of @data with MSG_FASTOPEN.
 *
 * @timeout_ms is applied to the client socket through settimeo().
 *
 * Returns the client fd, or -1 on failure (including when sendto() does
 * not report exactly @data_len bytes). On failure the client socket is
 * closed and errno is preserved across that close.
 */
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
		     int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);
	int client_fd, sent;

	if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len)) {
		log_err("Failed to get server addr");
		return -1;
	}

	/* The client uses the same address family as the server. */
	client_fd = socket(srv_addr.ss_family, SOCK_STREAM, 0);
	if (client_fd < 0) {
		log_err("Failed to create client socket");
		return -1;
	}

	if (settimeo(client_fd, timeout_ms) == 0) {
		sent = sendto(client_fd, data, data_len, MSG_FASTOPEN,
			      (struct sockaddr *)&srv_addr, srv_len);
		if (sent == data_len)
			return client_fd;

		log_err("sendto(data, %u) != %d\n", data_len, sent);
	}

	save_errno_close(client_fd);
	return -1;
}
265
client_socket(int family,int type,const struct network_helper_opts * opts)266 int client_socket(int family, int type,
267 const struct network_helper_opts *opts)
268 {
269 int fd;
270
271 if (!opts)
272 opts = &default_opts;
273
274 fd = socket(family, type, opts->proto);
275 if (fd < 0) {
276 log_err("Failed to create client socket");
277 return -1;
278 }
279
280 if (settimeo(fd, opts->timeout_ms))
281 goto error_close;
282
283 if (opts->post_socket_cb &&
284 opts->post_socket_cb(fd, opts->cb_opts))
285 goto error_close;
286
287 return fd;
288
289 error_close:
290 save_errno_close(fd);
291 return -1;
292 }
293
connect_to_addr(int type,const struct sockaddr_storage * addr,socklen_t addrlen,const struct network_helper_opts * opts)294 int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
295 const struct network_helper_opts *opts)
296 {
297 int fd;
298
299 if (!opts)
300 opts = &default_opts;
301
302 fd = client_socket(addr->ss_family, type, opts);
303 if (fd < 0) {
304 log_err("Failed to create client socket");
305 return -1;
306 }
307
308 if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
309 log_err("Failed to connect to server");
310 save_errno_close(fd);
311 return -1;
312 }
313
314 return fd;
315 }
316
/* Build a socket address from @family, @addr_str and @port with
 * make_sockaddr(), then connect to it via connect_to_addr().
 *
 * A NULL @opts is replaced by default_opts.
 *
 * Returns the connected fd, or -1 on failure.
 */
int connect_to_addr_str(int family, int type, const char *addr_str, __u16 port,
			const struct network_helper_opts *opts)
{
	const struct network_helper_opts *eff_opts = opts ? opts : &default_opts;
	struct sockaddr_storage ss;
	socklen_t ss_len;

	if (make_sockaddr(family, addr_str, port, &ss, &ss_len) != 0)
		return -1;

	return connect_to_addr(type, &ss, ss_len, eff_opts);
}
331
connect_to_fd_opts(int server_fd,const struct network_helper_opts * opts)332 int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
333 {
334 struct sockaddr_storage addr;
335 socklen_t addrlen, optlen;
336 int type;
337
338 if (!opts)
339 opts = &default_opts;
340
341 optlen = sizeof(type);
342 if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
343 log_err("getsockopt(SOL_TYPE)");
344 return -1;
345 }
346
347 addrlen = sizeof(addr);
348 if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
349 log_err("Failed to get server addr");
350 return -1;
351 }
352
353 return connect_to_addr(type, &addr, addrlen, opts);
354 }
355
connect_to_fd(int server_fd,int timeout_ms)356 int connect_to_fd(int server_fd, int timeout_ms)
357 {
358 struct network_helper_opts opts = {
359 .timeout_ms = timeout_ms,
360 };
361 socklen_t optlen;
362 int protocol;
363
364 optlen = sizeof(protocol);
365 if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
366 log_err("getsockopt(SOL_PROTOCOL)");
367 return -1;
368 }
369 opts.proto = protocol;
370
371 return connect_to_fd_opts(server_fd, &opts);
372 }
373
/* Connect the existing socket @client_fd to the address @server_fd is
 * bound to, after applying @timeout_ms to @client_fd with settimeo().
 *
 * Returns 0 on success, -1 on failure. @client_fd is never closed here.
 */
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
	struct sockaddr_storage srv_addr;
	socklen_t srv_len = sizeof(srv_addr);

	if (settimeo(client_fd, timeout_ms) != 0)
		return -1;

	if (getsockname(server_fd, (struct sockaddr *)&srv_addr, &srv_len) != 0) {
		log_err("Failed to get server addr");
		return -1;
	}

	if (connect(client_fd, (const struct sockaddr *)&srv_addr, srv_len) != 0) {
		log_err("Failed to connect to server");
		return -1;
	}

	return 0;
}
394
/* Fill @addr with a socket address for @family.
 *
 * AF_INET / AF_INET6: @addr_str is parsed with inet_pton(); a NULL
 * @addr_str leaves the address part zeroed. @port is stored in network
 * byte order.
 *
 * AF_UNIX: @addr_str is required and is used as the name of an abstract
 * unix socket (leading NUL byte in sun_path); @port is ignored. A name
 * that does not fit in sun_path is truncated, and the reported length
 * matches what was actually stored.
 *
 * If @len is non-NULL it receives the length of the address written.
 *
 * Returns 0 on success, -1 on an unparsable address, a missing AF_UNIX
 * name, or an unsupported @family.
 */
int make_sockaddr(int family, const char *addr_str, __u16 port,
		  struct sockaddr_storage *addr, socklen_t *len)
{
	if (family == AF_INET) {
		struct sockaddr_in *sin = (void *)addr;

		memset(addr, 0, sizeof(*sin));
		sin->sin_family = AF_INET;
		sin->sin_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
			log_err("inet_pton(AF_INET, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin);
		return 0;
	} else if (family == AF_INET6) {
		struct sockaddr_in6 *sin6 = (void *)addr;

		memset(addr, 0, sizeof(*sin6));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = htons(port);
		if (addr_str &&
		    inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
			log_err("inet_pton(AF_INET6, %s)", addr_str);
			return -1;
		}
		if (len)
			*len = sizeof(*sin6);
		return 0;
	} else if (family == AF_UNIX) {
		/* Note that we always use abstract unix sockets to avoid having
		 * to clean up leftover files.
		 */
		struct sockaddr_un *sun = (void *)addr;

		/* Unlike the inet families there is no usable default name. */
		if (!addr_str) {
			errno = EINVAL;
			log_err("AF_UNIX address requires a name");
			return -1;
		}

		memset(addr, 0, sizeof(*sun));
		sun->sun_family = family;
		sun->sun_path[0] = 0;
		strscpy(sun->sun_path + 1, addr_str, sizeof(sun->sun_path) - 1);
		/* Measure the stored (possibly truncated) name, not the input,
		 * so the length never extends past what sun_path holds.
		 */
		if (len)
			*len = offsetof(struct sockaddr_un, sun_path) + 1 +
			       strlen(sun->sun_path + 1);
		return 0;
	}
	return -1;
}
442
/* Return the ping command line to use for @family.
 *
 * For AF_INET6 this is "ping6" when `which ping6` succeeds and "ping -6"
 * otherwise; for every other family it is "ping". The returned string is
 * a literal and must not be modified or freed.
 */
char *ping_command(int family)
{
	if (family != AF_INET6)
		return "ping";

	/* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
	return system("which ping6 >/dev/null 2>&1") ? "ping -6" : "ping6";
}
454
/* Append the calling thread's id, zero-padded to 7 digits, to the string
 * in @str, whose buffer is @sz bytes long.
 *
 * Returns 0 on success, or -1 if @str is NULL or the buffer has no room
 * for 7 more characters plus the terminating NUL.
 */
int append_tid(char *str, size_t sz)
{
	size_t end;

	if (!str)
		return -1;

	end = strlen(str);
	if (end + 8 > sz)
		return -1;

	/* Bound the write to the 8 bytes verified above: a tid wider than
	 * 7 digits is truncated instead of overrunning the buffer.
	 */
	snprintf(&str[end], 8, "%07ld", sys_gettid());

	return 0;
}
471
/* Delete the network namespace @name by running `ip netns del`, with the
 * command's output discarded.
 *
 * Returns the system() result, or -1 if the command string could not be
 * allocated.
 */
int remove_netns(const char *name)
{
	char *cmdline;
	int status;

	if (asprintf(&cmdline, "ip netns del %s >/dev/null 2>&1", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(cmdline);
	free(cmdline);

	return status;
}
487
/* Create the network namespace @name with `ip netns add` and bring its
 * loopback interface up.
 *
 * Returns 0 on success. If `ip netns add` fails its system() status is
 * returned. If the second command string cannot be allocated the namespace
 * is removed again and -1 is returned. Otherwise the system() status of
 * the `link set lo up` command is returned.
 */
int make_netns(const char *name)
{
	char *cmdline;
	int status;

	if (asprintf(&cmdline, "ip netns add %s", name) < 0) {
		log_err("Failed to malloc cmd");
		return -1;
	}

	status = system(cmdline);
	free(cmdline);
	if (status != 0)
		return status;

	if (asprintf(&cmdline, "ip -n %s link set lo up", name) < 0) {
		log_err("Failed to malloc cmd for setting up lo");
		remove_netns(name);
		return -1;
	}

	status = system(cmdline);
	free(cmdline);

	return status;
}
517
/* Handle returned by open_netns(); close_netns() uses it to switch back. */
struct nstoken {
	int orig_netns_fd;	/* fd of /proc/self/ns/net opened before switching */
};
521
open_netns(const char * name)522 struct nstoken *open_netns(const char *name)
523 {
524 int nsfd;
525 char nspath[PATH_MAX];
526 int err;
527 struct nstoken *token;
528
529 token = calloc(1, sizeof(struct nstoken));
530 if (!token) {
531 log_err("Failed to malloc token");
532 return NULL;
533 }
534
535 token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
536 if (token->orig_netns_fd == -1) {
537 log_err("Failed to open(/proc/self/ns/net)");
538 goto fail;
539 }
540
541 snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
542 nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
543 if (nsfd == -1) {
544 log_err("Failed to open(%s)", nspath);
545 goto fail;
546 }
547
548 err = setns(nsfd, CLONE_NEWNET);
549 close(nsfd);
550 if (err) {
551 log_err("Failed to setns(nsfd)");
552 goto fail;
553 }
554
555 return token;
556 fail:
557 if (token->orig_netns_fd != -1)
558 close(token->orig_netns_fd);
559 free(token);
560 return NULL;
561 }
562
close_netns(struct nstoken * token)563 void close_netns(struct nstoken *token)
564 {
565 if (!token)
566 return;
567
568 if (setns(token->orig_netns_fd, CLONE_NEWNET))
569 log_err("Failed to setns(orig_netns_fd)");
570 close(token->orig_netns_fd);
571 free(token);
572 }
573
open_tuntap(const char * dev_name,bool need_mac)574 int open_tuntap(const char *dev_name, bool need_mac)
575 {
576 int err = 0;
577 struct ifreq ifr;
578 int fd = open("/dev/net/tun", O_RDWR);
579
580 if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
581 return -1;
582
583 ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
584 strscpy(ifr.ifr_name, dev_name);
585
586 err = ioctl(fd, TUNSETIFF, &ifr);
587 if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
588 close(fd);
589 return -1;
590 }
591
592 err = fcntl(fd, F_SETFL, O_NONBLOCK);
593 if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
594 close(fd);
595 return -1;
596 }
597
598 return fd;
599 }
600
/* Return the local port of @sock_fd exactly as stored in its socket
 * address (sin_port / sin6_port, no byte-order conversion is applied).
 *
 * Returns the getsockname() result if that call fails, or -1 if the socket
 * is neither AF_INET nor AF_INET6.
 */
int get_socket_local_port(int sock_fd)
{
	struct sockaddr_storage local;
	socklen_t local_len = sizeof(local);
	int rc;

	rc = getsockname(sock_fd, (struct sockaddr *)&local, &local_len);
	if (rc < 0)
		return rc;

	switch (local.ss_family) {
	case AF_INET:
		return ((struct sockaddr_in *)&local)->sin_port;
	case AF_INET6:
		return ((struct sockaddr_in6 *)&local)->sin6_port;
	default:
		return -1;
	}
}
623
get_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)624 int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
625 {
626 struct ifreq ifr = {0};
627 int sockfd, err;
628
629 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
630 if (sockfd < 0)
631 return -errno;
632
633 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
634
635 ring_param->cmd = ETHTOOL_GRINGPARAM;
636 ifr.ifr_data = (char *)ring_param;
637
638 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
639 err = errno;
640 close(sockfd);
641 return -err;
642 }
643
644 close(sockfd);
645 return 0;
646 }
647
set_hw_ring_size(char * ifname,struct ethtool_ringparam * ring_param)648 int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
649 {
650 struct ifreq ifr = {0};
651 int sockfd, err;
652
653 sockfd = socket(AF_INET, SOCK_DGRAM, 0);
654 if (sockfd < 0)
655 return -errno;
656
657 memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
658
659 ring_param->cmd = ETHTOOL_SRINGPARAM;
660 ifr.ifr_data = (char *)ring_param;
661
662 if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
663 err = errno;
664 close(sockfd);
665 return -err;
666 }
667
668 close(sockfd);
669 return 0;
670 }
671
/* Arguments shared between send_recv_data() and its sender thread. */
struct send_recv_arg {
	int fd;		/* listening socket the sender thread accepts on */
	uint32_t bytes;	/* number of bytes the sender thread must send */
	int stop;	/* set to 1 by either side to make both loops exit */
};
677
send_recv_server(void * arg)678 static void *send_recv_server(void *arg)
679 {
680 struct send_recv_arg *a = (struct send_recv_arg *)arg;
681 ssize_t nr_sent = 0, bytes = 0;
682 char batch[1500];
683 int err = 0, fd;
684
685 fd = accept(a->fd, NULL, NULL);
686 while (fd == -1) {
687 if (errno == EINTR)
688 continue;
689 err = -errno;
690 goto done;
691 }
692
693 if (settimeo(fd, 0)) {
694 err = -errno;
695 goto done;
696 }
697
698 while (bytes < a->bytes && !READ_ONCE(a->stop)) {
699 nr_sent = send(fd, &batch,
700 MIN(a->bytes - bytes, sizeof(batch)), 0);
701 if (nr_sent == -1 && errno == EINTR)
702 continue;
703 if (nr_sent == -1) {
704 err = -errno;
705 break;
706 }
707 bytes += nr_sent;
708 }
709
710 if (bytes != a->bytes) {
711 log_err("send %zd expected %u", bytes, a->bytes);
712 if (!err)
713 err = bytes > a->bytes ? -E2BIG : -EINTR;
714 }
715
716 done:
717 if (fd >= 0)
718 close(fd);
719 if (err) {
720 WRITE_ONCE(a->stop, 1);
721 return ERR_PTR(err);
722 }
723 return NULL;
724 }
725
send_recv_data(int lfd,int fd,uint32_t total_bytes)726 int send_recv_data(int lfd, int fd, uint32_t total_bytes)
727 {
728 ssize_t nr_recv = 0, bytes = 0;
729 struct send_recv_arg arg = {
730 .fd = lfd,
731 .bytes = total_bytes,
732 .stop = 0,
733 };
734 pthread_t srv_thread;
735 void *thread_ret;
736 char batch[1500];
737 int err = 0;
738
739 err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
740 if (err) {
741 log_err("Failed to pthread_create");
742 return err;
743 }
744
745 /* recv total_bytes */
746 while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
747 nr_recv = recv(fd, &batch,
748 MIN(total_bytes - bytes, sizeof(batch)), 0);
749 if (nr_recv == -1 && errno == EINTR)
750 continue;
751 if (nr_recv == -1) {
752 err = -errno;
753 break;
754 }
755 bytes += nr_recv;
756 }
757
758 if (bytes != total_bytes) {
759 log_err("recv %zd expected %u", bytes, total_bytes);
760 if (!err)
761 err = bytes > total_bytes ? -E2BIG : -EINTR;
762 }
763
764 WRITE_ONCE(arg.stop, 1);
765 pthread_join(srv_thread, &thread_ret);
766 if (IS_ERR(thread_ret)) {
767 log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
768 err = err ? : PTR_ERR(thread_ret);
769 }
770
771 return err;
772 }
773
tc_prog_attach(const char * dev,int ingress_fd,int egress_fd)774 int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
775 {
776 int ifindex, ret;
777
778 if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
779 "at least one program fd is valid"))
780 return -1;
781
782 ifindex = if_nametoindex(dev);
783 if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
784 return -1;
785
786 DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
787 .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
788 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
789 .priority = 1, .prog_fd = ingress_fd);
790 DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
791 .priority = 1, .prog_fd = egress_fd);
792
793 ret = bpf_tc_hook_create(&hook);
794 if (!ASSERT_OK(ret, "create tc hook"))
795 return ret;
796
797 if (ingress_fd >= 0) {
798 hook.attach_point = BPF_TC_INGRESS;
799 ret = bpf_tc_attach(&hook, &opts1);
800 if (!ASSERT_OK(ret, "bpf_tc_attach")) {
801 bpf_tc_hook_destroy(&hook);
802 return ret;
803 }
804 }
805
806 if (egress_fd >= 0) {
807 hook.attach_point = BPF_TC_EGRESS;
808 ret = bpf_tc_attach(&hook, &opts2);
809 if (!ASSERT_OK(ret, "bpf_tc_attach")) {
810 bpf_tc_hook_destroy(&hook);
811 return ret;
812 }
813 }
814
815 return 0;
816 }
817
818 #ifdef TRAFFIC_MONITOR
/* State of one traffic monitor, shared between the API functions and the
 * capture thread.
 */
struct tmonitor_ctx {
	pcap_t *pcap;		/* capture handle from traffic_monitor_prepare_pcap() */
	pcap_dumper_t *dumper;	/* dump writer opened on pkt_fname */
	pthread_t thread;	/* thread running traffic_monitor_thread() */
	int wake_fd;		/* eventfd written by traffic_monitor_stop() to wake the thread */

	volatile bool done;	/* set by traffic_monitor_stop() to end the thread's loop */
	char pkt_fname[PATH_MAX];	/* path of the packet dump file */
	int pcap_fd;		/* selectable fd of pcap, used in select() */
};
829
/* Default print callback: formats the message to stdout and returns the
 * vprintf() result.
 */
static int __base_pr(const char *format, va_list args)
{
	return vprintf(format, args);
}
834
/* Print callback used by tm_print(); starts as __base_pr and is replaced
 * through traffic_monitor_set_print().
 */
static tm_print_fn_t __tm_pr = __base_pr;
836
/* Install @fn as the callback used by tm_print() and return the previously
 * installed callback. The swap is a single atomic exchange. Passing NULL
 * makes tm_print() discard its output.
 */
tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
{
	return __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
}
845
tm_print(const char * format,...)846 void tm_print(const char *format, ...)
847 {
848 tm_print_fn_t print_fn;
849 va_list args;
850
851 print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
852 if (!print_fn)
853 return;
854
855 va_start(args, format);
856 print_fn(format, args);
857 va_end(args);
858 }
859
860 /* Is this packet captured with a Ethernet protocol type? */
is_ethernet(const u_char * packet)861 static bool is_ethernet(const u_char *packet)
862 {
863 u16 arphdr_type;
864
865 memcpy(&arphdr_type, packet + 8, 2);
866 arphdr_type = ntohs(arphdr_type);
867
868 /* Except the following cases, the protocol type contains the
869 * Ethernet protocol type for the packet.
870 *
871 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
872 */
873 switch (arphdr_type) {
874 case 770: /* ARPHRD_FRAD */
875 case 778: /* ARPHDR_IPGRE */
876 case 803: /* ARPHRD_IEEE80211_RADIOTAP */
877 tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
878 return false;
879 }
880 return true;
881 }
882
/* Short labels for packet types, indexed by the packet type value; see
 * pkt_type_str().
 */
static const char * const pkt_types[] = {
	"In",
	"B",			/* Broadcast */
	"M",			/* Multicast */
	"C",			/* Captured with the promiscuous mode */
	"Out",
};
890
/* Map a packet type value to its label in pkt_types[]; values outside the
 * table yield "Unknown".
 */
static const char *pkt_type_str(u16 pkt_type)
{
	return pkt_type < ARRAY_SIZE(pkt_types) ? pkt_types[pkt_type] : "Unknown";
}
897
898 #define MAX_FLAGS_STRLEN 21
899 /* Show the information of the transport layer in the packet */
show_transport(const u_char * packet,u16 len,u32 ifindex,const char * src_addr,const char * dst_addr,u16 proto,bool ipv6,u8 pkt_type)900 static void show_transport(const u_char *packet, u16 len, u32 ifindex,
901 const char *src_addr, const char *dst_addr,
902 u16 proto, bool ipv6, u8 pkt_type)
903 {
904 char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
905 const char *transport_str;
906 u16 src_port, dst_port;
907 struct udphdr *udp;
908 struct tcphdr *tcp;
909
910 ifname = if_indextoname(ifindex, _ifname);
911 if (!ifname) {
912 snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
913 ifname = _ifname;
914 }
915
916 if (proto == IPPROTO_UDP) {
917 udp = (struct udphdr *)packet;
918 src_port = ntohs(udp->source);
919 dst_port = ntohs(udp->dest);
920 transport_str = "UDP";
921 } else if (proto == IPPROTO_TCP) {
922 tcp = (struct tcphdr *)packet;
923 src_port = ntohs(tcp->source);
924 dst_port = ntohs(tcp->dest);
925 transport_str = "TCP";
926 } else if (proto == IPPROTO_ICMP) {
927 tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
928 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
929 packet[0], packet[1]);
930 return;
931 } else if (proto == IPPROTO_ICMPV6) {
932 tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
933 ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
934 packet[0], packet[1]);
935 return;
936 } else {
937 tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
938 ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
939 src_addr, dst_addr, proto);
940 return;
941 }
942
943 /* TCP or UDP*/
944
945 if (proto == IPPROTO_TCP)
946 snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
947 tcp->fin ? ", FIN" : "",
948 tcp->syn ? ", SYN" : "",
949 tcp->rst ? ", RST" : "",
950 tcp->ack ? ", ACK" : "");
951
952 if (ipv6)
953 tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
954 ifname, pkt_type_str(pkt_type), src_addr, src_port,
955 dst_addr, dst_port, transport_str, len, flags);
956 else
957 tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
958 ifname, pkt_type_str(pkt_type), src_addr, src_port,
959 dst_addr, dst_port, transport_str, len, flags);
960 }
961
/* Print one line describing the IPv6 packet at @packet.
 *
 * The source and destination addresses are formatted with inet_ntop()
 * ("<invalid>" if that fails). The bytes following the fixed IPv6 header
 * are handed to show_transport() together with the header's payload
 * length and next-header value.
 */
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	struct ipv6hdr *ip6 = (struct ipv6hdr *)packet;
	char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
	const char *src, *dst;

	src = inet_ntop(AF_INET6, &ip6->saddr, saddr, sizeof(saddr));
	dst = inet_ntop(AF_INET6, &ip6->daddr, daddr, sizeof(daddr));

	show_transport(packet + sizeof(struct ipv6hdr),
		       ntohs(ip6->payload_len), ifindex,
		       src ? src : "<invalid>",
		       dst ? dst : "<invalid>",
		       ip6->nexthdr, true, pkt_type);
}
980
/* Print one line describing the IPv4 packet at @packet.
 *
 * The source and destination addresses are formatted with inet_ntop()
 * ("<invalid>" if that fails). The transport header is located using the
 * header length field (ihl, in 32-bit words), so packets carrying IPv4
 * options are parsed at the right offset. A bogus ihl smaller than the
 * fixed header falls back to sizeof(struct iphdr). The length passed on
 * is the header's total length field.
 */
static void show_ipv4_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
{
	char src_buf[INET_ADDRSTRLEN], dst_buf[INET_ADDRSTRLEN];
	struct iphdr *pkt = (struct iphdr *)packet;
	const char *src, *dst;
	size_t hdr_len;
	u_char proto;

	src = inet_ntop(AF_INET, &pkt->saddr, src_buf, sizeof(src_buf));
	if (!src)
		src = "<invalid>";
	dst = inet_ntop(AF_INET, &pkt->daddr, dst_buf, sizeof(dst_buf));
	if (!dst)
		dst = "<invalid>";
	proto = pkt->protocol;

	/* ihl counts 32-bit words and includes any IP options. */
	hdr_len = (size_t)pkt->ihl * 4;
	if (hdr_len < sizeof(struct iphdr))
		hdr_len = sizeof(struct iphdr);

	show_transport(packet + hdr_len,
		       ntohs(pkt->tot_len),
		       ifindex, src, dst, proto, false, pkt_type);
}
999
/* Capture thread started by traffic_monitor_start().
 *
 * @arg is the struct tmonitor_ctx of the monitor. Until ctx->done is set,
 * the thread waits in select() on the pcap fd and the wake-up eventfd,
 * writes every captured packet to the dump file and prints a one-line
 * summary for it through tm_print().
 *
 * Always returns NULL.
 */
static void *traffic_monitor_thread(void *arg)
{
	char *ifname, _ifname[IF_NAMESIZE];
	const u_char *packet, *payload;
	struct tmonitor_ctx *ctx = arg;
	pcap_dumper_t *dumper = ctx->dumper;
	int fd = ctx->pcap_fd, nfds, r;
	int wake_fd = ctx->wake_fd;
	struct pcap_pkthdr header;
	pcap_t *pcap = ctx->pcap;
	u32 ifindex;
	fd_set fds;
	u16 proto;
	u8 ptype;

	nfds = (fd > wake_fd ? fd : wake_fd) + 1;
	FD_ZERO(&fds);

	while (!ctx->done) {
		/* select() rewrites the set, so both fds are re-armed on
		 * every iteration.
		 */
		FD_SET(fd, &fds);
		FD_SET(wake_fd, &fds);
		r = select(nfds, &fds, NULL, NULL, NULL);
		if (!r)
			continue;
		if (r < 0) {
			if (errno == EINTR)
				continue;
			log_err("Fail to select on pcap fd and wake fd");
			break;
		}

		/* This instance of pcap is non-blocking */
		packet = pcap_next(pcap, &header);
		if (!packet)
			continue;

		/* According to the man page of pcap_dump(), the first
		 * argument is the pcap_dumper_t pointer even though its
		 * argument type is u_char *.
		 */
		pcap_dump((u_char *)dumper, &header, packet);

		/* Not sure what other types of packets look like. Here, we
		 * parse only Ethernet and compatible packets.
		 */
		if (!is_ethernet(packet))
			continue;

		/* Skip SLL2 header
		 * https://www.tcpdump.org/linktypes/LINKTYPE_LINUX_SLL2.html
		 *
		 * Although the document doesn't mention that, the payload
		 * doesn't include the Ethernet header. The payload starts
		 * from the first byte of the network layer header.
		 */
		payload = packet + 20;

		/* Fields read from the header: protocol type at offset 0,
		 * interface index at offset 4 (both in network byte order)
		 * and the packet type byte at offset 10.
		 */
		memcpy(&proto, packet, 2);
		proto = ntohs(proto);
		memcpy(&ifindex, packet + 4, 4);
		ifindex = ntohl(ifindex);
		ptype = packet[10];

		if (proto == ETH_P_IPV6) {
			show_ipv6_packet(payload, ifindex, ptype);
		} else if (proto == ETH_P_IP) {
			show_ipv4_packet(payload, ifindex, ptype);
		} else {
			ifname = if_indextoname(ifindex, _ifname);
			if (!ifname) {
				snprintf(_ifname, sizeof(_ifname), "unknown(%d)", ifindex);
				ifname = _ifname;
			}

			tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
				 ifname, pkt_type_str(ptype), proto);
		}
	}

	return NULL;
}
1081
1082 /* Prepare the pcap handle to capture packets.
1083 *
1084 * This pcap is non-blocking and immediate mode is enabled to receive
1085 * captured packets as soon as possible. The snaplen is set to 1024 bytes
1086 * to limit the size of captured content. The format of the link-layer
1087 * header is set to DLT_LINUX_SLL2 to enable handling various link-layer
1088 * technologies.
1089 */
traffic_monitor_prepare_pcap(void)1090 static pcap_t *traffic_monitor_prepare_pcap(void)
1091 {
1092 char errbuf[PCAP_ERRBUF_SIZE];
1093 pcap_t *pcap;
1094 int r;
1095
1096 /* Listen on all NICs in the namespace */
1097 pcap = pcap_create("any", errbuf);
1098 if (!pcap) {
1099 log_err("Failed to open pcap: %s", errbuf);
1100 return NULL;
1101 }
1102 /* Limit the size of the packet (first N bytes) */
1103 r = pcap_set_snaplen(pcap, 1024);
1104 if (r) {
1105 log_err("Failed to set snaplen: %s", pcap_geterr(pcap));
1106 goto error;
1107 }
1108 /* To receive packets as fast as possible */
1109 r = pcap_set_immediate_mode(pcap, 1);
1110 if (r) {
1111 log_err("Failed to set immediate mode: %s", pcap_geterr(pcap));
1112 goto error;
1113 }
1114 r = pcap_setnonblock(pcap, 1, errbuf);
1115 if (r) {
1116 log_err("Failed to set nonblock: %s", errbuf);
1117 goto error;
1118 }
1119 r = pcap_activate(pcap);
1120 if (r) {
1121 log_err("Failed to activate pcap: %s", pcap_geterr(pcap));
1122 goto error;
1123 }
1124 /* Determine the format of the link-layer header */
1125 r = pcap_set_datalink(pcap, DLT_LINUX_SLL2);
1126 if (r) {
1127 log_err("Failed to set datalink: %s", pcap_geterr(pcap));
1128 goto error;
1129 }
1130
1131 return pcap;
1132 error:
1133 pcap_close(pcap);
1134 return NULL;
1135 }
1136
/* Write "<test_name>__<subtest_name>" (or just "<test_name>" when
 * @subtest_name is NULL) into @buf of size @len, then replace every '/'
 * and ' ' in the result with '_'.
 */
static void encode_test_name(char *buf, size_t len, const char *test_name, const char *subtest_name)
{
	char *c;

	if (subtest_name)
		snprintf(buf, len, "%s__%s", test_name, subtest_name);
	else
		snprintf(buf, len, "%s", test_name);

	for (c = buf; *c; c++) {
		if (*c == '/' || *c == ' ')
			*c = '_';
	}
}
1150
/* Directory in which traffic_monitor_start() creates the packet dump files. */
#define PCAP_DIR "/tmp/tmon_pcap"
1152
/* Start to monitor the network traffic in the given network namespace.
 *
 * netns: the name of the network namespace to monitor. If NULL, the
 *        current network namespace is monitored.
 * test_name: the name of the running test.
 * subtest_name: the name of the running subtest if there is one. It should
 *               be NULL if it is not a subtest.
 *
 * This function will start a thread to capture packets going through NICs
 * in the given network namespace.
 *
 * Returns the monitor context to pass to traffic_monitor_stop(), or NULL
 * on failure. The caller is switched back to its original network
 * namespace before returning, on success and on failure.
 */
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
					   const char *subtest_name)
{
	struct nstoken *nstoken = NULL;
	struct tmonitor_ctx *ctx;
	char test_name_buf[64];
	static int tmon_seq;
	int r;

	if (netns) {
		nstoken = open_netns(netns);
		if (!nstoken)
			return NULL;
	}
	ctx = malloc(sizeof(*ctx));
	if (!ctx) {
		log_err("Failed to malloc ctx");
		goto fail_ctx;
	}
	memset(ctx, 0, sizeof(*ctx));

	/* The dump file name combines the pid, a per-process sequence
	 * number, the sanitized test name and the namespace name.
	 */
	encode_test_name(test_name_buf, sizeof(test_name_buf), test_name, subtest_name);
	snprintf(ctx->pkt_fname, sizeof(ctx->pkt_fname),
		 PCAP_DIR "/packets-%d-%d-%s-%s.log", getpid(), tmon_seq++,
		 test_name_buf, netns ? netns : "unknown");

	/* An already existing directory is fine. */
	r = mkdir(PCAP_DIR, 0755);
	if (r && errno != EEXIST) {
		log_err("Failed to create " PCAP_DIR);
		goto fail_pcap;
	}

	ctx->pcap = traffic_monitor_prepare_pcap();
	if (!ctx->pcap)
		goto fail_pcap;
	ctx->pcap_fd = pcap_get_selectable_fd(ctx->pcap);
	if (ctx->pcap_fd < 0) {
		log_err("Failed to get pcap fd");
		goto fail_dumper;
	}

	/* Create a packet file */
	ctx->dumper = pcap_dump_open(ctx->pcap, ctx->pkt_fname);
	if (!ctx->dumper) {
		log_err("Failed to open pcap dump: %s", ctx->pkt_fname);
		goto fail_dumper;
	}

	/* Create an eventfd to wake up the monitor thread */
	ctx->wake_fd = eventfd(0, 0);
	if (ctx->wake_fd < 0) {
		log_err("Failed to create eventfd");
		goto fail_eventfd;
	}

	r = pthread_create(&ctx->thread, NULL, traffic_monitor_thread, ctx);
	if (r) {
		log_err("Failed to create thread");
		goto fail;
	}

	close_netns(nstoken);

	return ctx;

	/* The labels below undo the setup steps in reverse order; each one
	 * falls through to the next.
	 */
fail:
	close(ctx->wake_fd);

fail_eventfd:
	pcap_dump_close(ctx->dumper);
	unlink(ctx->pkt_fname);

fail_dumper:
	pcap_close(ctx->pcap);

fail_pcap:
	free(ctx);

fail_ctx:
	close_netns(nstoken);

	return NULL;
}
1247
traffic_monitor_release(struct tmonitor_ctx * ctx)1248 static void traffic_monitor_release(struct tmonitor_ctx *ctx)
1249 {
1250 pcap_close(ctx->pcap);
1251 pcap_dump_close(ctx->dumper);
1252
1253 close(ctx->wake_fd);
1254
1255 free(ctx);
1256 }
1257
1258 /* Stop the network traffic monitor.
1259 *
1260 * ctx: the context returned by traffic_monitor_start()
1261 */
traffic_monitor_stop(struct tmonitor_ctx * ctx)1262 void traffic_monitor_stop(struct tmonitor_ctx *ctx)
1263 {
1264 __u64 w = 1;
1265
1266 if (!ctx)
1267 return;
1268
1269 /* Stop the monitor thread */
1270 ctx->done = true;
1271 /* Wake up the background thread. */
1272 write(ctx->wake_fd, &w, sizeof(w));
1273 pthread_join(ctx->thread, NULL);
1274
1275 tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
1276
1277 traffic_monitor_release(ctx);
1278 }
1279
1280 #endif /* TRAFFIC_MONITOR */
1281