1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2
3 /*
4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
5 * between src and dst. The netns fwd has veth links to each src and dst. The
6 * client is in src and server in dst. The test installs a TC BPF program to each
7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
9 * switch from ingress side; it also installs a checker prog on the egress side
10 * to drop unexpected traffic.
11 */
12
13 #include <arpa/inet.h>
14 #include <linux/if_tun.h>
15 #include <linux/limits.h>
16 #include <linux/sysctl.h>
17 #include <linux/time_types.h>
18 #include <linux/net_tstamp.h>
19 #include <net/if.h>
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24
25 #include "test_progs.h"
26 #include "network_helpers.h"
27 #include "netlink_helpers.h"
28 #include "test_tc_neigh_fib.skel.h"
29 #include "test_tc_neigh.skel.h"
30 #include "test_tc_peer.skel.h"
31 #include "test_tc_dtime.skel.h"
32
/* Fallback definition for headers that do not provide TCP_TX_DELAY */
#ifndef TCP_TX_DELAY
#define TCP_TX_DELAY 37
#endif

/* Names of the three network namespaces (src <-> fwd <-> dst) */
#define NS_SRC "ns_src"
#define NS_FWD "ns_fwd"
#define NS_DST "ns_dst"

/* IPv4 endpoint addresses and the TCP port used by the connectivity test.
 * NOTE(review): the IP4_TUN_* addresses are not used in this part of the
 * file; presumably they belong to a tunnel test further down.
 */
#define IP4_SRC "172.16.1.100"
#define IP4_DST "172.16.2.100"
#define IP4_TUN_SRC "172.17.1.100"
#define IP4_TUN_FWD "172.17.1.200"
#define IP4_PORT 9004

/* IPv6 counterparts of the addresses/port above */
#define IP6_SRC "0::1:dead:beef:cafe"
#define IP6_DST "0::2:dead:beef:cafe"
#define IP6_TUN_SRC "1::1:dead:beef:cafe"
#define IP6_TUN_FWD "1::2:dead:beef:cafe"
#define IP6_PORT 9006

/* Link-local v4 addresses put on src_fwd / dst_fwd, and the network that
 * is routed (as a /16) on the src and dst devices.
 */
#define IP4_SLL "169.254.0.1"
#define IP4_DLL "169.254.0.2"
#define IP4_NET "169.254.0.0"

/* Fixed MAC addresses assigned to dst_fwd and dst in veth mode */
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"

/* Number of bytes read from /sys/class/net/<dev>/address */
#define IFADDR_STR_LEN 18
/* Options passed to the ping command in test_ping() */
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"

/* Timeout handed to connect_to_fd() / settimeo(), in milliseconds */
#define TIMEOUT_MILLIS 10000
#define NSEC_PER_SEC 1000000000ULL
65
/* Print a printf-style message to stderr, prefixed with the source
 * file, line number and the strerror() text for the current errno.
 */
#define log_err(MSG, ...) \
	fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
		__FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)

/* NULL-terminated list of namespace names, and the handles obtained from
 * netns_new() for them (stored in the same order).
 */
static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
static struct netns_obj *netns_objs[3];
72
/*
 * Overwrite the beginning of an existing file (e.g. a /proc/sys knob)
 * with the string @newval.
 *
 * @path:   file to open; it must already exist (opened with "r+")
 * @newval: NUL-terminated string to write; an empty string is treated
 *          as a failure because no item can be written
 *
 * Returns 0 on success and -1 if the file cannot be opened, written or
 * flushed. Every failure is logged through log_err().
 */
static int write_file(const char *path, const char *newval)
{
	FILE *f;

	f = fopen(path, "r+");
	if (!f) {
		log_err("opening %s failed", path);
		return -1;
	}
	if (fwrite(newval, strlen(newval), 1, f) != 1) {
		log_err("writing to %s failed", path);
		fclose(f);
		return -1;
	}
	/* stdio buffers the data, so the underlying write(2) may only
	 * happen (and fail) when the stream is flushed by fclose().
	 */
	if (fclose(f)) {
		log_err("closing %s failed", path);
		return -1;
	}
	return 0;
}
88
netns_setup_namespaces(const char * verb)89 static int netns_setup_namespaces(const char *verb)
90 {
91 struct netns_obj **ns_obj = netns_objs;
92 const char * const *ns = namespaces;
93
94 while (*ns) {
95 if (strcmp(verb, "add") == 0) {
96 *ns_obj = netns_new(*ns, false);
97 if (!ASSERT_OK_PTR(*ns_obj, "netns_new"))
98 return -1;
99 } else {
100 if (!ASSERT_OK_PTR(*ns_obj, "netns_obj is NULL"))
101 return -1;
102 netns_free(*ns_obj);
103 *ns_obj = NULL;
104 }
105 ns++;
106 ns_obj++;
107 }
108 return 0;
109 }
110
netns_setup_namespaces_nofail(const char * verb)111 static void netns_setup_namespaces_nofail(const char *verb)
112 {
113 struct netns_obj **ns_obj = netns_objs;
114 const char * const *ns = namespaces;
115
116 while (*ns) {
117 if (strcmp(verb, "add") == 0) {
118 *ns_obj = netns_new(*ns, false);
119 } else {
120 if (*ns_obj)
121 netns_free(*ns_obj);
122 *ns_obj = NULL;
123 }
124 ns++;
125 ns_obj++;
126 }
127 }
128
/* Kind of device pair used to connect the namespaces */
enum dev_mode {
	MODE_VETH,	/* veth pairs created with "ip link add ... type veth" */
	MODE_NETKIT,	/* netkit pairs created through create_netkit() */
};

/* Device mode to set up, plus the interface indexes that
 * netns_setup_links_and_routes() looks up for the four devices.
 */
struct netns_setup_result {
	enum dev_mode dev_mode;
	int ifindex_src;	/* "src" */
	int ifindex_src_fwd;	/* "src_fwd" */
	int ifindex_dst;	/* "dst" */
	int ifindex_dst_fwd;	/* "dst_fwd" */
};
141
get_ifaddr(const char * name,char * ifaddr)142 static int get_ifaddr(const char *name, char *ifaddr)
143 {
144 char path[PATH_MAX];
145 FILE *f;
146 int ret;
147
148 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
149 f = fopen(path, "r");
150 if (!ASSERT_OK_PTR(f, path))
151 return -1;
152
153 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
154 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
155 fclose(f);
156 return -1;
157 }
158 fclose(f);
159 return 0;
160 }
161
create_netkit(int mode,char * prim,char * peer)162 static int create_netkit(int mode, char *prim, char *peer)
163 {
164 struct rtattr *linkinfo, *data, *peer_info;
165 struct rtnl_handle rth = { .fd = -1 };
166 const char *type = "netkit";
167 struct {
168 struct nlmsghdr n;
169 struct ifinfomsg i;
170 char buf[1024];
171 } req = {};
172 int err;
173
174 err = rtnl_open(&rth, 0);
175 if (!ASSERT_OK(err, "open_rtnetlink"))
176 return err;
177
178 memset(&req, 0, sizeof(req));
179 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
180 req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
181 req.n.nlmsg_type = RTM_NEWLINK;
182 req.i.ifi_family = AF_UNSPEC;
183
184 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
185 linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
186 addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
187 data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
188 addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
189 peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
190 req.n.nlmsg_len += sizeof(struct ifinfomsg);
191 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
192 addattr_nest_end(&req.n, peer_info);
193 addattr_nest_end(&req.n, data);
194 addattr_nest_end(&req.n, linkinfo);
195
196 err = rtnl_talk(&rth, &req.n, NULL);
197 ASSERT_OK(err, "talk_rtnetlink");
198 rtnl_close(&rth);
199 return err;
200 }
201
netns_setup_links_and_routes(struct netns_setup_result * result)202 static int netns_setup_links_and_routes(struct netns_setup_result *result)
203 {
204 struct nstoken *nstoken = NULL;
205 char src_fwd_addr[IFADDR_STR_LEN+1] = {};
206 char src_addr[IFADDR_STR_LEN + 1] = {};
207 int err;
208
209 if (result->dev_mode == MODE_VETH) {
210 SYS(fail, "ip link add src type veth peer name src_fwd");
211 SYS(fail, "ip link add dst type veth peer name dst_fwd");
212
213 SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
214 SYS(fail, "ip link set dst address " MAC_DST);
215 } else if (result->dev_mode == MODE_NETKIT) {
216 err = create_netkit(NETKIT_L3, "src", "src_fwd");
217 if (!ASSERT_OK(err, "create_ifindex_src"))
218 goto fail;
219 err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
220 if (!ASSERT_OK(err, "create_ifindex_dst"))
221 goto fail;
222 }
223
224 if (get_ifaddr("src_fwd", src_fwd_addr))
225 goto fail;
226
227 if (get_ifaddr("src", src_addr))
228 goto fail;
229
230 result->ifindex_src = if_nametoindex("src");
231 if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
232 goto fail;
233
234 result->ifindex_src_fwd = if_nametoindex("src_fwd");
235 if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
236 goto fail;
237
238 result->ifindex_dst = if_nametoindex("dst");
239 if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
240 goto fail;
241
242 result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
243 if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
244 goto fail;
245
246 SYS(fail, "ip link set src netns " NS_SRC);
247 SYS(fail, "ip link set src_fwd netns " NS_FWD);
248 SYS(fail, "ip link set dst_fwd netns " NS_FWD);
249 SYS(fail, "ip link set dst netns " NS_DST);
250
251 /** setup in 'src' namespace */
252 nstoken = open_netns(NS_SRC);
253 if (!ASSERT_OK_PTR(nstoken, "setns src"))
254 goto fail;
255
256 SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
257 SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
258 SYS(fail, "ip link set dev src up");
259
260 SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
261 SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
262 SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
263
264 if (result->dev_mode == MODE_VETH) {
265 SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
266 src_fwd_addr);
267 SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
268 src_fwd_addr);
269 }
270
271 close_netns(nstoken);
272
273 /** setup in 'fwd' namespace */
274 nstoken = open_netns(NS_FWD);
275 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
276 goto fail;
277
278 /* The fwd netns automatically gets a v6 LL address / routes, but also
279 * needs v4 one in order to start ARP probing. IP4_NET route is added
280 * to the endpoints so that the ARP processing will reply.
281 */
282 SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
283 SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
284 SYS(fail, "ip link set dev src_fwd up");
285 SYS(fail, "ip link set dev dst_fwd up");
286
287 SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
288 SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
289 SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
290 SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
291
292 if (result->dev_mode == MODE_VETH) {
293 SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
294 SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
295 SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
296 SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
297 }
298
299 close_netns(nstoken);
300
301 /** setup in 'dst' namespace */
302 nstoken = open_netns(NS_DST);
303 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
304 goto fail;
305
306 SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
307 SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
308 SYS(fail, "ip link set dev dst up");
309 SYS(fail, "ip link set dev lo up");
310
311 SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
312 SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
313 SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
314
315 if (result->dev_mode == MODE_VETH) {
316 SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
317 SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
318 }
319
320 close_netns(nstoken);
321
322 return 0;
323 fail:
324 if (nstoken)
325 close_netns(nstoken);
326 return -1;
327 }
328
qdisc_clsact_create(struct bpf_tc_hook * qdisc_hook,int ifindex)329 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
330 {
331 char err_str[128], ifname[16];
332 int err;
333
334 qdisc_hook->ifindex = ifindex;
335 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
336 err = bpf_tc_hook_create(qdisc_hook);
337 snprintf(err_str, sizeof(err_str),
338 "qdisc add dev %s clsact",
339 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
340 err_str[sizeof(err_str) - 1] = 0;
341 ASSERT_OK(err, err_str);
342
343 return err;
344 }
345
xgress_filter_add(struct bpf_tc_hook * qdisc_hook,enum bpf_tc_attach_point xgress,const struct bpf_program * prog,int priority)346 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
347 enum bpf_tc_attach_point xgress,
348 const struct bpf_program *prog, int priority)
349 {
350 LIBBPF_OPTS(bpf_tc_opts, tc_attach);
351 char err_str[128], ifname[16];
352 int err;
353
354 qdisc_hook->attach_point = xgress;
355 tc_attach.prog_fd = bpf_program__fd(prog);
356 tc_attach.priority = priority;
357 err = bpf_tc_attach(qdisc_hook, &tc_attach);
358 snprintf(err_str, sizeof(err_str),
359 "filter add dev %s %s prio %d bpf da %s",
360 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
361 xgress == BPF_TC_INGRESS ? "ingress" : "egress",
362 priority, bpf_program__name(prog));
363 err_str[sizeof(err_str) - 1] = 0;
364 ASSERT_OK(err, err_str);
365
366 return err;
367 }
368
/* Both wrappers expect an 'int err' variable and a 'fail' label in the
 * calling function: the helper's result is stored in err and control
 * jumps to fail when it is non-zero.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex)			\
	do {								\
		err = qdisc_clsact_create(qdisc_hook, ifindex);	\
		if (err)						\
			goto fail;					\
	} while (0)

#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority)		\
	do {								\
		err = xgress_filter_add(qdisc_hook, xgress, prog,	\
					priority);			\
		if (err)						\
			goto fail;					\
	} while (0)
378
netns_load_bpf(const struct bpf_program * src_prog,const struct bpf_program * dst_prog,const struct bpf_program * chk_prog,const struct netns_setup_result * setup_result)379 static int netns_load_bpf(const struct bpf_program *src_prog,
380 const struct bpf_program *dst_prog,
381 const struct bpf_program *chk_prog,
382 const struct netns_setup_result *setup_result)
383 {
384 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
385 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
386 int err;
387
388 /* tc qdisc add dev src_fwd clsact */
389 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
390 /* tc filter add dev src_fwd ingress bpf da src_prog */
391 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
392 /* tc filter add dev src_fwd egress bpf da chk_prog */
393 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
394
395 /* tc qdisc add dev dst_fwd clsact */
396 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
397 /* tc filter add dev dst_fwd ingress bpf da dst_prog */
398 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
399 /* tc filter add dev dst_fwd egress bpf da chk_prog */
400 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
401
402 return 0;
403 fail:
404 return -1;
405 }
406
/*
 * TCP round trip across the forwarder: start a server on @addr:@port in
 * NS_DST, connect to it from NS_SRC, send a short payload from the
 * client and expect the server side to read all of it.
 */
static void test_tcp(int family, const char *addr, __u16 port)
{
	int srv_fd = -1, peer_fd = -1, cli_fd = -1;
	char payload[] = "testing testing";
	struct nstoken *tok;
	int len;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;

	srv_fd = start_server(family, SOCK_STREAM, addr, port, 0);
	if (!ASSERT_GE(srv_fd, 0, "listen"))
		goto out;

	/* the client side lives in the source namespace */
	close_netns(tok);
	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;

	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	peer_fd = accept(srv_fd, NULL, NULL);
	if (!ASSERT_GE(peer_fd, 0, "accept"))
		goto out;

	if (!ASSERT_OK(settimeo(peer_fd, TIMEOUT_MILLIS), "settimeo"))
		goto out;

	len = write(cli_fd, payload, sizeof(payload));
	if (!ASSERT_EQ(len, sizeof(payload), "send to server"))
		goto out;

	len = read(peer_fd, payload, sizeof(payload));
	ASSERT_EQ(len, sizeof(payload), "recv from server");

out:
	if (tok)
		close_netns(tok);
	if (srv_fd >= 0)
		close(srv_fd);
	if (peer_fd >= 0)
		close(peer_fd);
	if (cli_fd >= 0)
		close(cli_fd);
}
455
test_ping(int family,const char * addr)456 static int test_ping(int family, const char *addr)
457 {
458 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
459 return 0;
460 fail:
461 return -1;
462 }
463
test_connectivity(void)464 static void test_connectivity(void)
465 {
466 test_tcp(AF_INET, IP4_DST, IP4_PORT);
467 test_ping(AF_INET, IP4_DST);
468 test_tcp(AF_INET6, IP6_DST, IP6_PORT);
469 test_ping(AF_INET6, IP6_DST);
470 }
471
/*
 * Turn IPv4 and IPv6 forwarding on or off in the current network
 * namespace by writing "1"/"0" to the corresponding /proc/sys files.
 *
 * Returns 0 on success, otherwise the error from write_file(). The
 * assertion tag reflects the value that was actually being written.
 */
static int set_forwarding(bool enable)
{
	const char *val = enable ? "1" : "0";
	int err;

	err = write_file("/proc/sys/net/ipv4/ip_forward", val);
	if (!ASSERT_OK(err, enable ? "set ipv4.ip_forward=1" :
				     "set ipv4.ip_forward=0"))
		return err;

	err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", val);
	if (!ASSERT_OK(err, enable ? "set ipv6.forwarding=1" :
				     "set ipv6.forwarding=0"))
		return err;

	return 0;
}
486
/*
 * Receive one message on @fd, check that it is @s bytes long and starts
 * with @expected, and extract the SO_TIMESTAMPNS control message.
 *
 * @tstamp: if non-NULL, receives the packet timestamp in nanoseconds
 *          (0 when no timestamp cmsg was found) and no further checks
 *          are made. If NULL, the timestamp is asserted to be non-zero,
 *          not in the future, and less than 5 seconds old relative to
 *          CLOCK_REALTIME.
 *
 * Returns 0 once a message of the right length was received, -1 otherwise.
 */
static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
{
	struct timespec rx_ts = {};
	char cbuf[CMSG_SPACE(sizeof(rx_ts))];
	char payload[32];
	struct iovec vec = {
		.iov_base = payload,
		.iov_len = sizeof(payload),
	};
	struct msghdr hdr = {};
	struct timespec wall_ts;
	struct cmsghdr *cm;
	__u64 wall_ns, rx_ns;
	int ret;

	hdr.msg_iov = &vec;
	hdr.msg_iovlen = 1;
	hdr.msg_control = &cbuf;
	hdr.msg_controllen = sizeof(cbuf);

	ret = recvmsg(fd, &hdr, 0);
	if (!ASSERT_EQ(ret, s, "recvmsg"))
		return -1;
	ASSERT_STRNEQ(payload, expected, s, "expected rcv data");

	/* only the first control message is inspected */
	cm = CMSG_FIRSTHDR(&hdr);
	if (cm && cm->cmsg_level == SOL_SOCKET &&
	    cm->cmsg_type == SO_TIMESTAMPNS)
		memcpy(&rx_ts, CMSG_DATA(cm), sizeof(rx_ts));

	rx_ns = rx_ts.tv_sec * NSEC_PER_SEC + rx_ts.tv_nsec;
	if (tstamp) {
		/* caller will check the tstamp itself */
		*tstamp = rx_ns;
		return 0;
	}

	ASSERT_NEQ(rx_ns, 0, "pkt rcv tstamp");

	ret = clock_gettime(CLOCK_REALTIME, &wall_ts);
	ASSERT_OK(ret, "clock_gettime");
	wall_ns = wall_ts.tv_sec * NSEC_PER_SEC + wall_ts.tv_nsec;

	if (ASSERT_GE(wall_ns, rx_ns, "check rcv tstamp"))
		ASSERT_LT(wall_ns - rx_ns, 5 * NSEC_PER_SEC,
			  "check rcv tstamp");
	return 0;
}
534
/* Receive @s bytes matching @expected on @fd and let __rcv_tstamp()
 * validate the receive timestamp itself; its result is ignored.
 */
static void rcv_tstamp(int fd, const char *expected, size_t s)
{
	(void)__rcv_tstamp(fd, expected, s, NULL);
}
539
wait_netstamp_needed_key(void)540 static int wait_netstamp_needed_key(void)
541 {
542 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
543 char buf[] = "testing testing";
544 struct nstoken *nstoken;
545 __u64 tstamp = 0;
546
547 nstoken = open_netns(NS_DST);
548 if (!ASSERT_OK_PTR(nstoken, "setns dst"))
549 return -1;
550
551 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
552 if (!ASSERT_GE(srv_fd, 0, "start_server"))
553 goto done;
554
555 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
556 &opt, sizeof(opt));
557 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
558 goto done;
559
560 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
561 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
562 goto done;
563
564 again:
565 n = write(cli_fd, buf, sizeof(buf));
566 if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
567 goto done;
568 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
569 if (!ASSERT_OK(err, "__rcv_tstamp"))
570 goto done;
571 if (!tstamp && nretries++ < 5) {
572 sleep(1);
573 printf("netstamp_needed_key retry#%d\n", nretries);
574 goto again;
575 }
576
577 done:
578 if (!tstamp && srv_fd != -1) {
579 close(srv_fd);
580 srv_fd = -1;
581 }
582 if (cli_fd != -1)
583 close(cli_fd);
584 close_netns(nstoken);
585 return srv_fd;
586 }
587
snd_tstamp(int fd,char * b,size_t s)588 static void snd_tstamp(int fd, char *b, size_t s)
589 {
590 struct sock_txtime opt = { .clockid = CLOCK_TAI };
591 char ctl[CMSG_SPACE(sizeof(__u64))];
592 struct timespec now_ts;
593 struct msghdr msg = {};
594 struct cmsghdr *cmsg;
595 struct iovec iov;
596 __u64 now_ns;
597 int ret;
598
599 ret = clock_gettime(CLOCK_TAI, &now_ts);
600 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
601 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
602
603 iov.iov_base = b;
604 iov.iov_len = s;
605 msg.msg_iov = &iov;
606 msg.msg_iovlen = 1;
607 msg.msg_control = &ctl;
608 msg.msg_controllen = sizeof(ctl);
609
610 cmsg = CMSG_FIRSTHDR(&msg);
611 cmsg->cmsg_level = SOL_SOCKET;
612 cmsg->cmsg_type = SCM_TXTIME;
613 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
614 *(__u64 *)CMSG_DATA(cmsg) = now_ns;
615
616 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
617 ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
618
619 ret = sendmsg(fd, &msg, 0);
620 ASSERT_EQ(ret, s, "sendmsg");
621 }
622
/*
 * Exchange one payload between NS_SRC (client) and NS_DST (server) and
 * validate the receive timestamp on the server side.
 *
 * For SOCK_STREAM the payload is written on the connected client and
 * checked on the accepted socket; otherwise it is sent with a TX time
 * (snd_tstamp()) and checked on the server socket directly.
 */
static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
{
	int on = 1, peer_fd = -1, cli_fd = -1, srv_fd, err;
	char payload[] = "testing testing";
	struct nstoken *tok;

	tok = open_netns(NS_DST);
	if (!ASSERT_OK_PTR(tok, "setns dst"))
		return;
	srv_fd = start_server(family, type, addr, port, 0);
	close_netns(tok);

	if (!ASSERT_GE(srv_fd, 0, "listen"))
		return;

	/* Ensure the kernel puts the (rcv) timestamp for all skb */
	err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS,
			 &on, sizeof(on));
	if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)"))
		goto out;

	if (type == SOCK_STREAM) {
		/* Ensure the kernel set EDT when sending out rst/ack
		 * from the kernel's ctl_sk.
		 */
		err = setsockopt(srv_fd, SOL_TCP, TCP_TX_DELAY, &on,
				 sizeof(on));
		if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
			goto out;
	}

	tok = open_netns(NS_SRC);
	if (!ASSERT_OK_PTR(tok, "setns src"))
		goto out;
	cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
	close_netns(tok);

	if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
		goto out;

	if (type != SOCK_STREAM) {
		snd_tstamp(cli_fd, payload, sizeof(payload));
		rcv_tstamp(srv_fd, payload, sizeof(payload));
	} else {
		int len;

		peer_fd = accept(srv_fd, NULL, NULL);
		if (!ASSERT_GE(peer_fd, 0, "accept"))
			goto out;

		len = write(cli_fd, payload, sizeof(payload));
		if (!ASSERT_EQ(len, sizeof(payload), "send to server"))
			goto out;
		rcv_tstamp(peer_fd, payload, sizeof(payload));
	}

out:
	close(srv_fd);
	if (peer_fd != -1)
		close(peer_fd);
	if (cli_fd != -1)
		close(cli_fd);
}
686
netns_load_dtime_bpf(struct test_tc_dtime * skel,const struct netns_setup_result * setup_result)687 static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
688 const struct netns_setup_result *setup_result)
689 {
690 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
691 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
692 LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
693 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
694 struct nstoken *nstoken;
695 int err;
696
697 /* setup ns_src tc progs */
698 nstoken = open_netns(NS_SRC);
699 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
700 return -1;
701 /* tc qdisc add dev src clsact */
702 QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
703 /* tc filter add dev src ingress bpf da ingress_host */
704 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
705 /* tc filter add dev src egress bpf da egress_host */
706 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
707 close_netns(nstoken);
708
709 /* setup ns_dst tc progs */
710 nstoken = open_netns(NS_DST);
711 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
712 return -1;
713 /* tc qdisc add dev dst clsact */
714 QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
715 /* tc filter add dev dst ingress bpf da ingress_host */
716 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
717 /* tc filter add dev dst egress bpf da egress_host */
718 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
719 close_netns(nstoken);
720
721 /* setup ns_fwd tc progs */
722 nstoken = open_netns(NS_FWD);
723 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
724 return -1;
725 /* tc qdisc add dev dst_fwd clsact */
726 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
727 /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
728 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
729 skel->progs.ingress_fwdns_prio100, 100);
730 /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
731 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
732 skel->progs.ingress_fwdns_prio101, 101);
733 /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
734 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
735 skel->progs.egress_fwdns_prio100, 100);
736 /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
737 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
738 skel->progs.egress_fwdns_prio101, 101);
739
740 /* tc qdisc add dev src_fwd clsact */
741 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
742 /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
743 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
744 skel->progs.ingress_fwdns_prio100, 100);
745 /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
746 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
747 skel->progs.ingress_fwdns_prio101, 101);
748 /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
749 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
750 skel->progs.egress_fwdns_prio100, 100);
751 /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
752 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
753 skel->progs.egress_fwdns_prio101, 101);
754 close_netns(nstoken);
755 return 0;
756
757 fail:
758 close_netns(nstoken);
759 return err;
760 }
761
/* Indexes into the per-test dtimes[] / errs[] counter arrays */
enum {
	INGRESS_FWDNS_P100,
	INGRESS_FWDNS_P101,
	EGRESS_FWDNS_P100,
	EGRESS_FWDNS_P101,
	INGRESS_ENDHOST,
	EGRESS_ENDHOST,
	SET_DTIME,
	__MAX_CNT,
};

/* Printable name of each counter, indexed by the enum above */
const char *cnt_names[] = {
	[INGRESS_FWDNS_P100]	= "ingress_fwdns_p100",
	[INGRESS_FWDNS_P101]	= "ingress_fwdns_p101",
	[EGRESS_FWDNS_P100]	= "egress_fwdns_p100",
	[EGRESS_FWDNS_P101]	= "egress_fwdns_p101",
	[INGRESS_ENDHOST]	= "ingress_endhost",
	[EGRESS_ENDHOST]	= "egress_endhost",
	[SET_DTIME]		= "set_dtime",
};
782
/* Identifier of each dtime sub-test; also the first index into the
 * dtimes[] / errs[] arrays of the BPF skeleton.
 */
enum {
	TCP_IP6_CLEAR_DTIME,
	TCP_IP4,
	TCP_IP6,
	UDP_IP4,
	UDP_IP6,
	TCP_IP4_RT_FWD,
	TCP_IP6_RT_FWD,
	UDP_IP4_RT_FWD,
	UDP_IP6_RT_FWD,
	UKN_TEST,
	__NR_TESTS,
};

/* Printable name of each sub-test (there is no entry for UKN_TEST) */
const char *test_names[] = {
	[TCP_IP6_CLEAR_DTIME]	= "tcp ip6 clear dtime",
	[TCP_IP4]		= "tcp ip4",
	[TCP_IP6]		= "tcp ip6",
	[UDP_IP4]		= "udp ip4",
	[UDP_IP6]		= "udp ip6",
	[TCP_IP4_RT_FWD]	= "tcp ip4 rt fwd",
	[TCP_IP6_RT_FWD]	= "tcp ip6 rt fwd",
	[UDP_IP4_RT_FWD]	= "udp ip4 rt fwd",
	[UDP_IP6_RT_FWD]	= "udp ip6 rt fwd",
};
808
dtime_cnt_str(int test,int cnt)809 static const char *dtime_cnt_str(int test, int cnt)
810 {
811 static char name[64];
812
813 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
814
815 return name;
816 }
817
dtime_err_str(int test,int cnt)818 static const char *dtime_err_str(int test, int cnt)
819 {
820 static char name[64];
821
822 snprintf(name, sizeof(name), "%s %s errs", test_names[test],
823 cnt_names[cnt]);
824
825 return name;
826 }
827
test_tcp_clear_dtime(struct test_tc_dtime * skel)828 static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
829 {
830 int i, t = TCP_IP6_CLEAR_DTIME;
831 __u32 *dtimes = skel->bss->dtimes[t];
832 __u32 *errs = skel->bss->errs[t];
833
834 skel->bss->test = t;
835 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
836
837 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
838 dtime_cnt_str(t, INGRESS_FWDNS_P100));
839 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
840 dtime_cnt_str(t, INGRESS_FWDNS_P101));
841 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
842 dtime_cnt_str(t, EGRESS_FWDNS_P100));
843 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
844 dtime_cnt_str(t, EGRESS_FWDNS_P101));
845 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
846 dtime_cnt_str(t, EGRESS_ENDHOST));
847 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
848 dtime_cnt_str(t, INGRESS_ENDHOST));
849
850 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
851 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
852 }
853
test_tcp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)854 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
855 {
856 __u32 *dtimes, *errs;
857 const char *addr;
858 int i, t;
859
860 if (family == AF_INET) {
861 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
862 addr = IP4_DST;
863 } else {
864 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
865 addr = IP6_DST;
866 }
867
868 dtimes = skel->bss->dtimes[t];
869 errs = skel->bss->errs[t];
870
871 skel->bss->test = t;
872 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
873
874 /* fwdns_prio100 prog does not read delivery_time_type, so
875 * kernel puts the (rcv) timestamp in __sk_buff->tstamp
876 */
877 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
878 dtime_cnt_str(t, INGRESS_FWDNS_P100));
879 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
880 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
881
882 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
883 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
884 }
885
test_udp_dtime(struct test_tc_dtime * skel,int family,bool bpf_fwd)886 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
887 {
888 __u32 *dtimes, *errs;
889 const char *addr;
890 int i, t;
891
892 if (family == AF_INET) {
893 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
894 addr = IP4_DST;
895 } else {
896 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
897 addr = IP6_DST;
898 }
899
900 dtimes = skel->bss->dtimes[t];
901 errs = skel->bss->errs[t];
902
903 skel->bss->test = t;
904 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
905
906 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
907 dtime_cnt_str(t, INGRESS_FWDNS_P100));
908 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
909 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
910
911 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
912 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
913 }
914
test_tc_redirect_dtime(struct netns_setup_result * setup_result)915 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
916 {
917 struct test_tc_dtime *skel;
918 struct nstoken *nstoken;
919 int hold_tstamp_fd, err;
920
921 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
922 * is no delay in the kernel net_enable_timestamp().
923 * This ensures the following tests must have
924 * non zero rcv tstamp in the recvmsg().
925 */
926 hold_tstamp_fd = wait_netstamp_needed_key();
927 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
928 return;
929
930 skel = test_tc_dtime__open();
931 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
932 goto done;
933
934 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
935 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
936
937 err = test_tc_dtime__load(skel);
938 if (!ASSERT_OK(err, "test_tc_dtime__load"))
939 goto done;
940
941 if (netns_load_dtime_bpf(skel, setup_result))
942 goto done;
943
944 nstoken = open_netns(NS_FWD);
945 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
946 goto done;
947 err = set_forwarding(false);
948 close_netns(nstoken);
949 if (!ASSERT_OK(err, "disable forwarding"))
950 goto done;
951
952 test_tcp_clear_dtime(skel);
953
954 test_tcp_dtime(skel, AF_INET, true);
955 test_tcp_dtime(skel, AF_INET6, true);
956 test_udp_dtime(skel, AF_INET, true);
957 test_udp_dtime(skel, AF_INET6, true);
958
959 /* Test the kernel ip[6]_forward path instead
960 * of bpf_redirect_neigh().
961 */
962 nstoken = open_netns(NS_FWD);
963 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
964 goto done;
965 err = set_forwarding(true);
966 close_netns(nstoken);
967 if (!ASSERT_OK(err, "enable forwarding"))
968 goto done;
969
970 test_tcp_dtime(skel, AF_INET, false);
971 test_tcp_dtime(skel, AF_INET6, false);
972 test_udp_dtime(skel, AF_INET, false);
973 test_udp_dtime(skel, AF_INET6, false);
974
975 done:
976 test_tc_dtime__destroy(skel);
977 close(hold_tstamp_fd);
978 }
979
test_tc_redirect_neigh_fib(struct netns_setup_result * setup_result)980 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
981 {
982 struct nstoken *nstoken = NULL;
983 struct test_tc_neigh_fib *skel = NULL;
984
985 nstoken = open_netns(NS_FWD);
986 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
987 return;
988
989 skel = test_tc_neigh_fib__open();
990 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
991 goto done;
992
993 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
994 goto done;
995
996 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
997 skel->progs.tc_chk, setup_result))
998 goto done;
999
1000 /* bpf_fib_lookup() checks if forwarding is enabled */
1001 if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
1002 goto done;
1003
1004 test_connectivity();
1005
1006 done:
1007 if (skel)
1008 test_tc_neigh_fib__destroy(skel);
1009 close_netns(nstoken);
1010 }
1011
test_tc_redirect_neigh(struct netns_setup_result * setup_result)1012 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
1013 {
1014 struct nstoken *nstoken = NULL;
1015 struct test_tc_neigh *skel = NULL;
1016 int err;
1017
1018 nstoken = open_netns(NS_FWD);
1019 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1020 return;
1021
1022 skel = test_tc_neigh__open();
1023 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
1024 goto done;
1025
1026 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1027 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1028
1029 err = test_tc_neigh__load(skel);
1030 if (!ASSERT_OK(err, "test_tc_neigh__load"))
1031 goto done;
1032
1033 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1034 skel->progs.tc_chk, setup_result))
1035 goto done;
1036
1037 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1038 goto done;
1039
1040 test_connectivity();
1041
1042 done:
1043 if (skel)
1044 test_tc_neigh__destroy(skel);
1045 close_netns(nstoken);
1046 }
1047
test_tc_redirect_peer(struct netns_setup_result * setup_result)1048 static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
1049 {
1050 struct nstoken *nstoken;
1051 struct test_tc_peer *skel;
1052 int err;
1053
1054 nstoken = open_netns(NS_FWD);
1055 if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
1056 return;
1057
1058 skel = test_tc_peer__open();
1059 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1060 goto done;
1061
1062 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
1063 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1064
1065 err = test_tc_peer__load(skel);
1066 if (!ASSERT_OK(err, "test_tc_peer__load"))
1067 goto done;
1068
1069 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
1070 skel->progs.tc_chk, setup_result))
1071 goto done;
1072
1073 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1074 goto done;
1075
1076 test_connectivity();
1077
1078 done:
1079 if (skel)
1080 test_tc_peer__destroy(skel);
1081 close_netns(nstoken);
1082 }
1083
tun_open(char * name)1084 static int tun_open(char *name)
1085 {
1086 struct ifreq ifr;
1087 int fd, err;
1088
1089 fd = open("/dev/net/tun", O_RDWR);
1090 if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
1091 return -1;
1092
1093 memset(&ifr, 0, sizeof(ifr));
1094
1095 ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
1096 if (*name)
1097 strncpy(ifr.ifr_name, name, IFNAMSIZ);
1098
1099 err = ioctl(fd, TUNSETIFF, &ifr);
1100 if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
1101 goto fail;
1102
1103 SYS(fail, "ip link set dev %s up", name);
1104
1105 return fd;
1106 fail:
1107 close(fd);
1108 return -1;
1109 }
1110
/* Direction in which tun_relay_loop() copies the next packet. */
enum {
	SRC_TO_TARGET,	/* 0: read from src_fd, write to target_fd */
	TARGET_TO_SRC,	/* 1: read from target_fd, write to src_fd */
};
1115
tun_relay_loop(int src_fd,int target_fd)1116 static int tun_relay_loop(int src_fd, int target_fd)
1117 {
1118 fd_set rfds, wfds;
1119
1120 FD_ZERO(&rfds);
1121 FD_ZERO(&wfds);
1122
1123 for (;;) {
1124 char buf[1500];
1125 int direction, nread, nwrite;
1126
1127 FD_SET(src_fd, &rfds);
1128 FD_SET(target_fd, &rfds);
1129
1130 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
1131 log_err("select failed");
1132 return 1;
1133 }
1134
1135 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
1136
1137 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
1138 if (nread < 0) {
1139 log_err("read failed");
1140 return 1;
1141 }
1142
1143 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
1144 if (nwrite != nread) {
1145 log_err("write failed");
1146 return 1;
1147 }
1148 }
1149 }
1150
test_tc_redirect_peer_l3(struct netns_setup_result * setup_result)1151 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
1152 {
1153 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
1154 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
1155 struct test_tc_peer *skel = NULL;
1156 struct nstoken *nstoken = NULL;
1157 int err;
1158 int tunnel_pid = -1;
1159 int src_fd, target_fd = -1;
1160 int ifindex;
1161
1162 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
1163 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
1164 * expose the L2 headers encapsulating the IP packet to BPF and hence
1165 * don't have skb in suitable state for this test. Alternative to TUN/TAP
1166 * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
1167 * but that requires much more complicated setup.
1168 */
1169 nstoken = open_netns(NS_SRC);
1170 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
1171 return;
1172
1173 src_fd = tun_open("tun_src");
1174 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
1175 goto fail;
1176
1177 close_netns(nstoken);
1178
1179 nstoken = open_netns(NS_FWD);
1180 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
1181 goto fail;
1182
1183 target_fd = tun_open("tun_fwd");
1184 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
1185 goto fail;
1186
1187 tunnel_pid = fork();
1188 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
1189 goto fail;
1190
1191 if (tunnel_pid == 0)
1192 exit(tun_relay_loop(src_fd, target_fd));
1193
1194 skel = test_tc_peer__open();
1195 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
1196 goto fail;
1197
1198 ifindex = if_nametoindex("tun_fwd");
1199 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
1200 goto fail;
1201
1202 skel->rodata->IFINDEX_SRC = ifindex;
1203 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
1204
1205 err = test_tc_peer__load(skel);
1206 if (!ASSERT_OK(err, "test_tc_peer__load"))
1207 goto fail;
1208
1209 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
1210 * towards dst, and "tc_dst" to redirect packets
1211 * and "tc_chk" on dst_fwd to drop non-redirected packets.
1212 */
1213 /* tc qdisc add dev tun_fwd clsact */
1214 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
1215 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
1216 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
1217
1218 /* tc qdisc add dev dst_fwd clsact */
1219 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
1220 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
1221 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
1222 /* tc filter add dev dst_fwd egress bpf da tc_chk */
1223 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
1224
1225 /* Setup route and neigh tables */
1226 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
1227 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
1228
1229 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
1230 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
1231
1232 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
1233 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
1234 " dev tun_src scope global");
1235 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
1236 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
1237 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
1238 " dev tun_src scope global");
1239 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
1240
1241 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1242 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
1243
1244 if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
1245 goto fail;
1246
1247 test_connectivity();
1248
1249 fail:
1250 if (tunnel_pid > 0) {
1251 kill(tunnel_pid, SIGTERM);
1252 waitpid(tunnel_pid, NULL, 0);
1253 }
1254 if (src_fd >= 0)
1255 close(src_fd);
1256 if (target_fd >= 0)
1257 close(target_fd);
1258 if (skel)
1259 test_tc_peer__destroy(skel);
1260 if (nstoken)
1261 close_netns(nstoken);
1262 }
1263
/*
 * Run test_<name>() as the subtest "<name>" with setup_result.dev_mode
 * set to @mode.
 *
 * Nothing happens unless test__start_subtest() selects the subtest.
 * Namespaces are created with netns_setup_namespaces("add"); once that
 * succeeded they are deleted again even if link/route setup fails, in
 * which case the test function itself is not called.
 */
#define RUN_TEST(name, mode)								\
	do {										\
		struct netns_setup_result setup_result = { .dev_mode = mode, };		\
											\
		if (!test__start_subtest(#name))					\
			break;								\
		if (!ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces"))	\
			break;								\
		if (ASSERT_OK(netns_setup_links_and_routes(&setup_result),		\
			      "setup links and routes"))				\
			test_ ## name(&setup_result);					\
		netns_setup_namespaces("delete");					\
	} while (0)
1275
test_tc_redirect_run_tests(void * arg)1276 static void *test_tc_redirect_run_tests(void *arg)
1277 {
1278 netns_setup_namespaces_nofail("delete");
1279
1280 RUN_TEST(tc_redirect_peer, MODE_VETH);
1281 RUN_TEST(tc_redirect_peer, MODE_NETKIT);
1282 RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
1283 RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
1284 RUN_TEST(tc_redirect_neigh, MODE_VETH);
1285 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
1286 RUN_TEST(tc_redirect_dtime, MODE_VETH);
1287 return NULL;
1288 }
1289
test_tc_redirect(void)1290 void test_tc_redirect(void)
1291 {
1292 pthread_t test_thread;
1293 int err;
1294
1295 /* Run the tests in their own thread to isolate the namespace changes
1296 * so they do not affect the environment of other tests.
1297 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
1298 */
1299 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
1300 if (ASSERT_OK(err, "pthread_create"))
1301 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
1302 }
1303