1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 3 /* 4 * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link 5 * between src and dst. The netns fwd has veth links to each src and dst. The 6 * client is in src and server in dst. The test installs a TC BPF program to each 7 * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the 8 * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace 9 * switch from ingress side; it also installs a checker prog on the egress side 10 * to drop unexpected traffic. 11 */ 12 13 #include <arpa/inet.h> 14 #include <linux/if_tun.h> 15 #include <linux/limits.h> 16 #include <linux/sysctl.h> 17 #include <linux/time_types.h> 18 #include <linux/net_tstamp.h> 19 #include <net/if.h> 20 #include <stdbool.h> 21 #include <stdio.h> 22 #include <sys/stat.h> 23 #include <unistd.h> 24 25 #include "test_progs.h" 26 #include "network_helpers.h" 27 #include "netlink_helpers.h" 28 #include "test_tc_neigh_fib.skel.h" 29 #include "test_tc_neigh.skel.h" 30 #include "test_tc_peer.skel.h" 31 #include "test_tc_dtime.skel.h" 32 33 #ifndef TCP_TX_DELAY 34 #define TCP_TX_DELAY 37 35 #endif 36 37 #define NS_SRC "ns_src" 38 #define NS_FWD "ns_fwd" 39 #define NS_DST "ns_dst" 40 41 #define IP4_SRC "172.16.1.100" 42 #define IP4_DST "172.16.2.100" 43 #define IP4_TUN_SRC "172.17.1.100" 44 #define IP4_TUN_FWD "172.17.1.200" 45 #define IP4_PORT 9004 46 47 #define IP6_SRC "0::1:dead:beef:cafe" 48 #define IP6_DST "0::2:dead:beef:cafe" 49 #define IP6_TUN_SRC "1::1:dead:beef:cafe" 50 #define IP6_TUN_FWD "1::2:dead:beef:cafe" 51 #define IP6_PORT 9006 52 53 #define IP4_SLL "169.254.0.1" 54 #define IP4_DLL "169.254.0.2" 55 #define IP4_NET "169.254.0.0" 56 57 #define MAC_DST_FWD "00:11:22:33:44:55" 58 #define MAC_DST "00:22:33:44:55:66" 59 #define MAC_SRC_FWD "00:33:44:55:66:77" 60 #define MAC_SRC "00:44:55:66:77:88" 61 62 #define IFADDR_STR_LEN 18 63 #define PING_ARGS "-i 0.2 -c 3 -w 10 -q" 64 65 #define TIMEOUT_MILLIS 10000 66 #define NSEC_PER_SEC 1000000000ULL 67 68 #define log_err(MSG, ...) \ 69 fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 70 __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) 71 72 static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; 73 static struct netns_obj *netns_objs[3]; 74 75 static int write_file(const char *path, const char *newval) 76 { 77 FILE *f; 78 79 f = fopen(path, "r+"); 80 if (!f) 81 return -1; 82 if (fwrite(newval, strlen(newval), 1, f) != 1) { 83 log_err("writing to %s failed", path); 84 fclose(f); 85 return -1; 86 } 87 fclose(f); 88 return 0; 89 } 90 91 static int netns_setup_namespaces(const char *verb) 92 { 93 struct netns_obj **ns_obj = netns_objs; 94 const char * const *ns = namespaces; 95 96 while (*ns) { 97 if (strcmp(verb, "add") == 0) { 98 *ns_obj = netns_new(*ns, false); 99 if (!ASSERT_OK_PTR(*ns_obj, "netns_new")) 100 return -1; 101 } else { 102 if (!ASSERT_OK_PTR(*ns_obj, "netns_obj is NULL")) 103 return -1; 104 netns_free(*ns_obj); 105 *ns_obj = NULL; 106 } 107 ns++; 108 ns_obj++; 109 } 110 return 0; 111 } 112 113 static void netns_setup_namespaces_nofail(const char *verb) 114 { 115 struct netns_obj **ns_obj = netns_objs; 116 const char * const *ns = namespaces; 117 118 while (*ns) { 119 if (strcmp(verb, "add") == 0) { 120 *ns_obj = netns_new(*ns, false); 121 } else { 122 if (*ns_obj) 123 netns_free(*ns_obj); 124 *ns_obj = NULL; 125 } 126 ns++; 127 ns_obj++; 128 } 129 } 130 131 enum dev_mode { 132 MODE_VETH, 133 MODE_NETKIT, 134 }; 135 136 struct netns_setup_result { 137 enum dev_mode dev_mode; 138 int ifindex_src; 139 int ifindex_src_fwd; 140 int ifindex_dst; 141 int ifindex_dst_fwd; 142 }; 143 144 static int get_ifaddr(const char *name, char *ifaddr) 145 { 146 char path[PATH_MAX]; 147 FILE *f; 148 int ret; 149 150 snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); 151 f = fopen(path, "r"); 152 if (!ASSERT_OK_PTR(f, path)) 153 return -1; 154 155 ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); 156 if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { 157 fclose(f); 158 return -1; 159 } 160 fclose(f); 161 return 0; 162 } 163 164 static int create_netkit(int mode, char *prim, char *peer) 165 { 166 struct rtattr *linkinfo, *data, *peer_info; 167 struct rtnl_handle rth = { .fd = -1 }; 168 const char *type = "netkit"; 169 struct { 170 struct nlmsghdr n; 171 struct ifinfomsg i; 172 char buf[1024]; 173 } req = {}; 174 int err; 175 176 err = rtnl_open(&rth, 0); 177 if (!ASSERT_OK(err, "open_rtnetlink")) 178 return err; 179 180 memset(&req, 0, sizeof(req)); 181 req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); 182 req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; 183 req.n.nlmsg_type = RTM_NEWLINK; 184 req.i.ifi_family = AF_UNSPEC; 185 186 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim)); 187 linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO); 188 addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type)); 189 data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); 190 addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); 191 peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO); 192 req.n.nlmsg_len += sizeof(struct ifinfomsg); 193 addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer)); 194 addattr_nest_end(&req.n, peer_info); 195 addattr_nest_end(&req.n, data); 196 addattr_nest_end(&req.n, linkinfo); 197 198 err = rtnl_talk(&rth, &req.n, NULL); 199 ASSERT_OK(err, "talk_rtnetlink"); 200 rtnl_close(&rth); 201 return err; 202 } 203 204 static int netns_setup_links_and_routes(struct netns_setup_result *result) 205 { 206 struct nstoken *nstoken = NULL; 207 char src_fwd_addr[IFADDR_STR_LEN+1] = {}; 208 char src_addr[IFADDR_STR_LEN + 1] = {}; 209 int err; 210 211 if (result->dev_mode == MODE_VETH) { 212 SYS(fail, "ip link add src address " MAC_SRC " type veth " 213 "peer name src_fwd address " MAC_SRC_FWD); 214 SYS(fail, "ip link add dst address " MAC_DST " type veth " 215 "peer name dst_fwd address " MAC_DST_FWD); 216 } else if (result->dev_mode == MODE_NETKIT) { 217 err = create_netkit(NETKIT_L3, "src", "src_fwd"); 218 if (!ASSERT_OK(err, "create_ifindex_src")) 219 goto fail; 220 err = create_netkit(NETKIT_L3, "dst", "dst_fwd"); 221 if (!ASSERT_OK(err, "create_ifindex_dst")) 222 goto fail; 223 } 224 225 if (get_ifaddr("src_fwd", src_fwd_addr)) 226 goto fail; 227 228 if (get_ifaddr("src", src_addr)) 229 goto fail; 230 231 result->ifindex_src = if_nametoindex("src"); 232 if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) 233 goto fail; 234 235 result->ifindex_src_fwd = if_nametoindex("src_fwd"); 236 if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd")) 237 goto fail; 238 239 result->ifindex_dst = if_nametoindex("dst"); 240 if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst")) 241 goto fail; 242 243 result->ifindex_dst_fwd = if_nametoindex("dst_fwd"); 244 if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd")) 245 goto fail; 246 247 SYS(fail, "ip link set src netns " NS_SRC); 248 SYS(fail, "ip link set src_fwd netns " NS_FWD); 249 SYS(fail, "ip link set dst_fwd netns " NS_FWD); 250 SYS(fail, "ip link set dst netns " NS_DST); 251 252 /** setup in 'src' namespace */ 253 nstoken = open_netns(NS_SRC); 254 if (!ASSERT_OK_PTR(nstoken, "setns src")) 255 goto fail; 256 257 SYS(fail, "ip addr add " IP4_SRC "/32 dev src"); 258 SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad"); 259 SYS(fail, "ip link set dev src up"); 260 261 SYS(fail, "ip route add " IP4_DST "/32 dev src scope global"); 262 SYS(fail, "ip route add " IP4_NET "/16 dev src scope global"); 263 SYS(fail, "ip route add " IP6_DST "/128 dev src scope global"); 264 265 if (result->dev_mode == MODE_VETH) { 266 SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s", 267 src_fwd_addr); 268 SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s", 269 src_fwd_addr); 270 } 271 272 close_netns(nstoken); 273 274 /** setup in 'fwd' namespace */ 275 nstoken = open_netns(NS_FWD); 276 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 277 goto fail; 278 279 /* The fwd netns automatically gets a v6 LL address / routes, but also 280 * needs v4 one in order to start ARP probing. IP4_NET route is added 281 * to the endpoints so that the ARP processing will reply. 282 */ 283 SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd"); 284 SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd"); 285 SYS(fail, "ip link set dev src_fwd up"); 286 SYS(fail, "ip link set dev dst_fwd up"); 287 288 SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global"); 289 SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global"); 290 SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); 291 SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); 292 293 if (result->dev_mode == MODE_VETH) { 294 SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr); 295 SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr); 296 SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST); 297 SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST); 298 } 299 300 close_netns(nstoken); 301 302 /** setup in 'dst' namespace */ 303 nstoken = open_netns(NS_DST); 304 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 305 goto fail; 306 307 SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); 308 SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); 309 SYS(fail, "ip link set dev dst up"); 310 SYS(fail, "ip link set dev lo up"); 311 312 SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); 313 SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); 314 SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global"); 315 316 if (result->dev_mode == MODE_VETH) { 317 SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD); 318 SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD); 319 } 320 321 close_netns(nstoken); 322 323 return 0; 324 fail: 325 if (nstoken) 326 close_netns(nstoken); 327 return -1; 328 } 329 330 static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex) 331 { 332 char err_str[128], ifname[16]; 333 int err; 334 335 qdisc_hook->ifindex = ifindex; 336 qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; 337 err = bpf_tc_hook_create(qdisc_hook); 338 snprintf(err_str, sizeof(err_str), 339 "qdisc add dev %s clsact", 340 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>"); 341 err_str[sizeof(err_str) - 1] = 0; 342 ASSERT_OK(err, err_str); 343 344 return err; 345 } 346 347 static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook, 348 enum bpf_tc_attach_point xgress, 349 const struct bpf_program *prog, int priority) 350 { 351 LIBBPF_OPTS(bpf_tc_opts, tc_attach); 352 char err_str[128], ifname[16]; 353 int err; 354 355 qdisc_hook->attach_point = xgress; 356 tc_attach.prog_fd = bpf_program__fd(prog); 357 tc_attach.priority = priority; 358 err = bpf_tc_attach(qdisc_hook, &tc_attach); 359 snprintf(err_str, sizeof(err_str), 360 "filter add dev %s %s prio %d bpf da %s", 361 if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>", 362 xgress == BPF_TC_INGRESS ? "ingress" : "egress", 363 priority, bpf_program__name(prog)); 364 err_str[sizeof(err_str) - 1] = 0; 365 ASSERT_OK(err, err_str); 366 367 return err; 368 } 369 370 #define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \ 371 if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \ 372 goto fail; \ 373 }) 374 375 #define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \ 376 if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \ 377 goto fail; \ 378 }) 379 380 static int netns_load_bpf(const struct bpf_program *src_prog, 381 const struct bpf_program *dst_prog, 382 const struct bpf_program *chk_prog, 383 const struct netns_setup_result *setup_result) 384 { 385 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); 386 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 387 int err; 388 389 /* tc qdisc add dev src_fwd clsact */ 390 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); 391 /* tc filter add dev src_fwd ingress bpf da src_prog */ 392 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0); 393 /* tc filter add dev src_fwd egress bpf da chk_prog */ 394 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0); 395 396 /* tc qdisc add dev dst_fwd clsact */ 397 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 398 /* tc filter add dev dst_fwd ingress bpf da dst_prog */ 399 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0); 400 /* tc filter add dev dst_fwd egress bpf da chk_prog */ 401 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0); 402 403 return 0; 404 fail: 405 return -1; 406 } 407 408 static void test_tcp(int family, const char *addr, __u16 port) 409 { 410 int listen_fd = -1, accept_fd = -1, client_fd = -1; 411 char buf[] = "testing testing"; 412 int n; 413 struct nstoken *nstoken; 414 415 nstoken = open_netns(NS_DST); 416 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 417 return; 418 419 listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); 420 if (!ASSERT_GE(listen_fd, 0, "listen")) 421 goto done; 422 423 close_netns(nstoken); 424 nstoken = open_netns(NS_SRC); 425 if (!ASSERT_OK_PTR(nstoken, "setns src")) 426 goto done; 427 428 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 429 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 430 goto done; 431 432 accept_fd = accept(listen_fd, NULL, NULL); 433 if (!ASSERT_GE(accept_fd, 0, "accept")) 434 goto done; 435 436 if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) 437 goto done; 438 439 n = write(client_fd, buf, sizeof(buf)); 440 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 441 goto done; 442 443 n = read(accept_fd, buf, sizeof(buf)); 444 ASSERT_EQ(n, sizeof(buf), "recv from server"); 445 446 done: 447 if (nstoken) 448 close_netns(nstoken); 449 if (listen_fd >= 0) 450 close(listen_fd); 451 if (accept_fd >= 0) 452 close(accept_fd); 453 if (client_fd >= 0) 454 close(client_fd); 455 } 456 457 static int test_ping(int family, const char *addr) 458 { 459 SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); 460 return 0; 461 fail: 462 return -1; 463 } 464 465 static void test_connectivity(void) 466 { 467 test_tcp(AF_INET, IP4_DST, IP4_PORT); 468 test_ping(AF_INET, IP4_DST); 469 test_tcp(AF_INET6, IP6_DST, IP6_PORT); 470 test_ping(AF_INET6, IP6_DST); 471 } 472 473 static int set_forwarding(bool enable) 474 { 475 int err; 476 477 err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0"); 478 if (!ASSERT_OK(err, "set ipv4.ip_forward=0")) 479 return err; 480 481 err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0"); 482 if (!ASSERT_OK(err, "set ipv6.forwarding=0")) 483 return err; 484 485 return 0; 486 } 487 488 static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) 489 { 490 struct timespec pkt_ts = {}; 491 char ctl[CMSG_SPACE(sizeof(pkt_ts))]; 492 struct timespec now_ts; 493 struct msghdr msg = {}; 494 __u64 now_ns, pkt_ns; 495 struct cmsghdr *cmsg; 496 struct iovec iov; 497 char data[32]; 498 int ret; 499 500 iov.iov_base = data; 501 iov.iov_len = sizeof(data); 502 msg.msg_iov = &iov; 503 msg.msg_iovlen = 1; 504 msg.msg_control = &ctl; 505 msg.msg_controllen = sizeof(ctl); 506 507 ret = recvmsg(fd, &msg, 0); 508 if (!ASSERT_EQ(ret, s, "recvmsg")) 509 return -1; 510 ASSERT_STRNEQ(data, expected, s, "expected rcv data"); 511 512 cmsg = CMSG_FIRSTHDR(&msg); 513 if (cmsg && cmsg->cmsg_level == SOL_SOCKET && 514 cmsg->cmsg_type == SO_TIMESTAMPNS) 515 memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); 516 517 pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; 518 if (tstamp) { 519 /* caller will check the tstamp itself */ 520 *tstamp = pkt_ns; 521 return 0; 522 } 523 524 ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); 525 526 ret = clock_gettime(CLOCK_REALTIME, &now_ts); 527 ASSERT_OK(ret, "clock_gettime"); 528 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 529 530 if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) 531 ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, 532 "check rcv tstamp"); 533 return 0; 534 } 535 536 static void rcv_tstamp(int fd, const char *expected, size_t s) 537 { 538 __rcv_tstamp(fd, expected, s, NULL); 539 } 540 541 static int wait_netstamp_needed_key(void) 542 { 543 int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n; 544 char buf[] = "testing testing"; 545 struct nstoken *nstoken; 546 __u64 tstamp = 0; 547 548 nstoken = open_netns(NS_DST); 549 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 550 return -1; 551 552 srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0); 553 if (!ASSERT_GE(srv_fd, 0, "start_server")) 554 goto done; 555 556 err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS, 557 &opt, sizeof(opt)); 558 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)")) 559 goto done; 560 561 cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS); 562 if (!ASSERT_GE(cli_fd, 0, "connect_to_fd")) 563 goto done; 564 565 again: 566 n = write(cli_fd, buf, sizeof(buf)); 567 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 568 goto done; 569 err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp); 570 if (!ASSERT_OK(err, "__rcv_tstamp")) 571 goto done; 572 if (!tstamp && nretries++ < 5) { 573 sleep(1); 574 printf("netstamp_needed_key retry#%d\n", nretries); 575 goto again; 576 } 577 578 done: 579 if (!tstamp && srv_fd != -1) { 580 close(srv_fd); 581 srv_fd = -1; 582 } 583 if (cli_fd != -1) 584 close(cli_fd); 585 close_netns(nstoken); 586 return srv_fd; 587 } 588 589 static void snd_tstamp(int fd, char *b, size_t s) 590 { 591 struct sock_txtime opt = { .clockid = CLOCK_TAI }; 592 char ctl[CMSG_SPACE(sizeof(__u64))]; 593 struct timespec now_ts; 594 struct msghdr msg = {}; 595 struct cmsghdr *cmsg; 596 struct iovec iov; 597 __u64 now_ns; 598 int ret; 599 600 ret = clock_gettime(CLOCK_TAI, &now_ts); 601 ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)"); 602 now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec; 603 604 iov.iov_base = b; 605 iov.iov_len = s; 606 msg.msg_iov = &iov; 607 msg.msg_iovlen = 1; 608 msg.msg_control = &ctl; 609 msg.msg_controllen = sizeof(ctl); 610 611 cmsg = CMSG_FIRSTHDR(&msg); 612 cmsg->cmsg_level = SOL_SOCKET; 613 cmsg->cmsg_type = SCM_TXTIME; 614 cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns)); 615 *(__u64 *)CMSG_DATA(cmsg) = now_ns; 616 617 ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt)); 618 ASSERT_OK(ret, "setsockopt(SO_TXTIME)"); 619 620 ret = sendmsg(fd, &msg, 0); 621 ASSERT_EQ(ret, s, "sendmsg"); 622 } 623 624 static void test_inet_dtime(int family, int type, const char *addr, __u16 port) 625 { 626 int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err; 627 char buf[] = "testing testing"; 628 struct nstoken *nstoken; 629 630 nstoken = open_netns(NS_DST); 631 if (!ASSERT_OK_PTR(nstoken, "setns dst")) 632 return; 633 listen_fd = start_server(family, type, addr, port, 0); 634 close_netns(nstoken); 635 636 if (!ASSERT_GE(listen_fd, 0, "listen")) 637 return; 638 639 /* Ensure the kernel puts the (rcv) timestamp for all skb */ 640 err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS, 641 &opt, sizeof(opt)); 642 if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS)")) 643 goto done; 644 645 if (type == SOCK_STREAM) { 646 /* Ensure the kernel set EDT when sending out rst/ack 647 * from the kernel's ctl_sk. 648 */ 649 err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt, 650 sizeof(opt)); 651 if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)")) 652 goto done; 653 } 654 655 nstoken = open_netns(NS_SRC); 656 if (!ASSERT_OK_PTR(nstoken, "setns src")) 657 goto done; 658 client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); 659 close_netns(nstoken); 660 661 if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) 662 goto done; 663 664 if (type == SOCK_STREAM) { 665 int n; 666 667 accept_fd = accept(listen_fd, NULL, NULL); 668 if (!ASSERT_GE(accept_fd, 0, "accept")) 669 goto done; 670 671 n = write(client_fd, buf, sizeof(buf)); 672 if (!ASSERT_EQ(n, sizeof(buf), "send to server")) 673 goto done; 674 rcv_tstamp(accept_fd, buf, sizeof(buf)); 675 } else { 676 snd_tstamp(client_fd, buf, sizeof(buf)); 677 rcv_tstamp(listen_fd, buf, sizeof(buf)); 678 } 679 680 done: 681 close(listen_fd); 682 if (accept_fd != -1) 683 close(accept_fd); 684 if (client_fd != -1) 685 close(client_fd); 686 } 687 688 static int netns_load_dtime_bpf(struct test_tc_dtime *skel, 689 const struct netns_setup_result *setup_result) 690 { 691 LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd); 692 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 693 LIBBPF_OPTS(bpf_tc_hook, qdisc_src); 694 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst); 695 struct nstoken *nstoken; 696 int err; 697 698 /* setup ns_src tc progs */ 699 nstoken = open_netns(NS_SRC); 700 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 701 return -1; 702 /* tc qdisc add dev src clsact */ 703 QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src); 704 /* tc filter add dev src ingress bpf da ingress_host */ 705 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0); 706 /* tc filter add dev src egress bpf da egress_host */ 707 XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0); 708 close_netns(nstoken); 709 710 /* setup ns_dst tc progs */ 711 nstoken = open_netns(NS_DST); 712 if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST)) 713 return -1; 714 /* tc qdisc add dev dst clsact */ 715 QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst); 716 /* tc filter add dev dst ingress bpf da ingress_host */ 717 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0); 718 /* tc filter add dev dst egress bpf da egress_host */ 719 XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0); 720 close_netns(nstoken); 721 722 /* setup ns_fwd tc progs */ 723 nstoken = open_netns(NS_FWD); 724 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 725 return -1; 726 /* tc qdisc add dev dst_fwd clsact */ 727 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 728 /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ 729 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, 730 skel->progs.ingress_fwdns_prio100, 100); 731 /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ 732 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, 733 skel->progs.ingress_fwdns_prio101, 101); 734 /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */ 735 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, 736 skel->progs.egress_fwdns_prio100, 100); 737 /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */ 738 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, 739 skel->progs.egress_fwdns_prio101, 101); 740 741 /* tc qdisc add dev src_fwd clsact */ 742 QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd); 743 /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */ 744 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, 745 skel->progs.ingress_fwdns_prio100, 100); 746 /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */ 747 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, 748 skel->progs.ingress_fwdns_prio101, 101); 749 /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */ 750 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, 751 skel->progs.egress_fwdns_prio100, 100); 752 /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */ 753 XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, 754 skel->progs.egress_fwdns_prio101, 101); 755 close_netns(nstoken); 756 return 0; 757 758 fail: 759 close_netns(nstoken); 760 return err; 761 } 762 763 enum { 764 INGRESS_FWDNS_P100, 765 INGRESS_FWDNS_P101, 766 EGRESS_FWDNS_P100, 767 EGRESS_FWDNS_P101, 768 INGRESS_ENDHOST, 769 EGRESS_ENDHOST, 770 SET_DTIME, 771 __MAX_CNT, 772 }; 773 774 const char *cnt_names[] = { 775 "ingress_fwdns_p100", 776 "ingress_fwdns_p101", 777 "egress_fwdns_p100", 778 "egress_fwdns_p101", 779 "ingress_endhost", 780 "egress_endhost", 781 "set_dtime", 782 }; 783 784 enum { 785 TCP_IP6_CLEAR_DTIME, 786 TCP_IP4, 787 TCP_IP6, 788 UDP_IP4, 789 UDP_IP6, 790 TCP_IP4_RT_FWD, 791 TCP_IP6_RT_FWD, 792 UDP_IP4_RT_FWD, 793 UDP_IP6_RT_FWD, 794 UKN_TEST, 795 __NR_TESTS, 796 }; 797 798 const char *test_names[] = { 799 "tcp ip6 clear dtime", 800 "tcp ip4", 801 "tcp ip6", 802 "udp ip4", 803 "udp ip6", 804 "tcp ip4 rt fwd", 805 "tcp ip6 rt fwd", 806 "udp ip4 rt fwd", 807 "udp ip6 rt fwd", 808 }; 809 810 static const char *dtime_cnt_str(int test, int cnt) 811 { 812 static char name[64]; 813 814 snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]); 815 816 return name; 817 } 818 819 static const char *dtime_err_str(int test, int cnt) 820 { 821 static char name[64]; 822 823 snprintf(name, sizeof(name), "%s %s errs", test_names[test], 824 cnt_names[cnt]); 825 826 return name; 827 } 828 829 static void test_tcp_clear_dtime(struct test_tc_dtime *skel) 830 { 831 int i, t = TCP_IP6_CLEAR_DTIME; 832 __u32 *dtimes = skel->bss->dtimes[t]; 833 __u32 *errs = skel->bss->errs[t]; 834 835 skel->bss->test = t; 836 test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); 837 838 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 839 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 840 ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, 841 dtime_cnt_str(t, INGRESS_FWDNS_P101)); 842 ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0, 843 dtime_cnt_str(t, EGRESS_FWDNS_P100)); 844 ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0, 845 dtime_cnt_str(t, EGRESS_FWDNS_P101)); 846 ASSERT_GT(dtimes[EGRESS_ENDHOST], 0, 847 dtime_cnt_str(t, EGRESS_ENDHOST)); 848 ASSERT_GT(dtimes[INGRESS_ENDHOST], 0, 849 dtime_cnt_str(t, INGRESS_ENDHOST)); 850 851 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 852 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 853 } 854 855 static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 856 { 857 __u32 *dtimes, *errs; 858 const char *addr; 859 int i, t; 860 861 if (family == AF_INET) { 862 t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD; 863 addr = IP4_DST; 864 } else { 865 t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD; 866 addr = IP6_DST; 867 } 868 869 dtimes = skel->bss->dtimes[t]; 870 errs = skel->bss->errs[t]; 871 872 skel->bss->test = t; 873 test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); 874 875 /* fwdns_prio100 prog does not read delivery_time_type, so 876 * kernel puts the (rcv) timestamp in __sk_buff->tstamp 877 */ 878 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 879 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 880 for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++) 881 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 882 883 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 884 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 885 } 886 887 static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) 888 { 889 __u32 *dtimes, *errs; 890 const char *addr; 891 int i, t; 892 893 if (family == AF_INET) { 894 t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD; 895 addr = IP4_DST; 896 } else { 897 t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD; 898 addr = IP6_DST; 899 } 900 901 dtimes = skel->bss->dtimes[t]; 902 errs = skel->bss->errs[t]; 903 904 skel->bss->test = t; 905 test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); 906 907 ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, 908 dtime_cnt_str(t, INGRESS_FWDNS_P100)); 909 for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) 910 ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); 911 912 for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++) 913 ASSERT_EQ(errs[i], 0, dtime_err_str(t, i)); 914 } 915 916 static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) 917 { 918 struct test_tc_dtime *skel; 919 struct nstoken *nstoken; 920 int hold_tstamp_fd, err; 921 922 /* Hold a sk with the SOCK_TIMESTAMP set to ensure there 923 * is no delay in the kernel net_enable_timestamp(). 924 * This ensures the following tests must have 925 * non zero rcv tstamp in the recvmsg(). 926 */ 927 hold_tstamp_fd = wait_netstamp_needed_key(); 928 if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key")) 929 return; 930 931 skel = test_tc_dtime__open(); 932 if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) 933 goto done; 934 935 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 936 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 937 938 err = test_tc_dtime__load(skel); 939 if (!ASSERT_OK(err, "test_tc_dtime__load")) 940 goto done; 941 942 if (netns_load_dtime_bpf(skel, setup_result)) 943 goto done; 944 945 nstoken = open_netns(NS_FWD); 946 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 947 goto done; 948 err = set_forwarding(false); 949 close_netns(nstoken); 950 if (!ASSERT_OK(err, "disable forwarding")) 951 goto done; 952 953 test_tcp_clear_dtime(skel); 954 955 test_tcp_dtime(skel, AF_INET, true); 956 test_tcp_dtime(skel, AF_INET6, true); 957 test_udp_dtime(skel, AF_INET, true); 958 test_udp_dtime(skel, AF_INET6, true); 959 960 /* Test the kernel ip[6]_forward path instead 961 * of bpf_redirect_neigh(). 962 */ 963 nstoken = open_netns(NS_FWD); 964 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 965 goto done; 966 err = set_forwarding(true); 967 close_netns(nstoken); 968 if (!ASSERT_OK(err, "enable forwarding")) 969 goto done; 970 971 test_tcp_dtime(skel, AF_INET, false); 972 test_tcp_dtime(skel, AF_INET6, false); 973 test_udp_dtime(skel, AF_INET, false); 974 test_udp_dtime(skel, AF_INET6, false); 975 976 done: 977 test_tc_dtime__destroy(skel); 978 close(hold_tstamp_fd); 979 } 980 981 static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) 982 { 983 struct nstoken *nstoken = NULL; 984 struct test_tc_neigh_fib *skel = NULL; 985 986 nstoken = open_netns(NS_FWD); 987 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 988 return; 989 990 skel = test_tc_neigh_fib__open(); 991 if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) 992 goto done; 993 994 if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) 995 goto done; 996 997 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 998 skel->progs.tc_chk, setup_result)) 999 goto done; 1000 1001 /* bpf_fib_lookup() checks if forwarding is enabled */ 1002 if (!ASSERT_OK(set_forwarding(true), "enable forwarding")) 1003 goto done; 1004 1005 test_connectivity(); 1006 1007 done: 1008 if (skel) 1009 test_tc_neigh_fib__destroy(skel); 1010 close_netns(nstoken); 1011 } 1012 1013 static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) 1014 { 1015 struct nstoken *nstoken = NULL; 1016 struct test_tc_neigh *skel = NULL; 1017 int err; 1018 1019 nstoken = open_netns(NS_FWD); 1020 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 1021 return; 1022 1023 skel = test_tc_neigh__open(); 1024 if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) 1025 goto done; 1026 1027 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 1028 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1029 1030 err = test_tc_neigh__load(skel); 1031 if (!ASSERT_OK(err, "test_tc_neigh__load")) 1032 goto done; 1033 1034 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 1035 skel->progs.tc_chk, setup_result)) 1036 goto done; 1037 1038 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1039 goto done; 1040 1041 test_connectivity(); 1042 1043 done: 1044 if (skel) 1045 test_tc_neigh__destroy(skel); 1046 close_netns(nstoken); 1047 } 1048 1049 static void test_tc_redirect_peer(struct netns_setup_result *setup_result) 1050 { 1051 struct nstoken *nstoken; 1052 struct test_tc_peer *skel; 1053 int err; 1054 1055 nstoken = open_netns(NS_FWD); 1056 if (!ASSERT_OK_PTR(nstoken, "setns fwd")) 1057 return; 1058 1059 skel = test_tc_peer__open(); 1060 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 1061 goto done; 1062 1063 skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; 1064 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1065 1066 err = test_tc_peer__load(skel); 1067 if (!ASSERT_OK(err, "test_tc_peer__load")) 1068 goto done; 1069 1070 if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst, 1071 skel->progs.tc_chk, setup_result)) 1072 goto done; 1073 1074 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1075 goto done; 1076 1077 test_connectivity(); 1078 1079 done: 1080 if (skel) 1081 test_tc_peer__destroy(skel); 1082 close_netns(nstoken); 1083 } 1084 1085 static int tun_open(char *name) 1086 { 1087 struct ifreq ifr; 1088 int fd, err; 1089 1090 fd = open("/dev/net/tun", O_RDWR); 1091 if (!ASSERT_GE(fd, 0, "open /dev/net/tun")) 1092 return -1; 1093 1094 memset(&ifr, 0, sizeof(ifr)); 1095 1096 ifr.ifr_flags = IFF_TUN | IFF_NO_PI; 1097 if (*name) 1098 strncpy(ifr.ifr_name, name, IFNAMSIZ); 1099 1100 err = ioctl(fd, TUNSETIFF, &ifr); 1101 if (!ASSERT_OK(err, "ioctl TUNSETIFF")) 1102 goto fail; 1103 1104 SYS(fail, "ip link set dev %s up", name); 1105 1106 return fd; 1107 fail: 1108 close(fd); 1109 return -1; 1110 } 1111 1112 enum { 1113 SRC_TO_TARGET = 0, 1114 TARGET_TO_SRC = 1, 1115 }; 1116 1117 static int tun_relay_loop(int src_fd, int target_fd) 1118 { 1119 fd_set rfds, wfds; 1120 1121 FD_ZERO(&rfds); 1122 FD_ZERO(&wfds); 1123 1124 for (;;) { 1125 char buf[1500]; 1126 int direction, nread, nwrite; 1127 1128 FD_SET(src_fd, &rfds); 1129 FD_SET(target_fd, &rfds); 1130 1131 if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) { 1132 log_err("select failed"); 1133 return 1; 1134 } 1135 1136 direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC; 1137 1138 nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf)); 1139 if (nread < 0) { 1140 log_err("read failed"); 1141 return 1; 1142 } 1143 1144 nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread); 1145 if (nwrite != nread) { 1146 log_err("write failed"); 1147 return 1; 1148 } 1149 } 1150 } 1151 1152 static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result) 1153 { 1154 LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd); 1155 LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd); 1156 struct test_tc_peer *skel = NULL; 1157 struct nstoken *nstoken = NULL; 1158 int err; 1159 int tunnel_pid = -1; 1160 int src_fd, target_fd = -1; 1161 int ifindex; 1162 1163 /* Start a L3 TUN/TAP tunnel between the src and dst namespaces. 1164 * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those 1165 * expose the L2 headers encapsulating the IP packet to BPF and hence 1166 * don't have skb in suitable state for this test. Alternative to TUN/TAP 1167 * would be e.g. Wireguard which would appear as a pure L3 device to BPF, 1168 * but that requires much more complicated setup. 1169 */ 1170 nstoken = open_netns(NS_SRC); 1171 if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC)) 1172 return; 1173 1174 src_fd = tun_open("tun_src"); 1175 if (!ASSERT_GE(src_fd, 0, "tun_open tun_src")) 1176 goto fail; 1177 1178 close_netns(nstoken); 1179 1180 nstoken = open_netns(NS_FWD); 1181 if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD)) 1182 goto fail; 1183 1184 target_fd = tun_open("tun_fwd"); 1185 if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd")) 1186 goto fail; 1187 1188 tunnel_pid = fork(); 1189 if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop")) 1190 goto fail; 1191 1192 if (tunnel_pid == 0) 1193 exit(tun_relay_loop(src_fd, target_fd)); 1194 1195 skel = test_tc_peer__open(); 1196 if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) 1197 goto fail; 1198 1199 ifindex = if_nametoindex("tun_fwd"); 1200 if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd")) 1201 goto fail; 1202 1203 skel->rodata->IFINDEX_SRC = ifindex; 1204 skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; 1205 1206 err = test_tc_peer__load(skel); 1207 if (!ASSERT_OK(err, "test_tc_peer__load")) 1208 goto fail; 1209 1210 /* Load "tc_src_l3" to the tun_fwd interface to redirect packets 1211 * towards dst, and "tc_dst" to redirect packets 1212 * and "tc_chk" on dst_fwd to drop non-redirected packets. 1213 */ 1214 /* tc qdisc add dev tun_fwd clsact */ 1215 QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex); 1216 /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */ 1217 XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0); 1218 1219 /* tc qdisc add dev dst_fwd clsact */ 1220 QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd); 1221 /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */ 1222 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0); 1223 /* tc filter add dev dst_fwd egress bpf da tc_chk */ 1224 XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0); 1225 1226 /* Setup route and neigh tables */ 1227 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24"); 1228 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24"); 1229 1230 SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad"); 1231 SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad"); 1232 1233 SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global"); 1234 SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD 1235 " dev tun_src scope global"); 1236 SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global"); 1237 SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global"); 1238 SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD 1239 " dev tun_src scope global"); 1240 SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global"); 1241 1242 SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD); 1243 SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD); 1244 1245 if (!ASSERT_OK(set_forwarding(false), "disable forwarding")) 1246 goto fail; 1247 1248 test_connectivity(); 1249 1250 fail: 1251 if (tunnel_pid > 0) { 1252 kill(tunnel_pid, SIGTERM); 1253 waitpid(tunnel_pid, NULL, 0); 1254 } 1255 if (src_fd >= 0) 1256 close(src_fd); 1257 if (target_fd >= 0) 1258 close(target_fd); 1259 if (skel) 1260 test_tc_peer__destroy(skel); 1261 if (nstoken) 1262 close_netns(nstoken); 1263 } 1264 1265 #define RUN_TEST(name, mode) \ 1266 ({ \ 1267 struct netns_setup_result setup_result = { .dev_mode = mode, }; \ 1268 if (test__start_subtest(#name)) \ 1269 if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \ 1270 if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \ 1271 "setup links and routes")) \ 1272 test_ ## name(&setup_result); \ 1273 netns_setup_namespaces("delete"); \ 1274 } \ 1275 }) 1276 1277 static void *test_tc_redirect_run_tests(void *arg) 1278 { 1279 netns_setup_namespaces_nofail("delete"); 1280 1281 RUN_TEST(tc_redirect_peer, MODE_VETH); 1282 RUN_TEST(tc_redirect_peer, MODE_NETKIT); 1283 RUN_TEST(tc_redirect_peer_l3, MODE_VETH); 1284 RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT); 1285 RUN_TEST(tc_redirect_neigh, MODE_VETH); 1286 RUN_TEST(tc_redirect_neigh_fib, MODE_VETH); 1287 RUN_TEST(tc_redirect_dtime, MODE_VETH); 1288 return NULL; 1289 } 1290 1291 void test_tc_redirect(void) 1292 { 1293 pthread_t test_thread; 1294 int err; 1295 1296 /* Run the tests in their own thread to isolate the namespace changes 1297 * so they do not affect the environment of other tests. 1298 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 1299 */ 1300 err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL); 1301 if (ASSERT_OK(err, "pthread_create")) 1302 ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 1303 } 1304