#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
#
# You can check with different Originator/Link/Responder MTU eg:
# nft_flowtable.sh -o8000 -l1500 -r2000
#

source lib.sh

ret=0
SOCAT_TIMEOUT=60

nsin=""
nsin_small=""
ns1out=""
ns2out=""

# remember the current setting so cleanup() can restore it.
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)

checktool "nft --version" "run test without nft tool"
checktool "socat -h" "run test without socat"

setup_ns ns1 ns2 nsr1 nsr2

# EXIT trap handler: kill processes left in the endpoint namespaces, remove
# the namespaces and the temporary files, restore nf_log_all_netns.
cleanup() {
	ip netns pids "$ns1" | xargs kill 2>/dev/null
	ip netns pids "$ns2" | xargs kill 2>/dev/null

	cleanup_all_ns

	rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"

	[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}

trap cleanup EXIT

sysctl -q net.netfilter.nf_log_all_netns=1

ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"

ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"

for dev in veth0 veth1; do
	ip -net "$nsr1" link set "$dev" up
	ip -net "$nsr2" link set "$dev" up
done

ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad

ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad

# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
# or to do PMTU discovery (send ICMP error back to originator).
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.

omtu=9000
lmtu=1500
rmtu=2000

filesize=$((2 * 1024 * 1024))
filesize_small=$((filesize / 16))

# Print the supported command line options and exit with status 1.
usage(){
	echo "nft_flowtable.sh [OPTIONS]"
	echo
	echo "MTU options"
	echo " -o originator"
	echo " -l link"
	echo " -r responder"
	echo
	echo "Transfer options"
	echo " -s size of the transferred file"
	exit 1
}

while getopts "o:l:r:s:" o
do
	case $o in
		o) omtu=$OPTARG;;
		l) lmtu=$OPTARG;;
		r) rmtu=$OPTARG;;
		s)
			filesize=$OPTARG
			filesize_small=$((OPTARG / 16))
		;;
		*) usage;;
	esac
done

if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
	exit 1
fi

ip -net "$ns1" link set eth0 mtu "$omtu"

if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
	exit 1
fi

if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
	exit 1
fi

if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
	exit 1
fi

ip -net "$ns2" link set eth0 mtu "$rmtu"

# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad

ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad

ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
for i in 0 1; do
	ip netns exec "$nsr1" sysctl "net.ipv4.conf.veth$i.forwarding=1" > /dev/null
	ip netns exec "$nsr2" sysctl "net.ipv4.conf.veth$i.forwarding=1" > /dev/null
done

for ns in "$ns1" "$ns2";do
	ip -net "$ns" link set eth0 up

	if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
		echo "ERROR: Check Originator/Responder values (problem during address addition)"
		exit 1
	fi
	# don't set ip DF bit for first two tests
	ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done

ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
ip -net "$ns1" route add default via 10.0.1.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1

ip -net "$nsr1" route add default via 192.168.10.2
ip -6 -net "$nsr1" route add default via fee1:2::2
ip -net "$nsr2" route add default via 192.168.10.1
ip -6 -net "$nsr2" route add default via fee1:2::1

ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
	flowtable f1 {
		hook ingress priority 0
		devices = { veth0, veth1 }
	}

	counter routed_orig { }
	counter routed_repl { }

	chain forward {
		type filter hook forward priority 0; policy drop;

		# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
		meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept

		# count packets supposedly offloaded as per direction.
		ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept

		ct state established,related accept

		meta nfproto ipv4 meta l4proto icmp accept
		meta nfproto ipv6 meta l4proto icmpv6 accept
	}
}
EOF

if [ $? -ne 0 ]; then
	echo "SKIP: Could not load nft ruleset"
	exit $ksft_skip
fi

ip netns exec "$ns2" nft -f - <<EOF
table inet filter {
	counter ip4dscp0 { }
	counter ip4dscp3 { }

	chain input {
		type filter hook input priority 0; policy accept;
		meta l4proto tcp goto {
			ip dscp cs3 counter name ip4dscp3 accept
			ip dscp 0 counter name ip4dscp0 accept
		}
	}
}
EOF

if [ $? -ne 0 ]; then
	echo -n "SKIP: Could not load ruleset: "
	nft --version
	exit $ksft_skip
fi

# test basic connectivity
if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
	echo "ERROR: $ns1 cannot reach $ns2" 1>&2
	exit 1
fi

if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
	echo "ERROR: $ns2 cannot reach $ns1" 1>&2
	exit 1
fi

nsin=$(mktemp)
nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)

# Fill file $1 with $2 bytes read from /dev/urandom.
make_file()
{
	name="$1"
	sz="$2"

	head -c "$sz" < /dev/urandom > "$name"
}

# Read and reset the routed_orig/routed_repl counters in nsr1 and compare
# their byte counts against the size of $nsin.
# $1: test description used in the PASS/FAIL message.
# Sets the global ret to 1 if a counter exceeds the file size.
check_counters()
{
	local what=$1
	local ok=1

	local orig repl
	orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
	repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)

	# keep what follows the "bytes" keyword, i.e. the byte count.
	local orig_cnt=${orig#*bytes}
	local repl_cnt=${repl#*bytes}

	local fs
	fs=$(du -sb "$nsin")
	# du prints "<size><whitespace><path>": keep the size only.
	local max_orig=${fs%%[[:space:]]*}
	local max_repl=$((max_orig))

	# flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
	# should always be lower than the size of the transmitted file (max_orig).
	if [ "$orig_cnt" -gt "$max_orig" ];then
		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ "$repl_cnt" -gt "$max_repl" ];then
		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2
		ret=1
		ok=0
	fi

	if [ $ok -eq 1 ]; then
		echo "PASS: $what"
	fi
}

# Read and reset the ip4dscp3/ip4dscp0 packet counters in ns2 and check them
# against the expectation selected by $1.
# $1: dscp_none, dscp_fwd, dscp_ingress or dscp_egress.
# $2: pmtu setting of the test, only used in the failure message.
# Sets the global ret to 1 on mismatch or unknown check name.
check_dscp()
{
	local what=$1
	local pmtud="$2"
	local ok=1

	local counter
	local pc4 pc4z

	# cut the line down to the number between "packets" and "bytes".
	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
	pc4=${counter%*bytes*}
	pc4=${pc4#*packets}

	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
	pc4z=${counter%*bytes*}
	pc4z=${pc4z#*packets}

	local failmsg="FAIL: pmtu $pmtud: $what counters do not match, expected"

	case "$what" in
	"dscp_none")
		if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_fwd")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
			echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_ingress")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	"dscp_egress")
		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
			echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
			ret=1
			ok=0
		fi
		;;
	*)
		echo "$failmsg: Unknown DSCP check" 1>&2
		ret=1
		ok=0
		;;
	esac

	if [ "$ok" -eq 1 ] ;then
		echo "PASS: $what: dscp packet counters match"
	fi
}

# Compare sent file $1 with received file $2; $3 names the direction in the
# failure message. Returns 0 when the files are identical, 1 otherwise.
check_transfer()
{
	local in=$1
	local out=$2
	local what=$3

	if ! cmp "$in" "$out" > /dev/null 2>&1; then
		echo "FAIL: file mismatch for $what" 1>&2
		ls -l "$in"
		ls -l "$out"
		return 1
	fi

	return 0
}

# Succeeds once a TCP listener on port 12345 shows up in namespace $nsb.
# $nsb is not a parameter: it is the local variable of the calling
# test_tcp_forwarding_ip.
listener_ready()
{
	ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
}

# Transfer a file in both directions over a single TCP connection.
# $1: namespace running the socat client
# $2: namespace running the socat listener (always port 12345)
# $3: pmtu setting; 0 selects the small input file
# $4: IP version (4 or 6)
# $5: destination address the client connects to
# $6: destination port the client connects to
# Returns 0 if both socat instances succeeded and both received files match
# the input file; otherwise returns 1 and sets the global ret to 1.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local pmtu=$3
	local proto=$4
	local dstip=$5
	local dstport=$6
	local lret=0
	local socatc
	local socatl
	local lpid
	local infile="$nsin"

	if [ "$pmtu" -eq 0 ]; then
		infile="$nsin_small"
	fi

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -"${proto}" \
		TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
	lpid=$!

	busywait 1000 listener_ready

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -"${proto}" \
		TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
	socatc=$?

	wait "$lpid"
	socatl=$?

	# a failed or timed out socat is a test failure even before comparing files.
	if [ "$socatl" -ne 0 ] || [ "$socatc" -ne 0 ];then
		echo "FAIL: socat failed: listener exit code $socatl, client exit code $socatc" 1>&2
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
		lret=1
		ret=1
	fi

	return $lret
}

# Thin wrapper around test_tcp_forwarding_ip, same arguments and return value.
test_tcp_forwarding()
{
	local pmtu="$3"
	local proto="$4"
	local dstip="$5"
	local dstport="$6"

	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"

	return $?
}

# Run the transfer three times while nsr1 rewrites the DSCP field to cs3:
# in a netdev ingress hook on veth0, in a netdev egress hook on veth1 and
# finally in the forward chain. Arguments are those of test_tcp_forwarding_ip.
# The transfer results are not propagated via the return value; failures are
# recorded in the global ret by test_tcp_forwarding_ip and check_dscp.
test_tcp_forwarding_set_dscp()
{
	local pmtu="$3"
	local proto="$4"
	local dstip="$5"
	local dstport="$6"

	ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
	chain setdscp0 {
		type filter hook ingress device "veth0" priority 0; policy accept
		ip dscp set cs3
	}
}
EOF
	if [ $? -eq 0 ]; then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
		check_dscp "dscp_ingress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:ingress for veth0"
	fi

	ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
	chain setdscp0 {
		type filter hook egress device "veth1" priority 0; policy accept
		ip dscp set cs3
	}
}
EOF
	if [ $? -eq 0 ]; then
		test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
		check_dscp "dscp_egress" "$pmtu"

		ip netns exec "$nsr1" nft delete table netdev dscpmangle
	else
		echo "SKIP: Could not load netdev:egress for veth1"
	fi

	# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
	# counters should have seen packets (before and after ft offload kicks in).
	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
	test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
	check_dscp "dscp_fwd" "$pmtu"
}

# Run an IPv4 transfer to 10.0.2.99:12345 (masquerade case) and, if that
# worked, a second one to 10.6.6.6:1666 (dnat case).
# $1: client namespace
# $2: listener namespace
# $3: pmtu setting; with 1 the forward chain counters are checked as well
# $4: text appended to the PASS/FAIL messages
# Returns the status of the last transfer that was run.
test_tcp_forwarding_nat()
{
	local nsa="$1"
	local nsb="$2"
	local pmtu="$3"
	local what="$4"
	local lret

	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
	lret=$?

	if [ "$lret" -ne 0 ] ; then
		echo "FAIL: flow offload for ns1/ns2 with masquerade $what" 1>&2
		return $lret
	fi

	if [ "$pmtu" -eq 1 ] ;then
		check_counters "flow offload for ns1/ns2 with masquerade $what"
	else
		echo "PASS: flow offload for ns1/ns2 with masquerade $what"
	fi

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.6.6.6 1666
	lret=$?

	if [ "$lret" -ne 0 ] ; then
		# do not let check_counters report PASS for a broken transfer.
		echo "FAIL: flow offload for ns1/ns2 with dnat $what" 1>&2
	elif [ "$pmtu" -eq 1 ] ;then
		check_counters "flow offload for ns1/ns2 with dnat $what"
	else
		echo "PASS: flow offload for ns1/ns2 with dnat $what"
	fi

	return $lret
}

make_file "$nsin" "$filesize"
make_file "$nsin_small" "$filesize_small"

# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
	echo "PASS: flow offloaded for ns1/ns2"
else
	echo "FAIL: flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
	echo "PASS: IPv6 flow offloaded for ns1/ns2"
else
	echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
ip -net "$ns2" route del default via 10.0.2.1
ip -net "$ns2" route del default via dead:2::1
ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1

# Second test:
# Same, but with NAT enabled. Same as in first test: we expect normal forward path
# to handle most packets.
ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
	chain prerouting {
		type nat hook prerouting priority 0; policy accept;
		meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
	}

	chain postrouting {
		type nat hook postrouting priority 0; policy accept;
		meta oifname "veth1" counter masquerade
	}
}
EOF

check_dscp "dscp_none" "0"
if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
	# a failure must not be reported as success.
	exit 1
fi

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# Third test:
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null

# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
ip netns exec "$ns2" nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
	echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
	# a failure must not be reported as success.
	exit 1
fi

ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null

if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
	ip netns exec "$nsr1" nft list ruleset
	ret=1
fi

# IPIP tunnel test:
# Add IPIP tunnel interfaces and check flowtable acceleration.
# Runs the NAT and the IPv6 transfer over ipip/ip6tnl tunnels between nsr1
# and nsr2, first directly and then on top of VLAN 10 devices.
# Sets ret to $ksft_skip (if no test failed so far) when the first tunnel
# cannot be created, and to 1 on test failure.
test_ipip() {
	if ! ip -net "$nsr1" link add name tun0 type ipip \
	     local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
		echo "SKIP: could not add ipip tunnel"
		[ "$ret" -eq 0 ] && ret=$ksft_skip
		return
	fi
	ip -net "$nsr1" link set tun0 up
	ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	ip -net "$nsr1" link add name tun6 type ip6tnl local fee1:2::1 remote fee1:2::2
	ip -net "$nsr1" link set tun6 up
	ip -net "$nsr1" addr add fee1:3::1/64 dev tun6 nodad

	ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
	ip -net "$nsr2" link set tun0 up
	ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun6 type ip6tnl local fee1:2::2 remote fee1:2::1 || ret=1
	ip -net "$nsr2" link set tun6 up
	ip -net "$nsr2" addr add fee1:3::2/64 dev tun6 nodad

	ip -net "$nsr1" route change default via 192.168.100.2
	ip -net "$nsr2" route change default via 192.168.100.1

	# do not use "route change" and delete old default so
	# socat fails to connect in case new default can't be added.
	ip -6 -net "$nsr1" route delete default
	ip -6 -net "$nsr1" route add default via fee1:3::2
	ip -6 -net "$nsr2" route delete default
	ip -6 -net "$nsr2" route add default via fee1:3::1
	ip -net "$ns2" route add default via 10.0.2.1
	ip -6 -net "$ns2" route add default via dead:2::1

	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6 accept'
	ip netns exec "$nsr1" nft -a insert rule inet filter forward \
		'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
		echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel"
	else
		echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# Create vlan tagged devices for IPIP traffic.
	ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
	ip -net "$nsr1" link set veth1.10 up
	ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
	ip -net "$nsr1" addr add fee1:4::1/64 dev veth1.10 nodad
	# sysctl needs '/' in place of the '.' that is part of the device name.
	ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'

	ip -net "$nsr1" link add name tun0.10 type ipip local 192.168.20.1 remote 192.168.20.2
	ip -net "$nsr1" link set tun0.10 up
	ip -net "$nsr1" addr add 192.168.200.1/24 dev tun0.10
	ip -net "$nsr1" route change default via 192.168.200.2
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0.10 accept'

	ip -net "$nsr1" link add name tun6.10 type ip6tnl local fee1:4::1 remote fee1:4::2
	ip -net "$nsr1" link set tun6.10 up
	ip -net "$nsr1" addr add fee1:5::1/64 dev tun6.10 nodad
	ip -6 -net "$nsr1" route delete default
	ip -6 -net "$nsr1" route add default via fee1:5::2
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun6.10 accept'

	ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
	ip -net "$nsr2" link set veth0.10 up
	ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
	ip -net "$nsr2" addr add fee1:4::2/64 dev veth0.10 nodad
	ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun0.10 type ipip local 192.168.20.2 remote 192.168.20.1
	ip -net "$nsr2" link set tun0.10 up
	ip -net "$nsr2" addr add 192.168.200.2/24 dev tun0.10
	ip -net "$nsr2" route change default via 192.168.200.1
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0/10.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun6.10 type ip6tnl local fee1:4::2 remote fee1:4::1 || ret=1
	ip -net "$nsr2" link set tun6.10 up
	ip -net "$nsr2" addr add fee1:5::2/64 dev tun6.10 nodad
	ip -6 -net "$nsr2" route delete default
	ip -6 -net "$nsr2" route add default via fee1:5::1

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
		echo "PASS: flow offload for ns1/ns2 IP6IP6 tunnel over vlan"
	else
		echo "FAIL: flow offload for ns1/ns2 with IP6IP6 tunnel over vlan" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# Restore the previous configuration
	ip -net "$nsr1" route change default via 192.168.10.2
	ip -net "$nsr2" route change default via 192.168.10.1
	ip -net "$ns2" route del default via 10.0.2.1
	ip -6 -net "$ns2" route del default via dead:2::1
}

# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
# Runs the NAT test with veth0 enslaved to br0, then with a VLAN 10 device on
# top of veth0 enslaved instead, and finally restores the addresses on veth0
# and on eth0 in ns1.
# Sets ret to $ksft_skip (if no test failed so far) when br0 cannot be
# created, and to 1 on test failure.
test_bridge() {
	if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
		echo "SKIP: could not add bridge br0"
		[ "$ret" -eq 0 ] && ret=$ksft_skip
		return
	fi
	ip -net "$nsr1" addr flush dev veth0
	ip -net "$nsr1" link set up dev veth0
	ip -net "$nsr1" link set veth0 master br0
	ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
	ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
	ip -net "$nsr1" link set up dev br0

	ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null

	# br0 with NAT enabled.
	ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
	chain prerouting {
		type nat hook prerouting priority 0; policy accept;
		meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
	}

	chain postrouting {
		type nat hook postrouting priority 0; policy accept;
		meta oifname "veth1" counter masquerade
	}
}
EOF

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
		echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi


	# Another test:
	# Add bridge interface br0 to Router1, with NAT and VLAN.
	ip -net "$nsr1" link set veth0 nomaster
	ip -net "$nsr1" link set down dev veth0
	ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
	ip -net "$nsr1" link set up dev veth0
	ip -net "$nsr1" link set up dev veth0.10
	ip -net "$nsr1" link set veth0.10 master br0

	ip -net "$ns1" addr flush dev eth0
	ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
	ip -net "$ns1" link set eth0 up
	ip -net "$ns1" link set eth0.10 up
	ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
	ip -net "$ns1" route add default via 10.0.1.1
	ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
		echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# restore test topology (remove bridge and VLAN)
	ip -net "$nsr1" link set veth0 nomaster
	ip -net "$nsr1" link set veth0 down
	ip -net "$nsr1" link set veth0.10 down
	ip -net "$nsr1" link delete veth0.10 type vlan
	ip -net "$nsr1" link delete br0 type bridge
	ip -net "$ns1" addr flush dev eth0.10
	ip -net "$ns1" link set eth0.10 down
	ip -net "$ns1" link set eth0 down
	ip -net "$ns1" link delete eth0.10 type vlan

	# restore address in ns1 and nsr1
	ip -net "$ns1" link set eth0 up
	ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
	ip -net "$ns1" route add default via 10.0.1.1
	ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
	ip -net "$ns1" route add default via dead:1::1
	ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
	ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
	ip -net "$nsr1" link set up dev veth0
}

test_ipip
test_bridge

# xfrm keys are derived from a hash of the current process list.
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM

# both directions need distinct SPIs.
if [ "$SPI1" -eq "$SPI2" ]; then
	SPI2=$((SPI2+1))
fi

# Install ESP tunnel mode xfrm states and policies in one namespace.
# $1: namespace
# $2: local tunnel endpoint address
# $3: remote tunnel endpoint address
# $4: local network (selector)
# $5: remote network (selector)
# $6: SPI for outgoing traffic
# $7: SPI for incoming traffic
do_esp() {
	local ns=$1
	local me=$2
	local remote=$3
	local lnet=$4
	local rnet=$5
	local spi_out=$6
	local spi_in=$7

	ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
	ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"

	# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
	ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
	# to fwd decrypted packets after esp processing:
	ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}

do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"

do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"

ip netns exec "$nsr1" nft delete table ip nat

# restore default routes
ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1

if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
	check_counters "ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: ipsec tunnel mode for ns1/ns2" 1>&2
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
	ret=1
fi

if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
	check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
else
	echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2" 1>&2
	ip netns exec "$nsr1" nft list ruleset 1>&2
	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
	ret=1
fi

# Without arguments, run the whole script a second time with random MTUs and
# a random file size. The re-run is started with arguments, so it does not
# recurse any further.
if [ "$1" = "" ]; then
	low=1280
	mtu=$((65536 - low))
	o=$(((RANDOM%mtu) + low))
	l=$(((RANDOM%mtu) + low))
	r=$(((RANDOM%mtu) + low))

	MINSIZE=$((2 * 1000 * 1000))
	MAXSIZE=$((64 * 1000 * 1000))

	filesize=$(((RANDOM * RANDOM) % MAXSIZE))
	if [ "$filesize" -lt "$MINSIZE" ]; then
		filesize=$((filesize+MINSIZE))
	fi

	echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
	"$0" -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi

exit $ret