#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright 2026 Google LLC.
#
# This test verifies TCP flow failover between ECMP routes
# upon carrier loss on the active device.
#
#      socat -----------------------------> socat
#                         |
#            .-- veth-c1 -|- veth-s1 --.
#    dummy0 -|            |            |-- dummy0
#            '-- veth-c2 -|- veth-s2 --'
#                         |
#

REQUIRE_JQ=no
REQUIRE_MZ=no
NUM_NETIFS=0

source forwarding/lib.sh

CLIENT_IP="10.0.59.1"
SERVER_IP="10.0.92.1"
CLIENT_IP6="2001:db8:5a9a::1"
SERVER_IP6="2001:db8:9292::1"

# Configure the server namespace: a dummy0 device carrying the service
# addresses, both veth-s* links, and IPv4/IPv6 ECMP routes back to the
# client addresses.
#
# Globals: server (read), SERVER_IP, SERVER_IP6, CLIENT_IP, CLIENT_IP6 (read)
setup_server()
{
	# Kept as an array so the command prefix never depends on word
	# splitting, and local so it cannot leak into other functions.
	local -a ip_cmd=(ip -n "$server")

	"${ip_cmd[@]}" link add dummy0 type dummy
	"${ip_cmd[@]}" link set dummy0 up

	"${ip_cmd[@]}" -4 addr add "$SERVER_IP/32" dev dummy0
	"${ip_cmd[@]}" -6 addr add "$SERVER_IP6/128" dev dummy0 nodad

	"${ip_cmd[@]}" link set veth-s1 up
	"${ip_cmd[@]}" link set veth-s2 up

	"${ip_cmd[@]}" -4 addr add 192.168.1.2/24 dev veth-s1
	"${ip_cmd[@]}" -4 addr add 192.168.2.2/24 dev veth-s2

	"${ip_cmd[@]}" -4 route add "$CLIENT_IP/32" \
		nexthop via 192.168.1.1 dev veth-s1 weight 1 \
		nexthop via 192.168.2.1 dev veth-s2 weight 1

	"${ip_cmd[@]}" -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad
	"${ip_cmd[@]}" -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad

	"${ip_cmd[@]}" -6 route add "$CLIENT_IP6/128" \
		nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \
		nexthop via 2001:db8:2::1 dev veth-s2 weight 1
}

# Configure the client namespace: a dummy0 device carrying the client
# addresses, both veth-c* links, IPv4/IPv6 ECMP routes to the server
# addresses, and the sysctls the failover scenario depends on.
#
# Globals: client (read), SERVER_IP, SERVER_IP6, CLIENT_IP, CLIENT_IP6 (read)
setup_client()
{
	local -a ip_cmd=(ip -n "$client")
	local -a ns_exec=(ip netns exec "$client")

	"${ip_cmd[@]}" link add dummy0 type dummy
	"${ip_cmd[@]}" link set dummy0 up

	"${ip_cmd[@]}" -4 addr add "$CLIENT_IP/32" dev dummy0
	"${ip_cmd[@]}" -6 addr add "$CLIENT_IP6/128" dev dummy0 nodad

	"${ip_cmd[@]}" link set veth-c1 up
	"${ip_cmd[@]}" link set veth-c2 up

	"${ip_cmd[@]}" -4 addr add 192.168.1.1/24 dev veth-c1
	"${ip_cmd[@]}" -4 addr add 192.168.2.1/24 dev veth-c2

	"${ip_cmd[@]}" -4 route add "$SERVER_IP/32" \
		nexthop via 192.168.1.2 dev veth-c1 weight 1 \
		nexthop via 192.168.2.2 dev veth-c2 weight 1

	"${ip_cmd[@]}" -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad
	"${ip_cmd[@]}" -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad

	"${ip_cmd[@]}" -6 route add "$SERVER_IP6/128" \
		nexthop via 2001:db8:1::2 dev veth-c1 weight 1 \
		nexthop via 2001:db8:2::2 dev veth-c2 weight 1

	# By default, tcp_retries1=3 triggers a route refresh
	# after 3 retransmits (~5s).  Ensure this never occurs
	# for test stability.
	"${ns_exec[@]}" sysctl -qw net.ipv4.tcp_retries1=100

	# When NETDEV_CHANGE is issued for a dev tied to an ECMP
	# route, RTNH_F_LINKDOWN is flagged and the sernum is
	# bumped to invalidate the route via sk_dst_check().
	#
	# Without ignore_routes_with_linkdown=1, subsequent
	# lookups may still select the same RTNH_F_LINKDOWN route.
	"${ns_exec[@]}" sysctl -qw net.ipv4.conf.veth-c1.ignore_routes_with_linkdown=1
	"${ns_exec[@]}" sysctl -qw net.ipv4.conf.veth-c2.ignore_routes_with_linkdown=1

	"${ns_exec[@]}" sysctl -qw net.ipv6.conf.veth-c1.ignore_routes_with_linkdown=1
	"${ns_exec[@]}" sysctl -qw net.ipv6.conf.veth-c2.ignore_routes_with_linkdown=1
}

# Create the client and server namespaces, connect them with two veth
# pairs (veth-c1/veth-s1 and veth-c2/veth-s2) and configure both sides.
# setup_ns comes from the sourced library; it is expected to set $client
# and $server to the namespace names (not visible in this file).
setup()
{
	setup_ns client server

	ip -n "$client" link add veth-c1 type veth peer veth-s1 netns "$server"
	ip -n "$client" link add veth-c2 type veth peer veth-s2 netns "$server"

	setup_server
	setup_client
}

# Tear down the namespaces created by setup().  Output is discarded, so
# calling this again from the EXIT trap after a test already cleaned up
# prints nothing.
cleanup()
{
	cleanup_all_ns > /dev/null 2>&1
}

# Run one failover scenario: start a bulk TCP stream from client to
# server, find which server-side veth carries it, take that veth down,
# and check that the stream continues over the other veth.
#
# Arguments:
#   $1 - socat protocol family (IPv4 or IPv6)
#   $2 - server address as socat expects it (IPv6 in brackets)
#   $3 - client address as socat expects it (IPv6 in brackets)
# Globals:
#   client, server (read); RET (written: 0 on entry, $ksft_fail when
#   fewer than 1000 lines are captured on the surviving veth)
tcp_ecmp_failover()
{
	local pf=$1; shift
	local server_ip=$1; shift
	local client_ip=$1; shift
	# Per-run state; local so the IPv4 run cannot leak into the IPv6 run.
	local server_pid client_pid
	local pkts_s1 pkts_s2 pkts
	local veth_down veth_up

	RET=0

	tcpdump_start veth-s1 "$server"
	tcpdump_start veth-s2 "$server"

	ip netns exec "$server" \
		socat -u TCP-LISTEN:8080,pf="$pf",bind="$server_ip",reuseaddr /dev/null &
	server_pid=$!

	# Wait for server to start listening.
	# Sometimes client fails without this sleep.
	sleep 1

	ip netns exec "$client" \
		socat -u /dev/zero TCP:"$server_ip":8080,pf="$pf",bind="$client_ip" &
	client_pid=$!

	# To capture enough packets.
	sleep 3

	tcpdump_stop veth-s1
	tcpdump_stop veth-s2

	pkts_s1=$(tcpdump_show veth-s1 | wc -l)
	pkts_s2=$(tcpdump_show veth-s2 | wc -l)

	tcpdump_cleanup veth-s1
	tcpdump_cleanup veth-s2

	# Detect the device chosen by the client
	if [ "$pkts_s1" -gt "$pkts_s2" ]; then
		veth_down=veth-s1
		veth_up=veth-s2
	else
		veth_down=veth-s2
		veth_up=veth-s1
	fi

	# Taking down $veth_down causes its peer to lose carrier,
	# triggering NETDEV_CHANGE.  This flags RTNH_F_LINKDOWN
	# and bumps the sernum for the route associated with that
	# peer, invalidating the cached dst in the TCP socket.
	#
	# Consequently, sk_dst_check() fails, forcing the subsequent
	# lookup to select the remaining healthy route via $veth_up.
	ip -n "$server" link set "$veth_down" down

	tcpdump_start "$veth_up" "$server"

	# To capture enough packets.
	sleep 3

	tcpdump_stop "$veth_up"

	# Either socat may already have exited, so kill errors are ignored.
	kill -9 "$client_pid" > /dev/null 2>&1
	kill -9 "$server_pid" > /dev/null 2>&1
	wait 2> /dev/null

	pkts=$(tcpdump_show "$veth_up" | wc -l)

	tcpdump_cleanup "$veth_up"

	# The flow must have moved to the surviving link and kept streaming.
	if [ "$pkts" -lt 1000 ]; then
		RET=$ksft_fail
	fi
}

# IPv4 variant of the failover test, on a freshly built topology.
test_ipv4()
{
	setup
	tcp_ecmp_failover IPv4 "$SERVER_IP" "$CLIENT_IP"
	log_test "TCP IPv4 failover"
	cleanup
}

# IPv6 variant of the failover test; socat needs bracketed addresses.
test_ipv6()
{
	setup
	tcp_ecmp_failover IPv6 "[$SERVER_IP6]" "[$CLIENT_IP6]"
	log_test "TCP IPv6 failover"
	cleanup
}

require_command socat
require_command tcpdump

# Make sure the namespaces are removed even if the script exits early.
trap cleanup EXIT

test_ipv4
test_ipv6

exit "$EXIT_STATUS"