xref: /linux/tools/testing/selftests/net/tcp_ecmp_failover.sh (revision fcee7d82f27d6a8b1ddc5bbefda59b4e441e9bc0)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Copyright 2026 Google LLC.
5#
6# This test verifies TCP flow failover between ECMP routes
7# upon carrier loss on the active device.
8#
9#   socat  ----------------------------->  socat
10#                        |
11#           .-- veth-c1 -|- veth-s1 --.
12#   dummy0 -|            |            |-- dummy0
13#           '-- veth-c2 -|- veth-s2 --'
14#                        |
15#
16
# Assigned ahead of sourcing forwarding/lib.sh.
# NOTE(review): presumably these tell the library that jq and mausezahn
# are not needed and that no pre-existing interfaces are claimed --
# confirm against forwarding/lib.sh.
REQUIRE_JQ=no
REQUIRE_MZ=no
NUM_NETIFS=0

# Resolve the library relative to this script instead of the caller's
# working directory, so the test can also be launched from elsewhere.
source "$(dirname "${BASH_SOURCE[0]}")/forwarding/lib.sh"

# Addresses assigned to dummy0 in the client and server namespaces.
CLIENT_IP="10.0.59.1"
SERVER_IP="10.0.92.1"
CLIENT_IP6="2001:db8:5a9a::1"
SERVER_IP6="2001:db8:9292::1"
27
# Configure the server namespace: dummy0 holds the server addresses and
# multipath IPv4/IPv6 routes to the client are installed over veth-s1
# and veth-s2.
#
# Globals read: server, SERVER_IP, SERVER_IP6, CLIENT_IP, CLIENT_IP6
setup_server()
{
	# An array keeps the command prefix intact without relying on
	# word splitting, and "local" keeps it out of the global scope.
	local -a ip_cmd=(ip -n "$server")

	"${ip_cmd[@]}" link add dummy0 type dummy
	"${ip_cmd[@]}" link set dummy0 up

	"${ip_cmd[@]}" -4 addr add "$SERVER_IP/32" dev dummy0
	"${ip_cmd[@]}" -6 addr add "$SERVER_IP6/128" dev dummy0 nodad

	"${ip_cmd[@]}" link set veth-s1 up
	"${ip_cmd[@]}" link set veth-s2 up

	"${ip_cmd[@]}" -4 addr add 192.168.1.2/24 dev veth-s1
	"${ip_cmd[@]}" -4 addr add 192.168.2.2/24 dev veth-s2

	# Equal-weight nexthops, one per veth.
	"${ip_cmd[@]}" -4 route add "$CLIENT_IP/32" \
		nexthop via 192.168.1.1 dev veth-s1 weight 1 \
		nexthop via 192.168.2.1 dev veth-s2 weight 1

	"${ip_cmd[@]}" -6 addr add 2001:db8:1::2/64 dev veth-s1 nodad
	"${ip_cmd[@]}" -6 addr add 2001:db8:2::2/64 dev veth-s2 nodad

	"${ip_cmd[@]}" -6 route add "$CLIENT_IP6/128" \
		nexthop via 2001:db8:1::1 dev veth-s1 weight 1 \
		nexthop via 2001:db8:2::1 dev veth-s2 weight 1
}
56
# Configure the client namespace: dummy0 holds the client addresses,
# multipath IPv4/IPv6 routes to the server are installed over veth-c1
# and veth-c2, and the sysctls the failover scenario depends on are set.
#
# Globals read: client, CLIENT_IP, CLIENT_IP6, SERVER_IP, SERVER_IP6
setup_client()
{
	# Arrays keep the command prefixes intact without relying on
	# word splitting, and "local" keeps them out of the global scope.
	local -a ip_cmd=(ip -n "$client")
	local -a ns_exec=(ip netns exec "$client")
	local family dev

	"${ip_cmd[@]}" link add dummy0 type dummy
	"${ip_cmd[@]}" link set dummy0 up

	"${ip_cmd[@]}" -4 addr add "$CLIENT_IP/32" dev dummy0
	"${ip_cmd[@]}" -6 addr add "$CLIENT_IP6/128" dev dummy0 nodad

	"${ip_cmd[@]}" link set veth-c1 up
	"${ip_cmd[@]}" link set veth-c2 up

	"${ip_cmd[@]}" -4 addr add 192.168.1.1/24 dev veth-c1
	"${ip_cmd[@]}" -4 addr add 192.168.2.1/24 dev veth-c2

	# Equal-weight nexthops, one per veth.
	"${ip_cmd[@]}" -4 route add "$SERVER_IP/32" \
		nexthop via 192.168.1.2 dev veth-c1 weight 1 \
		nexthop via 192.168.2.2 dev veth-c2 weight 1

	"${ip_cmd[@]}" -6 addr add 2001:db8:1::1/64 dev veth-c1 nodad
	"${ip_cmd[@]}" -6 addr add 2001:db8:2::1/64 dev veth-c2 nodad

	"${ip_cmd[@]}" -6 route add "$SERVER_IP6/128" \
		nexthop via 2001:db8:1::2 dev veth-c1 weight 1 \
		nexthop via 2001:db8:2::2 dev veth-c2 weight 1

	# By default, tcp_retries1=3 triggers a route refresh
	# after 3 retransmits (~5s).  Ensure this never occurs
	# for test stability.
	"${ns_exec[@]}" sysctl -qw net.ipv4.tcp_retries1=100

	# When NETDEV_CHANGE is issued for a dev tied to an ECMP
	# route, RTNH_F_LINKDOWN is flagged and the sernum is
	# bumped to invalidate the route via sk_dst_check().
	#
	# Without ignore_routes_with_linkdown=1, subsequent
	# lookups may still select the same RTNH_F_LINKDOWN route.
	for family in ipv4 ipv6; do
		for dev in veth-c1 veth-c2; do
			"${ns_exec[@]}" sysctl -qw \
				"net.$family.conf.$dev.ignore_routes_with_linkdown=1"
		done
	done
}
102
# Build the test topology: two namespaces joined by two veth pairs,
# then configure the server side followed by the client side.
setup()
{
	local pair

	setup_ns client server

	# veth-c<N> stays in the client namespace; its peer veth-s<N>
	# is created directly in the server namespace.
	for pair in 1 2; do
		ip -n "$client" link add "veth-c$pair" type veth \
			peer "veth-s$pair" netns "$server"
	done

	setup_server
	setup_client
}
113
# Tear down the namespaces via cleanup_all_ns, discarding anything it
# prints on stdout or stderr.  Also installed as the EXIT trap.
cleanup()
{
	cleanup_all_ns &> /dev/null
}
118
# Run one failover scenario for a single address family.
#
# Arguments:
#   $1 - value for socat's pf= option (the callers pass IPv4 or IPv6)
#   $2 - server address, in the form used inside socat address specs
#   $3 - client address, in the form used inside socat address specs
# Globals: server, client, ksft_fail (read); RET (written)
#
# A one-way socat stream is started from client to server.  The
# server-side veth showing more captured lines is taken down, and RET is
# set to $ksft_fail unless at least 1000 lines of capture are then seen
# on the other veth.
tcp_ecmp_failover()
{
	local pf=$1; shift
	local server_ip=$1; shift
	local client_ip=$1; shift

	local -r port=8080
	local -r min_pkts=1000
	local server_pid client_pid
	local pkts_s1 pkts_s2 pkts
	local veth_down veth_up

	RET=0

	tcpdump_start veth-s1 "$server"
	tcpdump_start veth-s2 "$server"

	ip netns exec "$server" \
		socat -u "TCP-LISTEN:$port,pf=$pf,bind=$server_ip,reuseaddr" /dev/null &
	server_pid=$!

	# Wait for server to start listening.
	# Sometimes client fails without this sleep.
	sleep 1

	ip netns exec "$client" \
		socat -u /dev/zero "TCP:$server_ip:$port,pf=$pf,bind=$client_ip" &
	client_pid=$!

	# To capture enough packets.
	sleep 3

	tcpdump_stop veth-s1
	tcpdump_stop veth-s2

	pkts_s1=$(tcpdump_show veth-s1 | wc -l)
	pkts_s2=$(tcpdump_show veth-s2 | wc -l)

	tcpdump_cleanup veth-s1
	tcpdump_cleanup veth-s2

	# Detect the device chosen by the client
	if [ "$pkts_s1" -gt "$pkts_s2" ]; then
		veth_down=veth-s1
		veth_up=veth-s2
	else
		veth_down=veth-s2
		veth_up=veth-s1
	fi

	# Taking down $veth_down causes its peer to lose carrier,
	# triggering NETDEV_CHANGE.  This flags RTNH_F_LINKDOWN
	# and bumps the sernum for the route associated with that
	# peer, invalidating the cached dst in the TCP socket.
	#
	# Consequently, sk_dst_check() fails, forcing the subsequent
	# lookup to select the remaining healthy route via $veth_up.
	ip -n "$server" link set "$veth_down" down

	tcpdump_start "$veth_up" "$server"

	# To capture enough packets.
	sleep 3

	tcpdump_stop "$veth_up"

	kill -9 "$client_pid" > /dev/null 2>&1
	kill -9 "$server_pid" > /dev/null 2>&1
	# Reap the background jobs; stderr is discarded as in the
	# kill calls above.
	wait 2> /dev/null

	pkts=$(tcpdump_show "$veth_up" | wc -l)

	tcpdump_cleanup "$veth_up"

	if [ "$pkts" -lt "$min_pkts" ]; then
		RET=$ksft_fail
	fi
}
191
# IPv4 test case: build the topology, run the failover scenario between
# $CLIENT_IP and $SERVER_IP, report the result, and tear everything down.
test_ipv4()
{
	setup
	# Quoted so each address is always passed as exactly one argument,
	# matching test_ipv6.
	tcp_ecmp_failover IPv4 "$SERVER_IP" "$CLIENT_IP"
	log_test "TCP IPv4 failover"
	cleanup
}
199
# IPv6 test case: same sequence as the IPv4 one, with both addresses
# passed in bracketed form.
test_ipv6()
{
	local server_addr="[$SERVER_IP6]"
	local client_addr="[$CLIENT_IP6]"

	setup
	tcp_ecmp_failover IPv6 "$server_addr" "$client_addr"
	log_test "TCP IPv6 failover"
	cleanup
}
207
# Check for the external tools this test drives.
for tool in socat tcpdump; do
	require_command "$tool"
done

# Run cleanup on every exit path, including early failures.
trap cleanup EXIT

# Each test case builds and tears down its own topology.
for testcase in test_ipv4 test_ipv6; do
	"$testcase"
done

exit "$EXIT_STATUS"
217