#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Testing for potential kernel soft lockup during IPv6 routing table
# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup
# occurs, a kernel panic will be triggered to prevent associated issues.
#
#
#                            Test Environment Layout
#
# ┌----------------┐                                         ┌----------------┐
# |     SOURCE_NS  |                                         |     SINK_NS    |
# |    NAMESPACE   |                                         |    NAMESPACE   |
# |(iperf3 clients)|                                         |(iperf3 servers)|
# |                |                                         |                |
# |                |                                         |                |
# |    ┌-----------|                             nexthops    |---------┐      |
# |    |veth_source|<--------------------------------------->|veth_sink|<┐    |
# |    └-----------|2001:0DB8:1::0:1/96  2001:0DB8:1::1:1/96 |---------┘ |    |
# |                |         ^           2001:0DB8:1::1:2/96 |           |    |
# |                |         .                   .           |       fwd |    |
# |  ┌---------┐   |         .                   .           |           |    |
# |  |   IPv6  |   |         .                   .           |           V    |
# |  | routing |   |         .           2001:0DB8:1::1:80/96|        ┌-----┐ |
# |  |  table  |   |         .                               |        | lo  | |
# |  | nexthop |   |         .                               └--------┴-----┴-┘
# |  | update  |   |         ............................> 2001:0DB8:2::1:1/128
# |  └-------- ┘   |
# └----------------┘
#
# The test script sets up two network namespaces, source_ns and sink_ns,
# connected via a veth link. Within source_ns, it continuously updates the
# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop
# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a
# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds.
#
# Simultaneously, multiple iperf3 clients within source_ns generate heavy
# outgoing IPv6 traffic. Each client is assigned a unique port number starting
# at 5001 and incrementing sequentially. Each client targets a unique iperf3
# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface
# using the same port number.
#
# The number of iperf3 servers and clients is set to half of the total
# available cores on each machine.
#
# NOTE: We have tested this script on machines with various CPU specifications,
# ranging from lower to higher performance as listed below. The test script
# effectively triggered a kernel soft lockup on machines running an unpatched
# kernel in under a minute:
#
# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz
# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz
# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz
# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz
# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz
# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz
# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz
# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz
# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz
#
# On less performant machines, you may need to increase the TEST_DURATION
# parameter to enhance the likelihood of encountering a race condition leading
# to a kernel soft lockup and avoid a false negative result.
#
# NOTE: The test may not produce the expected result in virtualized
# environments (e.g., qemu) due to differences in timing and CPU handling,
# which can affect the conditions needed to trigger a soft lockup.
source lib.sh
source net_helper.sh

# Total run time of the test, in seconds, and the pause (in seconds) between
# adding the multipath route and deleting it again.
TEST_DURATION=300
ROUTING_TABLE_REFRESH_PERIOD=0.01

# Target bitrate handed to every iperf3 client via --bitrate.
IPERF3_BITRATE="300m"

IPV6_NEXTHOP_ADDR_COUNT="128"
IPV6_NEXTHOP_ADDR_MASK="96"
IPV6_NEXTHOP_PREFIX="2001:0DB8:1"

SOURCE_TEST_IFACE="veth_source"
SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96"

SINK_TEST_IFACE="veth_sink"
# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses:
# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT}
SINK_LOOPBACK_IFACE="lo"
SINK_LOOPBACK_IP_MASK="128"
SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1"

# Run-time state shared between the functions below.
nexthop_ip_list=""			# "nexthop via ..." arguments for 'ip -6 route add'
termination_signal=""			# "SIGINT" or "SIGALRM", set by the trap handlers
kernel_softlockup_panic_prev_val=""	# kernel.softlockup_panic value to restore on exit
timer_pid=""				# PID of the background TEST_DURATION timer

# Send SIGKILL to every process of a network namespace whose command line
# matches a pattern.
#
# Arguments:
#   $1 - network namespace name
#   $2 - pattern passed to 'grep -e' and matched against /proc/<pid>/cmdline
terminate_ns_processes_by_pattern() {
	local ns=$1
	local pattern=$2
	local pid

	for pid in $(ip netns pids "${ns}"); do
		# The process may already be gone by the time it is inspected.
		[ -e "/proc/${pid}/cmdline" ] || continue

		if grep -qe "${pattern}" "/proc/${pid}/cmdline"; then
			kill -9 "${pid}"
		fi
	done
}

# Stop the traffic generators, report the outcome according to
# ${termination_signal}, remove both namespaces and restore
# kernel.softlockup_panic.
#
# Exits with ${ksft_fail} (before removing the namespaces or restoring the
# sysctl) if an iperf3 instance in source_ns survives SIGKILL.
cleanup() {
	local pid

	echo "info: cleaning up namespaces and terminating all processes within them..."

	# Cancel the pending timer unless it is the one that got us here.
	# Background commands of a non-interactive shell ignore SIGINT, so
	# after ^C the timer would otherwise outlive this script and later
	# send SIGALRM to whichever process has inherited our PID. Killing the
	# subshell leaves its 'sleep' child to expire harmlessly on its own.
	if [ "${termination_signal}" != "SIGALRM" ] && [ -n "${timer_pid}" ]; then
		kill "${timer_pid}" 2>/dev/null
	fi

	# Terminate iperf3 instances running in the source_ns. To avoid race
	# conditions, first iterate over the PIDs and terminate those
	# associated with the bash shells running the
	# `while true; do iperf3 -c ...; done` loops. In a second iteration,
	# terminate the individual `iperf3 -c ...` instances.
	terminate_ns_processes_by_pattern "${source_ns}" while
	terminate_ns_processes_by_pattern "${source_ns}" iperf3

	# Repeat the same process for sink_ns
	terminate_ns_processes_by_pattern "${sink_ns}" while
	terminate_ns_processes_by_pattern "${sink_ns}" iperf3

	# Check if any iperf3 instances are still running. This could happen
	# if a core has entered an infinite loop and the timeout for detecting
	# the soft lockup has not expired, but either the test interval has
	# already elapsed or the test was terminated manually (e.g., with ^C)
	for pid in $(ip netns pids "${source_ns}"); do
		if [ -e "/proc/${pid}/cmdline" ] && grep -qe 'iperf3' "/proc/${pid}/cmdline"; then
			echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!"
			exit ${ksft_fail}
		fi
	done

	if [ "$termination_signal" == "SIGINT" ]; then
		echo "SKIP: Termination due to ^C (SIGINT)"
	elif [ "$termination_signal" == "SIGALRM" ]; then
		echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test"
	fi

	cleanup_ns ${source_ns} ${sink_ns}

	# Only restore the sysctl if its previous value was actually captured;
	# writing an empty value would just produce an error.
	if [ -n "${kernel_softlockup_panic_prev_val}" ]; then
		sysctl -qw kernel.softlockup_panic="${kernel_softlockup_panic_prev_val}"
	fi
}

# Create source_ns and sink_ns, connect them with a veth pair, configure
# addresses and forwarding, and build ${nexthop_ip_list}.
setup_prepare() {
	local i
	local nexthop_addr

	setup_ns source_ns sink_ns

	ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns}

	# Setting up the Source namespace
	ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE}
	ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000
	ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up
	ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1

	# Setting up the Sink namespace
	ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE}
	ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up
	ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1

	ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up
	ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1

	# Populate nexthop IPv6 addresses on the test interface in the sink_ns
	# and, for each of them, append the matching nexthop clause used by the
	# route refresh loop in source_ns.
	echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..."
	for i in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do
		nexthop_addr="${IPV6_NEXTHOP_PREFIX}::$(printf '1:%x' "${i}")"

		ip -n ${sink_ns} addr add "${nexthop_addr}/${IPV6_NEXTHOP_ADDR_MASK}" dev ${SINK_TEST_IFACE}

		nexthop_ip_list="${nexthop_ip_list} nexthop via ${nexthop_addr} dev ${SOURCE_TEST_IFACE} weight 1"
	done
}

# Start the iperf3 servers and clients plus the background route refresh loop,
# then block until a trap handler (SIGINT or SIGALRM) ends the test.
#
# Globals read: num_of_iperf_servers, nexthop_ip_list, source_ns, sink_ns
test_soft_lockup_during_routing_table_refresh() {
	local i
	local cmd
	local port

	# Start num_of_iperf_servers iperf3 servers in the sink_ns namespace,
	# each listening on ports starting at 5001 and incrementing
	# sequentially. Since iperf3 instances may terminate unexpectedly, a
	# while loop is used to automatically restart them in such cases.
	echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..."
	for i in $(seq 1 ${num_of_iperf_servers}); do
		port=$(printf '5%03d' "${i}")
		cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p ${port} --rcv-timeout 200 &>/dev/null"
		ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
	done

	# Wait for the iperf3 servers to be ready
	for i in $(seq 1 ${num_of_iperf_servers}); do
		port=$(printf '5%03d' "${i}")
		wait_local_port_listen ${sink_ns} ${port} tcp
	done

	# Continuously refresh the routing table in the background within
	# the source_ns namespace. The loop ends by itself once the namespace
	# no longer shows up in 'ip netns list'.
	ip netns exec ${source_ns} bash -c "
		while ip netns list | grep -q ${source_ns}; do
			ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list};
			sleep ${ROUTING_TABLE_REFRESH_PERIOD};
			ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK};
		done &"

	# Start num_of_iperf_servers iperf3 clients in the source_ns namespace,
	# each sending TCP traffic on sequential ports starting at 5001.
	# Since iperf3 instances may terminate unexpectedly (e.g., if the route
	# to the server is deleted in the background during a route refresh), a
	# while loop is used to automatically restart them in such cases.
	echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..."
	for i in $(seq 1 ${num_of_iperf_servers}); do
		port=$(printf '5%03d' "${i}")
		cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p ${port} --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null"
		ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null
	done

	echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..."
	echo "info: A kernel soft lockup, if detected, results in a kernel panic!"

	# Block on the background timer; the SIGINT/SIGALRM traps interrupt
	# this wait and take over from here.
	wait
}

# Make sure 'iperf3' is installed, skip the test otherwise
if [ ! -x "$(command -v "iperf3")" ]; then
	echo "SKIP: 'iperf3' is not installed. Skipping the test."
	exit ${ksft_skip}
fi

# Use half of the available cores for the iperf3 server/client pairs
num_of_iperf_servers=$(( $(nproc)/2 ))

# Check if we are running on a multi-core machine, skip the test otherwise
if [ "${num_of_iperf_servers}" -eq 0 ]; then
	echo "SKIP: This test is not valid on a single core machine!"
	exit ${ksft_skip}
fi

# Since the kernel soft lockup we're testing causes at least one core to enter
# an infinite loop, destabilizing the host and likely affecting subsequent
# tests, we trigger a kernel panic instead of reporting a failure and
# continuing
kernel_softlockup_panic_prev_val=$(sysctl -n kernel.softlockup_panic)
sysctl -qw kernel.softlockup_panic=1

# ^C: clean up and report the test as skipped.
handle_sigint() {
	termination_signal="SIGINT"
	cleanup
	exit ${ksft_skip}
}

# Timer expiry: the whole TEST_DURATION elapsed without a lockup.
handle_sigalrm() {
	termination_signal="SIGALRM"
	cleanup
	exit ${ksft_pass}
}

trap handle_sigint SIGINT
trap handle_sigalrm SIGALRM

# Arm the test timer and remember its PID so cleanup() can cancel it.
(sleep ${TEST_DURATION} && kill -s SIGALRM $$)&
timer_pid=$!

setup_prepare
test_soft_lockup_during_routing_table_refresh