#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Testing for potential kernel soft lockup during IPv6 routing table
# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup
# occurs, a kernel panic will be triggered to prevent associated issues.
#
#
#                            Test Environment Layout
#
# ┌----------------┐                                         ┌----------------┐
# |     SOURCE_NS  |                                         |     SINK_NS    |
# |    NAMESPACE   |                                         |    NAMESPACE   |
# |(iperf3 clients)|                                         |(iperf3 servers)|
# |                |                                         |                |
# |                |                                         |                |
# |    ┌-----------|                             nexthops    |---------┐      |
# |    |veth_source|<--------------------------------------->|veth_sink|<┐    |
# |    └-----------|2001:0DB8:1::0:1/96  2001:0DB8:1::1:1/96 |---------┘ |    |
# |                |         ^           2001:0DB8:1::1:2/96 |           |    |
# |                |         .                   .           |       fwd |    |
# |  ┌---------┐   |         .                   .           |           |    |
# |  |   IPv6  |   |         .                   .           |           V    |
# |  | routing |   |         .           2001:0DB8:1::1:80/96|        ┌-----┐ |
# |  |  table  |   |         .                               |        | lo  | |
# |  | nexthop |   |         .                               └--------┴-----┴-┘
# |  | update  |   |         ............................> 2001:0DB8:2::1:1/128
# |  └-------- ┘   |
# └----------------┘
#
# The test script sets up two network namespaces, source_ns and sink_ns,
# connected via a veth link. Within source_ns, it continuously updates the
# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop
# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a
# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds.
#
# Simultaneously, multiple iperf3 clients within source_ns generate heavy
# outgoing IPv6 traffic. Each client is assigned a unique port number starting
# at 5001 and incrementing sequentially. Each client targets a unique iperf3
# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface
# using the same port number.
#
# The number of iperf3 servers and clients is set to half of the total
# available cores on each machine.
#
# NOTE: We have tested this script on machines with various CPU specifications,
# ranging from lower to higher performance as listed below. The test script
# effectively triggered a kernel soft lockup on machines running an unpatched
# kernel in under a minute:
#
# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz
# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz
# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz
# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz
# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz
# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz
# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz
# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz
# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz
# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz
#
# On less performant machines, you may need to increase the TEST_DURATION
# parameter to enhance the likelihood of encountering a race condition leading
# to a kernel soft lockup and avoid a false negative result.
#
# NOTE: The test may not produce the expected result in virtualized
# environments (e.g., qemu) due to differences in timing and CPU handling,
# which can affect the conditions needed to trigger a soft lockup.

source lib.sh

# Total run time in seconds, and the pause (in seconds) between inserting the
# multipath route and deleting it again.
TEST_DURATION=300
ROUTING_TABLE_REFRESH_PERIOD=0.01

IPERF3_BITRATE="300m"


IPV6_NEXTHOP_ADDR_COUNT="128"
IPV6_NEXTHOP_ADDR_MASK="96"
IPV6_NEXTHOP_PREFIX="2001:0DB8:1"


SOURCE_TEST_IFACE="veth_source"
SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96"

SINK_TEST_IFACE="veth_sink"
# ${SINK_TEST_IFACE} is populated with IPV6_NEXTHOP_ADDR_COUNT IPv6 addresses:
# 2001:0DB8:1::1:1 up to 2001:0DB8:1::1:<count>, where the last group is
# formatted in hexadecimal (e.g. ::1:80 for a count of 128).
SINK_LOOPBACK_IFACE="lo"
SINK_LOOPBACK_IP_MASK="128"
SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1"

# " nexthop via <addr> dev <iface> weight 1" clauses, built by setup_prepare
nexthop_ip_list=""
# Set by the signal handlers; selects the verdict printed by cleanup
termination_signal=""
# Value of kernel.softlockup_panic before the test changed it
kernel_softlockup_panic_prev_val=""
# PID of the background subshell that delivers SIGALRM after TEST_DURATION
timer_pid=""

# Send SIGKILL to every process in a namespace whose command line matches.
# Arguments:
#   $1 - network namespace name
#   $2 - grep pattern matched against /proc/<pid>/cmdline
terminate_ns_processes_by_pattern() {
	local ns=$1
	local pattern=$2
	local pid

	for pid in $(ip netns pids "${ns}"); do
		# The process may already be gone by the time it is inspected
		[ -e "/proc/${pid}/cmdline" ] &&
			grep -qe "${pattern}" "/proc/${pid}/cmdline" &&
			kill -9 "${pid}"
	done
}

# Stop the timer and all test processes, print the verdict, remove the
# namespaces and restore kernel.softlockup_panic.
# Exits with ${ksft_fail} (skipping namespace removal and the sysctl restore)
# if an iperf3 client survives SIGKILL.
cleanup() {
	local pid

	# Stop the watchdog timer first. It is a background job of a
	# non-interactive shell and therefore ignores SIGINT; left running it
	# would later send SIGALRM to whatever process owns our PID by then.
	# The sleep child is killed before the subshell so it is not orphaned.
	if [ -n "${timer_pid}" ]; then
		pkill -P "${timer_pid}" 2>/dev/null
		kill "${timer_pid}" 2>/dev/null
		timer_pid=""
	fi

	echo "info: cleaning up namespaces and terminating all processes within them..."


	# Terminate iperf3 instances running in the source_ns. To avoid race
	# conditions, first iterate over the PIDs and terminate those
	# associated with the bash shells running the
	# `while true; do iperf3 -c ...; done` loops. In a second iteration,
	# terminate the individual `iperf3 -c ...` instances.
	terminate_ns_processes_by_pattern "${source_ns}" while
	terminate_ns_processes_by_pattern "${source_ns}" iperf3

	# Repeat the same process for sink_ns
	terminate_ns_processes_by_pattern "${sink_ns}" while
	terminate_ns_processes_by_pattern "${sink_ns}" iperf3

	# Check if any iperf3 instances are still running. This could happen
	# if a core has entered an infinite loop and the timeout for detecting
	# the soft lockup has not expired, but either the test interval has
	# already elapsed or the test was terminated manually (e.g., with ^C)
	for pid in $(ip netns pids "${source_ns}"); do
		if [ -e "/proc/${pid}/cmdline" ] && grep -qe 'iperf3' "/proc/${pid}/cmdline"; then
			echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!"
			exit "${ksft_fail}"
		fi
	done

	if [ "${termination_signal}" == "SIGINT" ]; then
		echo "SKIP: Termination due to ^C (SIGINT)"
	elif [ "${termination_signal}" == "SIGALRM" ]; then
		echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test"
	fi

	cleanup_ns "${source_ns}" "${sink_ns}"

	# Nothing to restore if the previous value could not be read
	if [ -n "${kernel_softlockup_panic_prev_val}" ]; then
		sysctl -qw kernel.softlockup_panic="${kernel_softlockup_panic_prev_val}"
	fi
}

# Create both namespaces, connect them with a veth pair, configure addresses
# and forwarding, and build the nexthop clause list in nexthop_ip_list.
setup_prepare() {
	local i
	local nexthop_addr

	setup_ns source_ns sink_ns

	ip -n "${source_ns}" link add name "${SOURCE_TEST_IFACE}" type veth \
		peer name "${SINK_TEST_IFACE}" netns "${sink_ns}"

	# Setting up the Source namespace
	ip -n "${source_ns}" addr add "${SOURCE_TEST_IP_ADDR}" dev "${SOURCE_TEST_IFACE}"
	ip -n "${source_ns}" link set dev "${SOURCE_TEST_IFACE}" qlen 10000
	ip -n "${source_ns}" link set dev "${SOURCE_TEST_IFACE}" up
	ip netns exec "${source_ns}" sysctl -qw net.ipv6.fib_multipath_hash_policy=1

	# Setting up the Sink namespace
	ip -n "${sink_ns}" addr add "${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}" dev "${SINK_LOOPBACK_IFACE}"
	ip -n "${sink_ns}" link set dev "${SINK_LOOPBACK_IFACE}" up
	ip netns exec "${sink_ns}" sysctl -qw "net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1"

	ip -n "${sink_ns}" link set "${SINK_TEST_IFACE}" up
	ip netns exec "${sink_ns}" sysctl -qw "net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1"


	# Populate nexthop IPv6 addresses on the test interface in the sink_ns
	# and, for each of them, append the matching nexthop clause used by
	# the route refresh loop in the source_ns.
	echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..."
	for ((i = 1; i <= IPV6_NEXTHOP_ADDR_COUNT; i++)); do
		printf -v nexthop_addr '%s::1:%x' "${IPV6_NEXTHOP_PREFIX}" "${i}"
		ip -n "${sink_ns}" addr add "${nexthop_addr}/${IPV6_NEXTHOP_ADDR_MASK}" dev "${SINK_TEST_IFACE}"
		nexthop_ip_list+=" nexthop via ${nexthop_addr} dev ${SOURCE_TEST_IFACE} weight 1"
	done
}


# Start the iperf3 servers and clients plus the background route refresh
# loop, then block in `wait` until a signal handler ends the test.
test_soft_lockup_during_routing_table_refresh() {
	local i
	local port
	local cmd

	# Start num_of_iperf_servers iperf3 servers in the sink_ns namespace,
	# each listening on ports starting at 5001 and incrementing
	# sequentially. Since iperf3 instances may terminate unexpectedly, a
	# while loop is used to automatically restart them in such cases.
	echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..."
	for ((i = 1; i <= num_of_iperf_servers; i++)); do
		printf -v port '5%03d' "${i}"
		cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p ${port} --rcv-timeout 200 &>/dev/null"
		ip netns exec "${sink_ns}" bash -c "while true; do ${cmd}; done &" &>/dev/null
	done

	# Wait for the iperf3 servers to be ready
	for ((i = 1; i <= num_of_iperf_servers; i++)); do
		printf -v port '5%03d' "${i}"
		wait_local_port_listen "${sink_ns}" "${port}" tcp
	done

	# Continuously refresh the routing table in the background within
	# the source_ns namespace. The loop ends once the namespace no longer
	# shows up in `ip netns list`.
	ip netns exec "${source_ns}" bash -c "
		while ip netns list | grep -q ${source_ns}; do
			ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list};
			sleep ${ROUTING_TABLE_REFRESH_PERIOD};
			ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK};
		done &"

	# Start num_of_iperf_servers iperf3 clients in the source_ns namespace,
	# each sending TCP traffic on sequential ports starting at 5001.
	# Since iperf3 instances may terminate unexpectedly (e.g., if the route
	# to the server is deleted in the background during a route refresh), a
	# while loop is used to automatically restart them in such cases.
	echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..."
	for ((i = 1; i <= num_of_iperf_servers; i++)); do
		printf -v port '5%03d' "${i}"
		cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p ${port} --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null"
		ip netns exec "${source_ns}" bash -c "while true; do ${cmd}; done &" &>/dev/null
	done

	echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..."
	echo "info: A kernel soft lockup, if detected, results in a kernel panic!"

	# Block here until SIGALRM (timer expiry) or SIGINT runs a handler
	wait
}

# Make sure 'iperf3' is installed, skip the test otherwise
if [ ! -x "$(command -v iperf3)" ]; then
	echo "SKIP: 'iperf3' is not installed. Skipping the test."
	exit "${ksft_skip}"
fi

# Use one iperf3 server/client pair for every two cores on the machine
num_of_iperf_servers=$(( $(nproc) / 2 ))

# Check if we are running on a multi-core machine, skip the test otherwise
if (( num_of_iperf_servers == 0 )); then
	echo "SKIP: This test is not valid on a single core machine!"
	exit "${ksft_skip}"
fi

# Since the kernel soft lockup we're testing causes at least one core to enter
# an infinite loop, destabilizing the host and likely affecting subsequent
# tests, we trigger a kernel panic instead of reporting a failure and
# continuing
kernel_softlockup_panic_prev_val=$(sysctl -n kernel.softlockup_panic)
sysctl -qw kernel.softlockup_panic=1

# ^C: clean up and report the test as skipped
handle_sigint() {
	termination_signal="SIGINT"
	cleanup
	exit "${ksft_skip}"
}

# Timer expiry without a soft lockup: clean up and report success
handle_sigalrm() {
	termination_signal="SIGALRM"
	cleanup
	exit "${ksft_pass}"
}

trap handle_sigint SIGINT
trap handle_sigalrm SIGALRM

# Watchdog timer: ends the test after TEST_DURATION seconds. Its PID is
# recorded so that cleanup can stop it on every exit path.
(sleep "${TEST_DURATION}" && kill -s SIGALRM $$) &
timer_pid=$!

setup_prepare
test_soft_lockup_during_routing_table_refresh