1#!/usr/bin/env python3 2# SPDX-License-Identifier: GPL-2.0 3# Author: Breno Leitao <leitao@debian.org> 4""" 5 This test aims to evaluate the netpoll polling mechanism (as in 6 netpoll_poll_dev()). It presents a complex scenario where the network 7 attempts to send a packet but fails, prompting it to poll the NIC from within 8 the netpoll TX side. 9 10 This has been a crucial path in netpoll that was previously untested. Jakub 11 suggested using a single RX/TX queue, pushing traffic to the NIC, and then 12 sending netpoll messages (via netconsole) to trigger the poll. 13 14 In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If 15 so, the test passes, otherwise it will be skipped. This test is very dependent on 16 the driver and environment, given we are trying to trigger a tricky scenario. 17""" 18 19import errno 20import logging 21import os 22import random 23import string 24import threading 25import time 26from typing import Optional 27 28from lib.py import ( 29 bpftrace, 30 CmdExitFailure, 31 defer, 32 ethtool, 33 GenerateTraffic, 34 ksft_exit, 35 ksft_pr, 36 ksft_run, 37 KsftFailEx, 38 KsftSkipEx, 39 NetDrvEpEnv, 40 KsftXfailEx, 41) 42 43# Configure logging 44logging.basicConfig( 45 level=logging.INFO, 46 format="%(asctime)s - %(levelname)s - %(message)s", 47) 48 49NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" 50NETCONS_REMOTE_PORT: int = 6666 51NETCONS_LOCAL_PORT: int = 1514 52 53# Max number of netcons messages to send. Each iteration will setup 54# netconsole and send MAX_WRITES messages 55ITERATIONS: int = 20 56# Number of writes to /dev/kmsg per iteration 57MAX_WRITES: int = 40 58# MAPS contains the information coming from bpftrace it will have only one 59# key: "hits", which tells the number of times netpoll_poll_dev() was called 60MAPS: dict[str, int] = {} 61# Thread to run bpftrace in parallel 62BPF_THREAD: Optional[threading.Thread] = None 63# Time bpftrace will be running in parallel. 64BPFTRACE_TIMEOUT: int = 10 65 66 67def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: 68 """ 69 Read the ringsize using ethtool. This will be used to restore it after the test 70 """ 71 try: 72 ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] 73 rxs = ethtool_result["rx"] 74 txs = ethtool_result["tx"] 75 except (KeyError, IndexError) as exception: 76 raise KsftSkipEx( 77 f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." 78 ) from exception 79 80 return rxs, txs 81 82 83def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: 84 """Try to the number of RX and TX ringsize.""" 85 rxs = ring_size[0] 86 txs = ring_size[1] 87 88 logging.debug("Setting ring size to %d/%d", rxs, txs) 89 try: 90 ethtool(f"-G {interface_name} rx {rxs} tx {txs}") 91 except CmdExitFailure: 92 # This might fail on real device, retry with a higher value, 93 # worst case, keep it as it is. 94 return False 95 96 return True 97 98 99def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: 100 """Read the number of RX, TX and combined queues using ethtool""" 101 102 try: 103 ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] 104 rxq = ethtool_result.get("rx", -1) 105 txq = ethtool_result.get("tx", -1) 106 combined = ethtool_result.get("combined", -1) 107 108 except IndexError as exception: 109 raise KsftSkipEx( 110 f"Failed to read queues numbers: {exception}. Not going to mess with them." 111 ) from exception 112 113 return rxq, txq, combined 114 115 116def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: 117 """Set the number of RX, TX and combined queues using ethtool""" 118 rxq, txq, combined = queues 119 120 cmdline = f"-L {interface_name}" 121 122 if rxq != -1: 123 cmdline += f" rx {rxq}" 124 if txq != -1: 125 cmdline += f" tx {txq}" 126 if combined != -1: 127 cmdline += f" combined {combined}" 128 129 logging.debug("calling: ethtool %s", cmdline) 130 131 try: 132 ethtool(cmdline) 133 except CmdExitFailure as exception: 134 raise KsftSkipEx( 135 f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" 136 ) from exception 137 138 139def netcons_generate_random_target_name() -> str: 140 """Generate a random target name starting with 'netcons'""" 141 random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) 142 return f"netcons_{random_suffix}" 143 144 145def netcons_create_target( 146 config_data: dict[str, str], 147 target_name: str, 148) -> None: 149 """Create a netconsole dynamic target against the interfaces""" 150 logging.debug("Using netconsole name: %s", target_name) 151 try: 152 os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) 153 logging.debug( 154 "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name 155 ) 156 except OSError as exception: 157 if exception.errno != errno.EEXIST: 158 raise KsftFailEx( 159 f"Failed to create netconsole target directory: {exception}" 160 ) from exception 161 162 try: 163 for key, value in config_data.items(): 164 path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" 165 logging.debug("Writing %s to %s", key, path) 166 with open(path, "w", encoding="utf-8") as file: 167 # Always convert to string to write to file 168 file.write(str(value)) 169 170 # Read all configuration values for debugging purposes 171 for debug_key in config_data.keys(): 172 with open( 173 f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", 174 "r", 175 encoding="utf-8", 176 ) as file: 177 content = file.read() 178 logging.debug( 179 "%s/%s/%s : %s", 180 NETCONSOLE_CONFIGFS_PATH, 181 target_name, 182 debug_key, 183 content.strip(), 184 ) 185 186 except Exception as exception: 187 raise KsftFailEx( 188 f"Failed to configure netconsole target: {exception}" 189 ) from exception 190 191 192def netcons_configure_target( 193 cfg: NetDrvEpEnv, interface_name: str, target_name: str 194) -> None: 195 """Configure netconsole on the interface with the given target name""" 196 config_data = { 197 "extended": "1", 198 "dev_name": interface_name, 199 "local_port": NETCONS_LOCAL_PORT, 200 "remote_port": NETCONS_REMOTE_PORT, 201 "local_ip": cfg.addr, 202 "remote_ip": cfg.remote_addr, 203 "remote_mac": "00:00:00:00:00:00", # Not important for this test 204 "enabled": "1", 205 } 206 207 netcons_create_target(config_data, target_name) 208 logging.debug( 209 "Created netconsole target: %s on interface %s", target_name, interface_name 210 ) 211 212 213def netcons_delete_target(name: str) -> None: 214 """Delete a netconsole dynamic target""" 215 target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" 216 try: 217 if os.path.exists(target_path): 218 os.rmdir(target_path) 219 except OSError as exception: 220 raise KsftFailEx( 221 f"Failed to delete netconsole target: {exception}" 222 ) from exception 223 224 225def netcons_load_module() -> None: 226 """Try to load the netconsole module""" 227 os.system("modprobe netconsole") 228 229 230def bpftrace_call() -> None: 231 """Call bpftrace to find how many times netpoll_poll_dev() is called. 232 Output is saved in the global variable `maps`""" 233 234 # This is going to update the global variable, that will be seen by the 235 # main function 236 global MAPS # pylint: disable=W0603 237 238 # This will be passed to bpftrace as in bpftrace -e "expr" 239 expr = "kprobe:netpoll_poll_dev { @hits = count(); }" 240 241 MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) 242 logging.debug("BPFtrace output: %s", MAPS) 243 244 245def bpftrace_start(): 246 """Start a thread to call `call_bpf` in a parallel thread""" 247 global BPF_THREAD # pylint: disable=W0603 248 249 BPF_THREAD = threading.Thread(target=bpftrace_call) 250 BPF_THREAD.start() 251 if not BPF_THREAD.is_alive(): 252 raise KsftSkipEx("BPFtrace thread is not alive. Skipping test") 253 254 255def bpftrace_stop() -> None: 256 """Stop the bpftrace thread""" 257 if BPF_THREAD: 258 BPF_THREAD.join() 259 260 261def bpftrace_any_hit(join: bool) -> bool: 262 """Check if netpoll_poll_dev() was called by checking the global variable `maps`""" 263 if not BPF_THREAD: 264 raise KsftFailEx("BPFtrace didn't start") 265 266 if BPF_THREAD.is_alive(): 267 if join: 268 # Wait for bpftrace to finish 269 BPF_THREAD.join() 270 else: 271 # bpftrace is still running, so, we will not check the result yet 272 return False 273 274 logging.debug("MAPS coming from bpftrace = %s", MAPS) 275 if "hits" not in MAPS.keys(): 276 raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") 277 278 logging.debug("Got a total of %d hits", MAPS["hits"]) 279 return MAPS["hits"] > 0 280 281 282def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: 283 """Print messages to the console, trying to trigger a netpoll poll""" 284 # Start bpftrace in parallel, so, it is watching 285 # netpoll_poll_dev() while we are sending netconsole messages 286 bpftrace_start() 287 defer(bpftrace_stop) 288 289 do_netpoll_flush(cfg, ifname, target_name) 290 291 if bpftrace_any_hit(join=True): 292 ksft_pr("netpoll_poll_dev() was called. Success") 293 return 294 295 raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") 296 297 298def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: 299 """Print messages to the console, trying to trigger a netpoll poll""" 300 netcons_configure_target(cfg, ifname, target_name) 301 retry = 0 302 303 for i in range(int(ITERATIONS)): 304 if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): 305 # bpftrace is done, stop sending messages 306 break 307 308 msg = f"netcons test #{i}" 309 with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: 310 for j in range(MAX_WRITES): 311 try: 312 kmsg.write(f"{msg}-{j}\n") 313 except OSError as exception: 314 # in some cases, kmsg can be busy, so, we will retry 315 time.sleep(1) 316 retry += 1 317 if retry < 5: 318 logging.info("Failed to write to kmsg. Retrying") 319 # Just retry a few times 320 continue 321 raise KsftFailEx( 322 f"Failed to write to kmsg: {exception}" 323 ) from exception 324 325 netcons_delete_target(target_name) 326 netcons_configure_target(cfg, ifname, target_name) 327 # If we sleep here, we will have a better chance of triggering 328 # This number is based on a few tests I ran while developing this test 329 time.sleep(0.4) 330 331 332def configure_network(ifname: str) -> None: 333 """Configure ring size and queue numbers""" 334 335 # Set defined queues to 1 to force congestion 336 prev_queues = ethtool_get_queues_cnt(ifname) 337 logging.debug("RX/TX/combined queues: %s", prev_queues) 338 # Only set the queues to 1 if they exists in the device. I.e, they are > 0 339 ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) 340 defer(ethtool_set_queues_cnt, ifname, prev_queues) 341 342 # Try to set the ring size to some low value. 343 # Do not fail if the hardware do not accepted desired values 344 prev_ring_size = ethtool_get_ringsize(ifname) 345 for size in [(1, 1), (128, 128), (256, 256)]: 346 if ethtool_set_ringsize(ifname, size): 347 # hardware accepted the desired ringsize 348 logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) 349 break 350 defer(ethtool_set_ringsize, ifname, prev_ring_size) 351 352 353def test_netpoll(cfg: NetDrvEpEnv) -> None: 354 """ 355 Test netpoll by sending traffic to the interface and then sending 356 netconsole messages to trigger a poll 357 """ 358 359 ifname = cfg.ifname 360 configure_network(ifname) 361 target_name = netcons_generate_random_target_name() 362 traffic = None 363 364 try: 365 traffic = GenerateTraffic(cfg) 366 do_netpoll_flush_monitored(cfg, ifname, target_name) 367 finally: 368 if traffic: 369 traffic.stop() 370 371 # Revert RX/TX queues 372 netcons_delete_target(target_name) 373 374 375def test_check_dependencies() -> None: 376 """Check if the dependencies are met""" 377 if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): 378 raise KsftSkipEx( 379 f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 380 ) 381 382 383def main() -> None: 384 """Main function to run the test""" 385 netcons_load_module() 386 test_check_dependencies() 387 with NetDrvEpEnv(__file__) as cfg: 388 ksft_run( 389 [test_netpoll], 390 args=(cfg,), 391 ) 392 ksft_exit() 393 394 395if __name__ == "__main__": 396 main() 397