xref: /linux/tools/testing/selftests/drivers/net/netpoll_basic.py (revision 260f6f4fda93c8485c8037865c941b42b9cba5d2)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3# Author: Breno Leitao <leitao@debian.org>
4"""
5 This test aims to evaluate the netpoll polling mechanism (as in
6 netpoll_poll_dev()). It presents a complex scenario where the network
7 attempts to send a packet but fails, prompting it to poll the NIC from within
8 the netpoll TX side.
9
10 This has been a crucial path in netpoll that was previously untested. Jakub
11 suggested using a single RX/TX queue, pushing traffic to the NIC, and then
12 sending netpoll messages (via netconsole) to trigger the poll.
13
 In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If
 so, the test passes; otherwise it is reported as an expected failure (XFAIL).
 This test is very dependent on the driver and environment, given we are trying
 to trigger a tricky scenario.
17"""
18
19import errno
20import logging
21import os
22import random
23import string
24import threading
25import time
26from typing import Optional
27
28from lib.py import (
29    bpftrace,
30    CmdExitFailure,
31    defer,
32    ethtool,
33    GenerateTraffic,
34    ksft_exit,
35    ksft_pr,
36    ksft_run,
37    KsftFailEx,
38    KsftSkipEx,
39    NetDrvEpEnv,
40    KsftXfailEx,
41)
42
# Configure logging. The level is INFO, so the logging.debug() calls used
# throughout this file are not emitted unless the level is lowered here.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# configfs directory under which netconsole dynamic targets are created
NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole"
# Values written to the target's "remote_port" and "local_port" attributes
NETCONS_REMOTE_PORT: int = 6666
NETCONS_LOCAL_PORT: int = 1514

# Max number of iterations of the send loop. Each iteration writes MAX_WRITES
# messages to /dev/kmsg and then deletes and re-creates the netconsole target
ITERATIONS: int = 20
# Number of writes to /dev/kmsg per iteration
MAX_WRITES: int = 40
# MAPS holds the result returned by bpftrace(). The test only looks at one
# key: "hits", which tells the number of times netpoll_poll_dev() was called
MAPS: dict[str, int] = {}
# Thread to run bpftrace in parallel; None until bpftrace_start() is called
BPF_THREAD: Optional[threading.Thread] = None
# Value passed as `timeout` to bpftrace(), i.e. how long bpftrace runs in
# parallel (presumably seconds -- confirm against the bpftrace() helper).
BPFTRACE_TIMEOUT: int = 10
65
66
def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]:
    """
    Read the current RX/TX ring sizes of `interface_name` using `ethtool -g`.

    The caller uses the returned values to restore the ring sizes after the
    test.

    Returns:
        A (rx, tx) tuple taken from the "rx" and "tx" fields of the first
        entry of ethtool's JSON output.

    Raises:
        KsftSkipEx: if the ethtool command fails, returns no entries, or the
            entry lacks the "rx"/"tx" fields.
    """
    try:
        ring_info = ethtool(f"-g {interface_name}", json=True)[0]
        return ring_info["rx"], ring_info["tx"]
    # CmdExitFailure is handled here too, so that a device (or ethtool binary)
    # that cannot report ring sizes skips the test instead of erroring out,
    # matching how ethtool_set_queues_cnt() treats a failing ethtool command.
    except (CmdExitFailure, KeyError, IndexError) as exception:
        raise KsftSkipEx(
            f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them."
        ) from exception
81
82
def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool:
    """
    Try to set the RX and TX ring sizes of `interface_name` via `ethtool -G`.

    `ring_size` is indexed as (rx, tx). Returns True when the ethtool command
    succeeded and False when it exited with a failure, so the caller can
    retry with another size or leave the rings untouched.
    """
    rx_size, tx_size = ring_size[0], ring_size[1]
    logging.debug("Setting ring size to %d/%d", rx_size, tx_size)

    command = f"-G {interface_name} rx {rx_size} tx {tx_size}"
    try:
        ethtool(command)
    except CmdExitFailure:
        # Real devices may reject the requested size; report it instead of
        # failing so the caller decides what to do next.
        return False
    else:
        return True
97
98
def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]:
    """
    Read the number of RX, TX and combined queues using `ethtool -l`.

    Returns:
        A (rx, tx, combined) tuple read from the first entry of ethtool's
        JSON output. A field missing from that entry is reported as -1.

    Raises:
        KsftSkipEx: if the ethtool command fails or returns no entries.
    """
    try:
        channels = ethtool(f"-l {interface_name}", json=True)[0]
    # CmdExitFailure is handled here too, so that a device (or ethtool binary)
    # that cannot report its channels skips the test instead of erroring out,
    # matching how ethtool_set_queues_cnt() treats a failing ethtool command.
    except (CmdExitFailure, IndexError) as exception:
        raise KsftSkipEx(
            f"Failed to read queues numbers: {exception}. Not going to mess with them."
        ) from exception

    rxq = channels.get("rx", -1)
    txq = channels.get("tx", -1)
    combined = channels.get("combined", -1)
    return rxq, txq, combined
114
115
def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None:
    """
    Set the number of RX, TX and combined queues using `ethtool -L`.

    `queues` is (rx, tx, combined); an entry equal to -1 is left out of the
    ethtool command line. Raises KsftSkipEx if the ethtool command fails.
    """
    rxq, txq, combined = queues

    pieces = [f"-L {interface_name}"]
    for label, count in (("rx", rxq), ("tx", txq), ("combined", combined)):
        # -1 is the "not reported" marker, do not pass it to ethtool
        if count != -1:
            pieces.append(f" {label} {count}")
    cmdline = "".join(pieces)

    logging.debug("calling: ethtool %s", cmdline)

    try:
        ethtool(cmdline)
    except CmdExitFailure as exception:
        raise KsftSkipEx(
            f"Failed to configure RX/TX queues: {exception}. Ethtool not available?"
        ) from exception
137
138
def netcons_generate_random_target_name() -> str:
    """Return "netcons_" followed by 8 random lowercase letters/digits."""
    alphabet = string.ascii_lowercase + string.digits
    suffix_chars = random.choices(alphabet, k=8)
    return "netcons_" + "".join(suffix_chars)
143
144
def netcons_create_target(
    config_data: dict[str, str],
    target_name: str,
) -> None:
    """
    Create a netconsole dynamic target and write its configuration.

    The target directory is created under NETCONSOLE_CONFIGFS_PATH, then
    every key/value of `config_data` is written (in dict order) to the file
    named after the key. Each file is read back afterwards only to log its
    content at debug level.

    Raises KsftFailEx if the directory cannot be created (except for EEXIST)
    or if writing/reading any attribute fails.
    """
    target_dir = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}"

    logging.debug("Using netconsole name: %s", target_name)
    try:
        os.makedirs(target_dir, exist_ok=True)
        logging.debug(
            "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name
        )
    except OSError as exception:
        # An already existing target is tolerated, anything else is fatal
        if exception.errno != errno.EEXIST:
            raise KsftFailEx(
                f"Failed to create netconsole target directory: {exception}"
            ) from exception

    try:
        for key, value in config_data.items():
            attr_path = f"{target_dir}/{key}"
            logging.debug("Writing %s to %s", key, attr_path)
            with open(attr_path, "w", encoding="utf-8") as attr_file:
                # Values may not be strings (e.g. ports), convert before writing
                attr_file.write(str(value))

        # Read back every attribute, for debugging purposes only
        for key in config_data:
            with open(f"{target_dir}/{key}", "r", encoding="utf-8") as attr_file:
                current = attr_file.read()
            logging.debug(
                "%s/%s/%s : %s",
                NETCONSOLE_CONFIGFS_PATH,
                target_name,
                key,
                current.strip(),
            )

    except Exception as exception:
        raise KsftFailEx(
            f"Failed to configure netconsole target: {exception}"
        ) from exception
190
191
def netcons_configure_target(
    cfg: NetDrvEpEnv, interface_name: str, target_name: str
) -> None:
    """
    Build the netconsole settings for `interface_name` and create the target.

    Local/remote IPs come from `cfg.addr` / `cfg.remote_addr`, ports from the
    module constants. The settings are handed to netcons_create_target().
    """
    target_settings = {
        "extended": "1",
        "dev_name": interface_name,
        "local_port": NETCONS_LOCAL_PORT,
        "remote_port": NETCONS_REMOTE_PORT,
        "local_ip": cfg.addr,
        "remote_ip": cfg.remote_addr,
        # Not important for this test
        "remote_mac": "00:00:00:00:00:00",
        # Kept last: attributes are written in dict order, so the target is
        # enabled only after the rest of the settings have been written.
        "enabled": "1",
    }

    netcons_create_target(target_settings, target_name)
    logging.debug(
        "Created netconsole target: %s on interface %s", target_name, interface_name
    )
211
212
def netcons_delete_target(name: str) -> None:
    """
    Remove the netconsole dynamic target called `name`, if it exists.

    A missing target directory is not an error. Raises KsftFailEx when the
    directory exists but cannot be removed.
    """
    target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}"
    try:
        if not os.path.exists(target_path):
            # Nothing to clean up
            return
        os.rmdir(target_path)
    except OSError as exception:
        raise KsftFailEx(
            f"Failed to delete netconsole target: {exception}"
        ) from exception
223
224
def netcons_load_module() -> None:
    """Best-effort load of the netconsole module; the exit status is ignored."""
    modprobe_cmd = "modprobe netconsole"
    os.system(modprobe_cmd)
228
229
def bpftrace_call() -> None:
    """Run bpftrace to count how many times netpoll_poll_dev() is called.

    The result returned by bpftrace() is stored in the global variable
    `MAPS`, where the rest of the test reads it."""

    # The result is published through the module-level variable so the main
    # thread can inspect it once this function (run in a thread) is done
    global MAPS  # pylint: disable=W0603

    # Program handed to bpftrace, as in bpftrace -e "expr"
    probe = "kprobe:netpoll_poll_dev { @hits = count(); }"

    result = bpftrace(probe, timeout=BPFTRACE_TIMEOUT, json=True)
    MAPS = result
    logging.debug("BPFtrace output: %s", MAPS)
243
244
def bpftrace_start():
    """Run `bpftrace_call` in a new thread, stored in the global `BPF_THREAD`.

    Raises KsftSkipEx if the thread is not alive right after being started."""
    global BPF_THREAD  # pylint: disable=W0603

    worker = threading.Thread(target=bpftrace_call)
    BPF_THREAD = worker
    worker.start()
    if worker.is_alive():
        return
    raise KsftSkipEx("BPFtrace thread is not alive. Skipping test")
253
254
def bpftrace_stop() -> None:
    """Wait for the bpftrace thread to finish, if one was ever started"""
    if not BPF_THREAD:
        return
    BPF_THREAD.join()
259
260
def bpftrace_any_hit(join: bool) -> bool:
    """
    Tell whether bpftrace saw netpoll_poll_dev() being called.

    The answer comes from the "hits" entry of the global `MAPS`. While the
    bpftrace thread is still running, `join=True` waits for it to finish and
    `join=False` returns False right away without looking at the result.

    Raises KsftFailEx if bpftrace was never started or if the result has no
    "hits" entry.
    """
    if not BPF_THREAD:
        raise KsftFailEx("BPFtrace didn't start")

    if BPF_THREAD.is_alive():
        if not join:
            # bpftrace is still running, there is no result to check yet
            return False
        # Wait for bpftrace to finish
        BPF_THREAD.join()

    logging.debug("MAPS coming from bpftrace = %s", MAPS)
    if "hits" not in MAPS:
        raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}")

    hit_count = MAPS["hits"]
    logging.debug("Got a total of %d hits", hit_count)
    return hit_count > 0
280
281
def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Run do_netpoll_flush() while bpftrace watches netpoll_poll_dev().

    Prints a success message when at least one call was observed, otherwise
    raises KsftXfailEx.
    """
    # bpftrace runs in parallel, so it is already watching netpoll_poll_dev()
    # while the netconsole messages are being sent
    bpftrace_start()
    defer(bpftrace_stop)

    do_netpoll_flush(cfg, ifname, target_name)

    if not bpftrace_any_hit(join=True):
        raise KsftXfailEx("netpoll_poll_dev() was not called during the test...")

    ksft_pr("netpoll_poll_dev() was called. Success")
296
297
def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None:
    """
    Print messages to the console, trying to trigger a netpoll poll.

    Configures the netconsole target, then runs up to ITERATIONS rounds. Each
    round writes MAX_WRITES messages to /dev/kmsg, re-creates the netconsole
    target and sleeps briefly. The loop stops early once the bpftrace thread
    is no longer running.

    Expects bpftrace_start() to have been called (BPF_THREAD must be set).

    Raises:
        KsftFailEx: when writing to /dev/kmsg has failed 5 times in total, or
            when the netconsole target cannot be deleted/configured.
    """
    netcons_configure_target(cfg, ifname, target_name)
    retry = 0

    for i in range(ITERATIONS):
        if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False):
            # bpftrace is done, stop sending messages
            break

        msg = f"netcons test #{i}"
        # /dev/kmsg is opened unbuffered, so each write() below becomes one
        # write(2) syscall. A buffered text file would batch all MAX_WRITES
        # lines and hand them to the kernel in a single write at close time,
        # where an OSError would not be caught by the handler below.
        with open("/dev/kmsg", "wb", buffering=0) as kmsg:
            for j in range(MAX_WRITES):
                try:
                    kmsg.write(f"{msg}-{j}\n".encode("utf-8"))
                except OSError as exception:
                    # in some cases, kmsg can be busy: back off, then carry
                    # on with the next message
                    time.sleep(1)
                    retry += 1
                    if retry < 5:
                        logging.info("Failed to write to kmsg. Retrying")
                        # Tolerate only a few failures over the whole run
                        continue
                    raise KsftFailEx(
                        f"Failed to write to kmsg: {exception}"
                    ) from exception

        netcons_delete_target(target_name)
        netcons_configure_target(cfg, ifname, target_name)
        # If we sleep here, we will have a better chance of triggering
        # This number is based on a few tests I ran while developing this test
        time.sleep(0.4)
330
331
def configure_network(ifname: str) -> None:
    """
    Shrink the queue count and ring sizes of `ifname` to force congestion.

    The previous queue counts and ring sizes are restored through defer().
    """

    # Reduce every queue type the device exposes (count > 0) to a single queue
    prev_queues = ethtool_get_queues_cnt(ifname)
    logging.debug("RX/TX/combined queues: %s", prev_queues)
    single_queue = tuple(1 if count > 0 else count for count in prev_queues)
    ethtool_set_queues_cnt(ifname, single_queue)
    defer(ethtool_set_queues_cnt, ifname, prev_queues)

    # Try increasingly larger ring sizes until the hardware accepts one.
    # Not being able to set any of them is not an error.
    prev_ring_size = ethtool_get_ringsize(ifname)
    candidate_sizes = [(1, 1), (128, 128), (256, 256)]
    for size in candidate_sizes:
        if not ethtool_set_ringsize(ifname, size):
            continue
        # hardware accepted the desired ringsize
        logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size)
        break
    defer(ethtool_set_ringsize, ifname, prev_ring_size)
351
352
def test_netpoll(cfg: NetDrvEpEnv) -> None:
    """
    Test netpoll by sending traffic to the interface and then sending
    netconsole messages to trigger a poll.

    The interface is first reconfigured by configure_network(). Background
    traffic is stopped and the netconsole target is removed when the test
    ends, whether it passed or raised.
    """

    ifname = cfg.ifname
    configure_network(ifname)
    target_name = netcons_generate_random_target_name()
    traffic = None

    try:
        traffic = GenerateTraffic(cfg)
        do_netpoll_flush_monitored(cfg, ifname, target_name)
    finally:
        try:
            # traffic is still None if GenerateTraffic() itself raised
            if traffic:
                traffic.stop()
        finally:
            # Remove the netconsole target even when stopping the traffic
            # raised, so no stale target is left behind in configfs
            netcons_delete_target(target_name)
373
374
def test_check_dependencies() -> None:
    """Skip the test unless the netconsole configfs directory is present"""
    if os.path.exists(NETCONSOLE_CONFIGFS_PATH):
        return

    raise KsftSkipEx(
        f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set."  # pylint: disable=C0301
    )
381
382
def main() -> None:
    """Load netconsole, check the dependencies and run the netpoll test"""
    netcons_load_module()
    test_check_dependencies()

    with NetDrvEpEnv(__file__) as cfg:
        test_cases = [test_netpoll]
        ksft_run(test_cases, args=(cfg,))

    ksft_exit()
393
394
# Run the selftest only when this file is executed as a script
if __name__ == "__main__":
    main()
397