xref: /linux/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py (revision 68a052239fc4b351e961f698b824f7654a346091)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5Test driver resilience vs page pool allocation failures.
6"""
7
8import errno
9import time
10import math
11import os
12from lib.py import ksft_run, ksft_exit, ksft_pr
13from lib.py import KsftSkipEx, KsftFailEx
14from lib.py import NetdevFamily, NlError
15from lib.py import NetDrvEpEnv
16from lib.py import cmd, tool, GenerateTraffic
17
18
def _write_fail_config(config):
    """
    Write fail_function knobs into debugfs.

    Each key of `config` names a file under
    /sys/kernel/debug/fail_function/ and each value is written to that
    file as text followed by a newline, in the dict's iteration order.
    """
    base = "/sys/kernel/debug/fail_function/"
    for knob, setting in config.items():
        with open(f"{base}{knob}", "w", encoding='ascii') as knob_file:
            knob_file.write(f"{setting!s}\n")
24
25
def _enable_pp_allocation_fail():
    """
    Turn on error injection for page_pool_alloc_netmems.

    Registers the function with fail_function (unless its debugfs entry
    already exists) and then writes the injection knobs.

    Raises KsftSkipEx when the fail_function debugfs directory is absent.
    """
    fail_dir = "/sys/kernel/debug/fail_function"
    func_dir = "/sys/kernel/debug/fail_function/page_pool_alloc_netmems"

    if not os.path.exists(fail_dir):
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    # Only register the function if it is not on the injection list yet
    already_registered = os.path.exists(func_dir)
    if not already_registered:
        _write_fail_config({"inject": "page_pool_alloc_netmems"})

    # Insertion order is the order in which the knobs get written
    knobs = {}
    knobs["verbose"] = 0
    knobs["interval"] = 511
    knobs["probability"] = 100
    knobs["times"] = -1
    _write_fail_config(knobs)
39
40
def _disable_pp_allocation_fail():
    """
    Turn off error injection for page_pool_alloc_netmems.

    Does nothing when the fail_function debugfs directory is absent.
    Otherwise clears the injection list (if the function is registered)
    and zeroes the "probability" and "times" knobs.
    """
    if os.path.exists("/sys/kernel/debug/fail_function"):
        registered = os.path.exists(
            "/sys/kernel/debug/fail_function/page_pool_alloc_netmems")
        if registered:
            # Writing an empty string to "inject" drops the registration
            _write_fail_config({"inject": ""})

        _write_fail_config({"probability": 0, "times": 0})
52
53
def test_pp_alloc(cfg, netdevnl):
    """
    Configure page pool allocation fail injection while traffic is running.

    Flow:
      1. Skip unless the driver reports 'rx-alloc-fail' via qstats.
      2. Start traffic and make sure it is flowing.
      3. Enable failure injection for page pool allocations and check
         that 'rx-alloc-fail' grows at a plausible rate over 3 seconds.
      4. Try to resize the RX ring with ethtool -G while failures are
         being injected and check that traffic still flows.
      5. Always undo everything: disable injection, stop traffic and
         restore the original RX ring size.

    Args:
        cfg: test environment; ``ifindex`` and ``ifname`` are used here.
        netdevnl: netdev netlink family object used for ``qstats_get``.

    Raises:
        KsftSkipEx: qstats lacks 'rx-alloc-fail', or the failure counter
            does not increase (fast enough) once injection is enabled.
        KsftFailEx: fewer than 4000 packets were received during a one
            second traffic check.
    """

    def get_stats():
        # First entry of the qstats dump for the interface under test
        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]

    def check_traffic_flowing():
        # Sample the RX packet counter one second apart
        stat1 = get_stats()
        time.sleep(1)
        stat2 = get_stats()
        if stat2['rx-packets'] - stat1['rx-packets'] < 4000:
            raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])

    # A driver without qstats support is treated like one that does not
    # report the counter: the test gets skipped, not failed.
    try:
        stats = get_stats()
    except NlError as e:
        if e.nl_msg.error == -errno.EOPNOTSUPP:
            stats = {}
        else:
            raise
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    set_g = False      # True once the RX ring size was actually changed
    traffic = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        s1 = get_stats()
        time.sleep(3)
        s2 = get_stats()

        seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
        if seen_fails < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        pkts = s2['rx-packets'] - s1['rx-packets']
        # Expecting one failure per 512 buffers, 3.1x safety margin
        want_fails = math.floor(pkts / 512 / 3.1)
        if seen_fails < want_fails:
            raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
                             "packets:", pkts)
        ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        # Prefer doubling the ring; fall back to halving if that would
        # exceed the reported maximum.
        if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
            new_g = g['rx'] * 2
        elif 'rx' in g:
            new_g = g['rx'] // 2
        else:
            new_g = None

        if new_g:
            # A failed resize is only reported, it is not a test failure
            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
            if set_g:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_g)
        else:
            ksft_pr("ethtool -G change retval: did not try")

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        # Nest the cleanup steps so that an error in one of them (e.g. a
        # failed debugfs write) cannot prevent the following ones from
        # running; otherwise the traffic generator could be left running
        # or the ring left at the modified size.
        try:
            _disable_pp_allocation_fail()
        finally:
            try:
                if traffic:
                    traffic.stop()
            finally:
                time.sleep(0.1)
                # set_g is only True after g was read, so g is bound here
                if set_g:
                    cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
132
133
def main() -> None:
    """ Entry point: run test_pp_alloc in a NetDrvEpEnv and report via ksft """
    family = NetdevFamily()
    with NetDrvEpEnv(__file__, nsim_test=False) as env:
        cases = [test_pp_alloc]
        ksft_run(cases, args=(env, family))
    ksft_exit()
141
142
# Run the selftest only when this file is executed as a script.
if __name__ == "__main__":
    main()
145