#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0

"""
Test driver resilience vs page pool allocation failures.
"""

import errno
import time
import math
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import NetdevFamily, NlError
from lib.py import NetDrvEpEnv
from lib.py import cmd, tool, GenerateTraffic


# debugfs directory holding the function error injection control files
_FAIL_FUNCTION_DIR = "/sys/kernel/debug/fail_function"
# Name of the function this test asks the kernel to inject failures into
_PP_ALLOC_FUNC = "page_pool_alloc_netmems"


def _write_fail_config(config):
    """
    Write every value of @config to the fail_function debugfs file named
    after its key. Values are converted with str() and newline-terminated.
    """
    for key, value in config.items():
        with open(os.path.join(_FAIL_FUNCTION_DIR, key), "w", encoding='ascii') as fp:
            fp.write(str(value) + "\n")


def _enable_pp_allocation_fail():
    """
    Turn on error injection for the page pool allocation function.

    Raises KsftSkipEx when the fail_function debugfs directory does not exist.
    """
    if not os.path.exists(_FAIL_FUNCTION_DIR):
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    # Only register the function if there is no entry for it yet
    if not os.path.exists(os.path.join(_FAIL_FUNCTION_DIR, _PP_ALLOC_FUNC)):
        _write_fail_config({"inject": _PP_ALLOC_FUNC})

    # NOTE(review): presumably "interval" 511 with "probability" 100 yields
    # the "one failure per 512 buffers" the test expects below, and "times"
    # -1 means no limit - confirm against the kernel fault injection docs.
    _write_fail_config({
        "verbose": 0,
        "interval": 511,
        "probability": 100,
        "times": -1,
    })


def _disable_pp_allocation_fail():
    """
    Undo _enable_pp_allocation_fail(); does nothing when the fail_function
    debugfs directory does not exist.
    """
    if not os.path.exists(_FAIL_FUNCTION_DIR):
        return

    # Writing an empty line to "inject" is only done if our function is listed
    if os.path.exists(os.path.join(_FAIL_FUNCTION_DIR, _PP_ALLOC_FUNC)):
        _write_fail_config({"inject": ""})

    _write_fail_config({
        "probability": 0,
        "times": 0,
    })


def test_pp_alloc(cfg, netdevnl):
    """
    Configure page pool allocation fail injection while traffic is running.

    Skips when the driver does not report 'rx-alloc-fail' in qstats, when
    error injection is unavailable, or when the failure counter does not
    grow as fast as expected. Fails when fewer than 4000 packets are
    received during any of the one second traffic checks.
    """

    def get_stats():
        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]

    def check_traffic_flowing():
        stat1 = get_stats()
        time.sleep(1)
        stat2 = get_stats()
        delta = stat2['rx-packets'] - stat1['rx-packets']
        if delta < 4000:
            raise KsftFailEx("Traffic seems low:", delta)

    try:
        stats = get_stats()
    except NlError as e:
        # No qstats support at all is handled like a missing counter (skip)
        if e.nl_msg.error == -errno.EOPNOTSUPP:
            stats = {}
        else:
            raise
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    set_g = False
    traffic = None
    g = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        s1 = get_stats()
        time.sleep(3)
        s2 = get_stats()

        seen_fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
        if seen_fails < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        pkts = s2['rx-packets'] - s1['rx-packets']
        # Expecting one failure per 512 buffers, 3.1x safety margin
        want_fails = math.floor(pkts / 512 / 3.1)
        if seen_fails < want_fails:
            raise KsftSkipEx("Allocation increasing too slowly", seen_fails,
                             "packets:", pkts)
        ksft_pr(f"Seen: pkts:{pkts} fails:{seen_fails} (pass thrs:{want_fails})")

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        # Double the Rx ring if the maximum allows it, otherwise halve it
        if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
            new_g = g['rx'] * 2
        elif 'rx' in g:
            new_g = g['rx'] // 2
        else:
            new_g = None

        if new_g:
            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
            if set_g:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_g)
        else:
            ksft_pr("ethtool -G change retval: did not try")

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        # Run every cleanup step even if an earlier one raises, otherwise
        # a failing debugfs write would leave the traffic generator running
        # and the ring size changed.
        try:
            _disable_pp_allocation_fail()
        finally:
            try:
                if traffic:
                    traffic.stop()
            finally:
                time.sleep(0.1)
                # set_g is only True after g was read and the resize succeeded
                if set_g:
                    cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")


def main() -> None:
    """ Ksft boiler plate main """
    netdevnl = NetdevFamily()
    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:

        ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
    ksft_exit()


if __name__ == "__main__":
    main()