xref: /linux/tools/testing/selftests/drivers/net/stats.py (revision 55a42f78ffd386e01a5404419f8c5ded7db70a21)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5Tests related to standard netdevice statistics.
6"""
7
8import errno
9import subprocess
10import time
11from lib.py import ksft_run, ksft_exit, ksft_pr
12from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
13from lib.py import KsftSkipEx, KsftFailEx
14from lib.py import ksft_disruptive
15from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
16from lib.py import NetDrvEnv
17from lib.py import cmd, ip, defer
18
# Netlink family handles, created once at import time and shared by the
# test cases in this file.
ethnl = EthtoolFamily()  # used for the pause, FEC and channels queries
netfam = NetdevFamily()  # used for the qstats queries
rtnl = RtnlFamily()  # used for link dumps and the 'stats64' link statistics
22
23
def check_pause(cfg) -> None:
    """
    Check that drivers which support Pause config also report standard
    pause stats.

    The device is first probed with a plain pause query; an EOPNOTSUPP
    reply turns into a skip, any other netlink error is propagated.
    The query is then repeated with the 'stats' flag and the 'stats'
    entry of the reply is required to be truthy.
    """

    try:
        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("pause not supported by the device") from err

    request = {"header": {"dev-index": cfg.ifindex, "flags": {'stats'}}}
    reply = ethnl.pause_get(request)
    ksft_true(reply['stats'], "driver does not report stats")
40
41
def check_fec(cfg) -> None:
    """
    Check that drivers which support FEC config also report standard
    FEC stats.

    The device is first probed with a plain FEC query; an EOPNOTSUPP
    reply turns into a skip, any other netlink error is propagated.
    The query is then repeated with the 'stats' flag and the 'stats'
    entry of the reply is required to be truthy.
    """

    try:
        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("FEC not supported by the device") from err

    request = {"header": {"dev-index": cfg.ifindex, "flags": {'stats'}}}
    reply = ethnl.fec_get(request)
    ksft_true(reply['stats'], "driver does not report stats")
58
59
def check_fec_hist(cfg) -> None:
    """
    Check that drivers which support FEC histogram statistics report
    reasonable values.

    The test is skipped when the FEC query fails with EOPNOTSUPP, or when
    the reply carries no 'stats' entry or no 'hist' entry inside it.

    For every histogram bin the test checks that:
      - 'bin-low', 'bin-high' and 'bin-val' are all present,
      - 'bin-high' is greater than or equal to 'bin-low',
      - when 'bin-val-per-lane' is present, its sum equals 'bin-val'.
    """

    try:
        data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
                                         "flags": {'stats'}}})
    except NlError as e:
        if e.error == errno.EOPNOTSUPP:
            raise KsftSkipEx("FEC not supported by the device") from e
        raise
    if 'stats' not in data:
        raise KsftSkipEx("FEC stats not supported by the device")
    if 'hist' not in data['stats']:
        raise KsftSkipEx("FEC histogram not supported by the device")

    required = ('bin-low', 'bin-high', 'bin-val')
    for fec_bin in data['stats']['hist']:
        for key in required:
            ksft_in(key, fec_bin,
                    "Drivers should always report FEC bin range and value")
        if any(key not in fec_bin for key in required):
            # The missing key has been reported by ksft_in() above; skip the
            # value checks for this bin so the report is not followed by a
            # bare KeyError from the lookups below.
            continue

        ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
                "FEC bin range should be valid")
        if 'bin-val-per-lane' in fec_bin:
            ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
                    "FEC bin value should be equal to sum of per-lane values")
88
89
def pkt_byte_sum(cfg) -> None:
    """
    Check that qstat and interface stats match in value.

    Skips when the qstats dump has no entry for the device under test.
    """

    basic_keys = ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']

    def fetch_qstat(env):
        # Pick the entry of the device under test out of the full dump,
        # or None when there is no such entry.
        dump = netfam.qstats_get({}, dump=True)
        return next((item for item in dump or []
                     if item["ifindex"] == env.ifindex), None)

    qstat = fetch_qstat(cfg)
    if qstat is None:
        raise KsftSkipEx("qstats not supported by the device")

    for key in basic_keys:
        ksft_in(key, qstat, "Drivers should always report basic keys")

    def first_delta(rstat, qstat):
        # Difference (rtnl - qstat) for the first basic key on which the
        # two snapshots disagree, 0 when they agree on all of them.
        for key in basic_keys:
            diff = rstat[key] - qstat[key]
            if diff != 0:
                return diff
        return 0

    # rtnl stats and qstats must match, but the interface may be up and
    # passing traffic. So alternate between the two sources: whichever
    # snapshot was taken later must be higher than or equal to the
    # earlier one.
    for _ in range(10):
        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
        if first_delta(rtstat, qstat) < 0:
            raise KsftFailEx("RTNL stats are lower, fetched later")

        qstat = fetch_qstat(cfg)
        if first_delta(rtstat, qstat) > 0:
            raise KsftFailEx("Qstats are lower, fetched later")
126
127
def qstat_by_ifindex(cfg) -> None:
    """
    Qstats Netlink API tests - querying by ifindex.

    Three things are checked:
      - per-ifindex queries return exactly one entry, and the counters seen
        in a dump / by-ifindex query / dump sequence never go backwards,
      - in a queue-scope dump every device reports each queue id once and
        without gaps, separately for rx and tx,
      - invalid ifindex values (0, loopback, an unused index) are rejected
        with the expected error code and with extack pointing at '.ifindex'.

    Skips when no device reports qstats. `cfg` is unused by the visible
    code but is kept, as every test case is called with it.
    """

    # Construct a map ifindex -> [dump, by-index, dump]
    ifindexes = {}
    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']] = [entry, None, None]

    for ifindex in ifindexes:
        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
        ksft_eq(len(entry), 1)
        ifindexes[entry[0]['ifindex']][1] = entry[0]

    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']][2] = entry

    if not ifindexes:
        raise KsftSkipEx("No ifindex supports qstats")

    # Now make sure the stats match/make sense: the three snapshots were
    # taken in order, so each one must be >= the one before it.
    for first, by_index, last in ifindexes.values():
        all_keys = first.keys() | by_index.keys() | last.keys()

        for key in all_keys:
            ksft_ge(by_index[key], first[key], comment="bad key: " + key)
            ksft_ge(last[key], by_index[key], comment="bad key: " + key)

    # Sanity check the dumps
    queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
    parsed = {}
    for entry in queues:
        ifindex = entry["ifindex"]
        if ifindex not in parsed:
            parsed[ifindex] = {"rx": [], "tx": []}
        parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
    # Now, validate
    for ifindex, dev_queues in parsed.items():
        for qtype in ['rx', 'tx']:
            ids = dev_queues[qtype]
            ksft_eq(len(ids), len(set(ids)),
                    comment="repeated queue keys")
            # default=-1 keeps an empty list (device reported no queues of
            # this type) consistent: 0 entries, highest id + 1 == 0.
            ksft_eq(len(ids), max(ids, default=-1) + 1,
                    comment="missing queue keys")

    # Test invalid dumps
    # 0 is invalid
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 0}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ERANGE)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # loopback has no stats
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 1}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # Try to get stats for lowest unused ifindex but not 0
    devs = rtnl.getlink({}, dump=True)
    all_ifindexes = {dev["ifi-index"] for dev in devs}
    lowest = 2
    while lowest in all_ifindexes:
        lowest += 1

    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": lowest}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
198
199
200@ksft_disruptive
def check_down(cfg) -> None:
    """
    Test statistics (interface and qstat) are not impacted by ifdown

    Takes a qstats snapshot, brings the interface down (bringing it back
    up is deferred), takes a second snapshot and checks that no counter
    present in the first snapshot went backwards. Skips when the first
    query fails with EOPNOTSUPP.
    """

    by_ifindex = {"ifindex": cfg.ifindex}
    try:
        before = netfam.qstats_get(by_ifindex, dump=True)[0]
    except NlError as err:
        if err.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("qstats not supported by the device") from err

    ifname = cfg.dev['ifname']
    ip(f"link set dev {ifname} down")
    defer(ip, f"link set dev {ifname} up")

    after = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    for k, old_val in before.items():
        ksft_ge(after[k], old_val, comment=f"{k} went backwards on device down")

    # exercise per-queue API to make sure that "device down" state
    # is handled correctly and doesn't crash
    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
221
222
def __run_inf_loop(body):
    """
    Start a shell which runs `body` in an endless `while true` loop.

    `body` is a shell command list; surrounding whitespace is stripped and
    a terminating ';' is appended when missing. Returns the
    subprocess.Popen object of the shell; the caller is responsible for
    killing it.

    Both stdout and stderr of the shell are pipes. Output that the caller
    never reads accumulates in the pipe, and the loop blocks once the pipe
    is full, so noisy commands should redirect their output inside `body`.

    Raises ValueError when `body` is empty or only whitespace.
    """
    body = body.strip()
    if not body:
        # "while true; do  done" is a shell syntax error; fail early with a
        # clear message instead.
        raise ValueError("loop body must not be empty")
    if not body.endswith(';'):
        body += ';'

    return subprocess.Popen(f"while true; do {body} done", shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
230
231
def __stats_increase_sanely(old, new) -> None:
    """
    Check every counter of `old` against the same key in `new`: the value
    must not have decreased, and must have grown by less than 2^31.
    """
    for name, before in old.items():
        after = new[name]
        ksft_ge(after, before)
        ksft_lt(after - before, 1 << 31, comment="likely wrapping error")
236
237
def procfs_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, which is not an exclusive
    lock, make sure drivers can handle parallel reads of stats.

    Two background shell loops read /proc/net/dev continuously while two
    rtnl 'stats64' snapshots are taken 2 seconds apart; the counters must
    not go backwards or jump by 2^31 or more.
    """
    # Discard the output inside the shell loop: the helper connects stdout
    # to a pipe nobody reads, so without the redirect `cat` would block as
    # soon as the pipe is full and the readers would stop reading.
    reader_cmd = "cat /proc/net/dev > /dev/null"
    one = __run_inf_loop(reader_cmd)
    defer(one.kill)
    two = __run_inf_loop(reader_cmd)
    defer(two.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(one.poll(), None)
    ksft_is(two.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    time.sleep(2)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)
    # defers will kill the loops
258
259
260@ksft_disruptive
def procfs_downup_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, drivers often try
    to sleep when reading the stats, or don't protect against races.

    One background loop reads /proc/net/dev continuously while another
    flips the interface down/up and changes the queue count between 1 and
    the maximum. Two rtnl 'stats64' snapshots taken 9 seconds apart must
    not go backwards or jump by 2^31 or more. The original queue count and
    the "up" state are restored through defers.
    """
    # Max out the queues, we'll flip between max and 1
    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    # NOTE(review): the reply presumably leaves out the combined attributes
    # for devices without combined channels - confirm. A missing key is
    # treated like a count of 0 instead of raising KeyError.
    if channels.get('combined-count', 0) == 0:
        rx_type = 'rx'
    else:
        rx_type = 'combined'
    cur_queue_cnt = channels[f'{rx_type}-count']
    max_queue_cnt = channels[f'{rx_type}-max']

    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")

    # Real test stats
    # Discard the output inside the shell loop: the helper connects stdout
    # to a pipe nobody reads, so without the redirect `cat` would block as
    # soon as the pipe is full and stop reading for the rest of the test.
    stats = __run_inf_loop("cat /proc/net/dev > /dev/null")
    defer(stats.kill)

    ipset = f"ip link set dev {cfg.ifname}"
    defer(ip, f"link set dev {cfg.ifname} up")
    # The "echo -n 1" lets us count iterations below
    updown_cmd = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
                 f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
                 f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
                 "echo -n 1"
    updown = __run_inf_loop(updown_cmd)
    kill_updown = defer(updown.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(stats.poll(), None)
    ksft_is(updown.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    # We're looking for crashes, give it extra time
    time.sleep(9)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)

    # Stop the up/down loop now rather than at test exit, so that its
    # output can be collected; one character was printed per iteration.
    kill_updown.exec()
    stdout, _ = updown.communicate(timeout=5)
    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
306
307
def main() -> None:
    """ Ksft boiler plate main """

    # Test cases, in execution order; each one is called with `cfg`.
    tests = [
        check_pause,
        check_fec,
        check_fec_hist,
        pkt_byte_sum,
        qstat_by_ifindex,
        check_down,
        procfs_hammer,
        procfs_downup_hammer,
    ]

    with NetDrvEnv(__file__, queue_count=100) as cfg:
        ksft_run(tests, args=(cfg, ))
    ksft_exit()
317
318
# Run the test cases only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
321