xref: /linux/tools/testing/selftests/drivers/net/stats.py (revision 8f7aa3d3c7323f4ca2768a9e74ebbe359c4f8f88)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5Tests related to standard netdevice statistics.
6"""
7
8import errno
9import subprocess
10import time
11from lib.py import ksft_run, ksft_exit, ksft_pr
12from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
13from lib.py import KsftSkipEx, KsftFailEx
14from lib.py import ksft_disruptive
15from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
16from lib.py import NetDrvEnv
17from lib.py import cmd, ip, defer
18
# Family handles created once at import time and shared by every test
# case in this file.
ethnl = EthtoolFamily()
netfam = NetdevFamily()
rtnl = RtnlFamily()
22
23
def check_pause(cfg) -> None:
    """
    Verify that a device which exposes Pause configuration also exposes
    the standard pause statistics.

    The test is skipped when the plain Pause query is rejected with
    EOPNOTSUPP; any other Netlink error is propagated.
    """

    # Probe without stats first, so lack of Pause support is a skip
    # rather than a failure.
    try:
        ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as e:
        if e.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("pause not supported by the device") from e

    header = {"dev-index": cfg.ifindex, "flags": {'stats'}}
    reply = ethnl.pause_get({"header": header})
    ksft_true(reply['stats'], "driver does not report stats")
40
41
def check_fec(cfg) -> None:
    """
    Verify that a device which exposes FEC configuration also exposes
    the standard FEC statistics.

    The test is skipped when the plain FEC query is rejected with
    EOPNOTSUPP; any other Netlink error is propagated.
    """

    # Probe without stats first, so lack of FEC support is a skip
    # rather than a failure.
    try:
        ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
    except NlError as e:
        if e.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("FEC not supported by the device") from e

    header = {"dev-index": cfg.ifindex, "flags": {'stats'}}
    reply = ethnl.fec_get({"header": header})
    ksft_true(reply['stats'], "driver does not report stats")
58
59
def check_fec_hist(cfg) -> None:
    """
    Check that drivers which support FEC histogram statistics report
    reasonable values.

    The test is skipped when the device rejects the FEC query with
    EOPNOTSUPP, or when the reply carries no stats or no histogram.
    For every histogram bin it checks that the range and the value are
    present, that the range is not inverted and, when per-lane values
    are reported, that they add up to the bin value.
    """

    try:
        data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
                                         "flags": {'stats'}}})
    except NlError as e:
        if e.error == errno.EOPNOTSUPP:
            raise KsftSkipEx("FEC not supported by the device") from e
        raise
    if 'stats' not in data:
        raise KsftSkipEx("FEC stats not supported by the device")
    if 'hist' not in data['stats']:
        raise KsftSkipEx("FEC histogram not supported by the device")

    required = ('bin-low', 'bin-high', 'bin-val')
    for fec_bin in data['stats']['hist']:
        for key in required:
            ksft_in(key, fec_bin,
                    "Drivers should always report FEC bin range and value")
        if any(key not in fec_bin for key in required):
            # The malformed bin has been flagged above; the lookups below
            # would only trip over a KeyError, so move on to the next bin.
            continue

        ksft_ge(fec_bin['bin-high'], fec_bin['bin-low'],
                "FEC bin range should be valid")
        if 'bin-val-per-lane' in fec_bin:
            ksft_eq(sum(fec_bin['bin-val-per-lane']), fec_bin['bin-val'],
                    "FEC bin value should be equal to sum of per-lane values")
88
89
def pkt_byte_sum(cfg) -> None:
    """
    Cross-check the basic packet/byte counters reported by qstats against
    the rtnl link stats of the same interface.
    """

    basic_keys = ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']

    def get_qstat(test):
        # Pick the entry of the device under test out of the full dump
        dump = netfam.qstats_get({}, dump=True)
        if not dump:
            return None
        return next((qs for qs in dump if qs["ifindex"] == test.ifindex),
                    None)

    def stat_cmp(rstat, qstat):
        # Signed difference of the first counter that differs, 0 if none do
        for key in basic_keys:
            delta = rstat[key] - qstat[key]
            if delta != 0:
                return delta
        return 0

    qstat = get_qstat(cfg)
    if qstat is None:
        raise KsftSkipEx("qstats not supported by the device")

    for key in basic_keys:
        ksft_in(key, qstat, "Drivers should always report basic keys")

    # The interface may be passing traffic, so exact equality cannot be
    # expected. Alternate the two sources instead: whichever one was
    # fetched later must never be behind the one fetched before it.
    for _ in range(10):
        rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
        if stat_cmp(rtstat, qstat) < 0:
            raise KsftFailEx("RTNL stats are lower, fetched later")
        qstat = get_qstat(cfg)
        if stat_cmp(rtstat, qstat) > 0:
            raise KsftFailEx("Qstats are lower, fetched later")
126
127
def qstat_by_ifindex(cfg) -> None:
    """
    Qstats Netlink API tests - querying by ifindex.

    Takes a full dump, a per-ifindex dump for every reported device and a
    second full dump, then checks that no counter decreases across the
    three samples. Also sanity checks the queue-scope dump (queue ids per
    device and queue type must be unique and contiguous from 0) and the
    errors returned for invalid ifindex values.
    """

    # Construct a map ifindex -> [dump, by-index, dump]
    ifindexes = {}
    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']] = [entry, None, None]

    for ifindex in ifindexes:
        entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
        ksft_eq(len(entry), 1)
        ifindexes[entry[0]['ifindex']][1] = entry[0]

    stats = netfam.qstats_get({}, dump=True)
    for entry in stats:
        ifindexes[entry['ifindex']][2] = entry

    if not ifindexes:
        raise KsftSkipEx("No ifindex supports qstats")

    # Now make sure the stats match/make sense
    for triple in ifindexes.values():
        all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()

        for key in all_keys:
            ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
            ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)

    # Sanity check the dumps
    queues = NetdevFamily(recv_size=4096).qstats_get({"scope": "queue"}, dump=True)
    # Reformat the output into {ifindex: {rx: [id, id, ...], tx: [id, id, ...]}}
    parsed = {}
    for entry in queues:
        ifindex = entry["ifindex"]
        if ifindex not in parsed:
            parsed[ifindex] = {"rx": [], "tx": []}
        parsed[ifindex][entry["queue-type"]].append(entry['queue-id'])
    # Now, validate
    for per_type in parsed.values():
        for qtype in ['rx', 'tx']:
            ids = per_type[qtype]
            ksft_eq(len(ids), len(set(ids)),
                    comment="repeated queue keys")
            # default=-1 keeps a device which reported queues of only one
            # type from blowing up in max() on the empty list.
            ksft_eq(len(ids), max(ids, default=-1) + 1,
                    comment="missing queue keys")

    # Test invalid dumps
    # 0 is invalid
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 0}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ERANGE)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # loopback has no stats
    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": 1}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')

    # Try to get stats for lowest unused ifindex but not 0
    devs = rtnl.getlink({}, dump=True)
    all_ifindexes = {dev["ifi-index"] for dev in devs}
    lowest = 2
    while lowest in all_ifindexes:
        lowest += 1

    with ksft_raises(NlError) as cm:
        netfam.qstats_get({"ifindex": lowest}, dump=True)
    ksft_eq(cm.exception.nl_msg.error, -errno.ENODEV)
    ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
198
199
@ksft_disruptive
def check_down(cfg) -> None:
    """ Make sure qstats do not go backwards when the interface is brought down """

    try:
        before = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    except NlError as e:
        if e.error != errno.EOPNOTSUPP:
            raise
        raise KsftSkipEx("qstats not supported by the device") from e

    # Take the link down for the rest of the test, bring it back on cleanup
    ip(f"link set dev {cfg.dev['ifname']} down")
    defer(ip, f"link set dev {cfg.dev['ifname']} up")

    after = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
    for k in before:
        ksft_ge(after[k], before[k], comment=f"{k} went backwards on device down")

    # Also poke the per-queue scope while the device is down; the reply is
    # not inspected, the request just has to be handled without crashing.
    netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
221
222
def __run_inf_loop(body):
    """
    Start a shell which runs @body in an endless "while true" loop.

    Returns the Popen object; stdout and stderr of the shell are pipes,
    stopping the loop is up to the caller.
    """
    body = body.strip()
    # The loop template expects the body to terminate its last command
    body = body if body[-1] == ';' else body + ';'

    pipes = {"stdout": subprocess.PIPE, "stderr": subprocess.PIPE}
    return subprocess.Popen(f"while true; do {body} done", shell=True,
                            **pipes)
230
231
def __stats_increase_sanely(old, new) -> None:
    """
    Check every counter present in @old against @new: it must not have
    decreased, and must have grown by less than 2^31.
    """
    for name, before in old.items():
        after = new[name]
        ksft_ge(after, before)
        ksft_lt(after - before, 1 << 31, comment="likely wrapping error")
236
237
def procfs_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, which is not an exclusive
    lock, make sure drivers can handle parallel reads of stats.

    Two shell loops keep reading /proc/net/dev in parallel while the rtnl
    stats of the device under test are sampled twice, two seconds apart.
    The counters must not go backwards or jump by an implausible amount.
    """
    # __run_inf_loop() gives the loop a stdout pipe which nothing in this
    # test reads. Once the pipe is full "cat" blocks on its write and the
    # hammering silently stops while the shell still looks alive, so throw
    # the output away inside the shell instead.
    reader = "cat /proc/net/dev > /dev/null"
    one = __run_inf_loop(reader)
    defer(one.kill)
    two = __run_inf_loop(reader)
    defer(two.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(one.poll(), None)
    ksft_is(two.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    time.sleep(2)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)
    # defers will kill the loops
258
259
@ksft_disruptive
def procfs_downup_hammer(cfg) -> None:
    """
    Reading stats via procfs only holds the RCU lock, drivers often try
    to sleep when reading the stats, or don't protect against races.

    One shell loop keeps reading /proc/net/dev while another one keeps
    cycling the device down/up and changing its queue count. The rtnl
    stats sampled before and after a 9 second window must not go
    backwards or jump by an implausible amount. The number of completed
    down/up cycles is printed at the end.
    """
    # Set a large number of queues,
    # we'll flip between min(max_queues, 64) and 1
    channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    # Devices without combined channels may not carry the attribute in the
    # reply at all, treat a missing count the same as zero.
    if channels.get('combined-count', 0) == 0:
        rx_type = 'rx'
    else:
        rx_type = 'combined'
    cur_queue_cnt = channels[f'{rx_type}-count']
    max_queue_cnt = min(channels[f'{rx_type}-max'], 64)

    cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
    defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")

    # Real test stats
    # __run_inf_loop() gives the loop a stdout pipe which nothing reads for
    # this reader; once it fills up "cat" would block and stop exercising
    # procfs, so discard the output inside the shell.
    stats = __run_inf_loop("cat /proc/net/dev > /dev/null")
    defer(stats.kill)

    ipset = f"ip link set dev {cfg.ifname}"
    defer(ip, f"link set dev {cfg.ifname} up")
    # The "echo -n 1" lets us count iterations below
    updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
             f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
             f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
              "echo -n 1"
    updown = __run_inf_loop(updown)
    kill_updown = defer(updown.kill)

    time.sleep(1)
    # Make sure the processes are running
    ksft_is(stats.poll(), None)
    ksft_is(updown.poll(), None)

    rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    # We're looking for crashes, give it extra time
    time.sleep(9)
    rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
    __stats_increase_sanely(rtstat1, rtstat2)

    # Stop the up/down loop now, so that its output can be collected; each
    # completed cycle contributed exactly one character.
    kill_updown.exec()
    stdout, _ = updown.communicate(timeout=5)
    ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
307
308
def main() -> None:
    """ Entry point: run all the stats test cases against the test device """

    cases = [
        check_pause,
        check_fec,
        check_fec_hist,
        pkt_byte_sum,
        qstat_by_ifindex,
        check_down,
        procfs_hammer,
        procfs_downup_hammer,
    ]

    with NetDrvEnv(__file__, queue_count=100) as cfg:
        ksft_run(cases, args=(cfg, ))
    ksft_exit()
318
319
# Only run the tests when executed as a script, not when imported.
if __name__ == "__main__":
    main()
322