xref: /linux/tools/testing/selftests/drivers/net/gro.py (revision 30f831b44a98a660ccaf608f88d8bb945318dc59)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5GRO (Generic Receive Offload) conformance tests.
6
7Validates that GRO coalescing works correctly by running the gro
8binary in different configurations and checking for correct packet
9coalescing behavior.
10
11Test cases:
12  - data_same: Same size data packets coalesce
13  - data_lrg_sml: Large packet followed by smaller one coalesces
14  - data_sml_lrg: Small packet followed by larger one doesn't coalesce
15  - ack: Pure ACK packets do not coalesce
16  - flags_psh: Packets with PSH flag don't coalesce
17  - flags_syn: Packets with SYN flag don't coalesce
18  - flags_rst: Packets with RST flag don't coalesce
19  - flags_urg: Packets with URG flag don't coalesce
20  - flags_cwr: Packets with CWR flag don't coalesce
21  - tcp_csum: Packets with incorrect checksum don't coalesce
22  - tcp_seq: Packets with non-consecutive seqno don't coalesce
23  - tcp_ts: Packets with different timestamp options don't coalesce
24  - tcp_opt: Packets with different TCP options don't coalesce
25  - ip_ecn: Packets with different ECN don't coalesce
26  - ip_tos: Packets with different TOS don't coalesce
27  - ip_ttl: (IPv4) Packets with different TTL don't coalesce
28  - ip_opt: (IPv4) Packets with IP options don't coalesce
29  - ip_frag4: (IPv4) IPv4 fragments don't coalesce
30  - ip_id_df*: (IPv4) IP ID field coalescing tests
31  - ip_frag6: (IPv6) IPv6 fragments don't coalesce
32  - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces
33  - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce
34  - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce
35  - large_rem: Large packet remainder handling
36"""
37
38import glob
39import os
40import re
41from lib.py import ksft_run, ksft_exit, ksft_pr
42from lib.py import NetDrvEpEnv, KsftXfailEx
43from lib.py import NetdevFamily, EthtoolFamily
44from lib.py import bkg, cmd, defer, ethtool, ip
45from lib.py import ksft_variants, KsftNamedVariant
46
47
# Destination port hardcoded in gro.c (DPORT=8000). The ntuple filter
# installed by the isolated queue setup matches on this port.
GRO_DPORT = 8000
50
51
def _resolve_dmac(cfg, ipver):
    """
    Return the destination MAC the remote host must use to reach the local
    host over the given IP version ("4" or "6").

    The answer is either the local device address (same L2 segment) or the
    link-layer address of the gateway reported by the remote routing table.
    The result is stored on cfg as "dmac<ipver>" and reused on later calls.
    """

    cache_key = "dmac" + ipver
    # Reuse the answer computed by an earlier test case
    if hasattr(cfg, cache_key):
        return getattr(cfg, cache_key)

    routes = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
                json=True, host=cfg.remote)
    gateway = routes[0].get("gateway")

    if gateway:
        # ping to make sure neighbor is resolved,
        # bind to an interface, for v6 the GW is likely link local
        cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gateway}", host=cfg.remote)

        neighbors = ip(f"neigh get {gateway} dev {cfg.remote_ifname}",
                       json=True, host=cfg.remote)
        mac = neighbors[0]['lladdr']
    else:
        # No gateway on the route: local L2 segment, address directly
        mac = cfg.dev['address']

    setattr(cfg, cache_key, mac)
    return mac
79
80
def _write_defer_restore(cfg, path, val, defer_undo=False):
    """
    Write val to the file at path unless the file already holds that value.

    val may be a string or anything str() can render (e.g. an int); it is
    converted once and the same string is used for the comparison and for
    the write. The current content is compared with surrounding whitespace
    stripped.

    When defer_undo is set and a write happened, a deferred call is
    registered which writes the original (stripped) content back.
    """
    new_val = str(val)

    with open(path, "r", encoding="utf-8") as fp:
        orig_val = fp.read().strip()

    # Nothing to do, and therefore nothing to undo either
    if new_val == orig_val:
        return

    with open(path, "w", encoding="utf-8") as fp:
        fp.write(new_val)

    if defer_undo:
        # The restore itself is not deferred again (defer_undo stays False)
        defer(_write_defer_restore, cfg, path, orig_val)
90
91
def _set_mtu_restore(dev, mtu, host):
    """
    Raise the MTU of dev to at least mtu on the given host.

    dev is a link description dict providing 'ifname' and 'mtu'. Nothing is
    done when the device MTU is already large enough; otherwise the MTU is
    changed and a deferred command is registered to put the old value back.
    """
    orig_mtu = dev['mtu']
    if orig_mtu >= mtu:
        return

    ifname = dev['ifname']
    ip(f"link set dev {ifname} mtu {mtu}", host=host)
    defer(ip, f"link set dev {ifname} mtu {orig_mtu}", host=host)
96
97
def _set_ethtool_feat(dev, current, feats, host=None):
    """
    Switch the ethtool features listed in feats on or off for dev.

    current is the feature state of the device, indexed by feature name,
    with "active" and "fixed" entries per feature; feats maps feature name
    to the wanted boolean state. When at least one feature has to change,
    "ethtool -K" is run and a deferred "ethtool -K" restoring the states
    from current is registered. Raises KsftXfailEx when a feature would have
    to change but is marked fixed.
    """
    on_off = {True: "on", False: "off"}

    apply_args = ["-K", dev]
    revert_args = ["-K", dev]
    changed = False
    for feat_name, wanted in feats.items():
        apply_args.extend((feat_name, on_off[wanted]))
        state = current[feat_name]
        revert_args.extend((feat_name, on_off[state["active"]]))

        if state["active"] == wanted:
            continue
        changed = True
        if state["fixed"]:
            raise KsftXfailEx(f"Device does not support {feat_name}")

    if not changed:
        return

    result = ethtool(" ".join(apply_args), host=host)
    defer(ethtool, " ".join(revert_args), host=host)

    # If ethtool printed something kernel must have modified some features
    if result.stdout:
        ksft_pr(result)
121
122
def _get_queue_stats(cfg, queue_id):
    """
    Return the qstats entry of Rx queue queue_id on the device under test.

    Waits for the HW stats to settle, dumps the per-queue stats and returns
    the first entry for an 'rx' queue with a matching id. An empty dict is
    returned when there is no such entry.
    """
    cfg.wait_hw_stats_settle()
    request = {"ifindex": cfg.ifindex, "scope": ["queue"]}
    entries = cfg.netnl.qstats_get(request, dump=True)

    rx_matches = (entry for entry in entries
                  if entry.get('queue-type') == 'rx'
                  and entry.get('queue-id') == queue_id)
    return next(rx_matches, {})
132
133
def _setup_isolated_queue(cfg):
    """
    Dedicate Rx queue 1 to the test traffic and return its id.

    The queue gets weight 0 in the default RSS context, then an ntuple
    filter steers TCP traffic for the local address and GRO_DPORT to it.
    Both changes are undone via deferred commands. Raises KsftXfailEx when
    the device exposes fewer than two Rx queues.
    """
    test_queue = 1

    rx_queues = glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")
    qcnt = len(rx_queues)
    if qcnt < 2:
        raise KsftXfailEx(f"Need at least 2 queues, have {qcnt}")

    # Weight 0 for the test queue keeps RSS from spreading traffic onto it
    weights = " ".join("0" if qid == test_queue else "1"
                       for qid in range(qcnt))
    ethtool(f"-X {cfg.ifname} weight " + weights)
    defer(ethtool, f"-X {cfg.ifname} default")

    # Steer the test flow to the isolated queue with an ntuple filter
    rule = (f"flow-type tcp{cfg.addr_ipver} "
            f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {test_queue}")
    reply = ethtool(f"-N {cfg.ifname} {rule}")
    # The rule ID is taken from the last word ethtool printed
    ntuple_id = int(reply.stdout.split()[-1])
    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    return test_queue
159
160
def _setup_queue_count(cfg, num_queues):
    """
    Switch the NIC to num_queues combined channels.

    The current combined channel count is restored by a deferred command.
    Raises KsftXfailEx when the device reports a lower combined maximum
    than requested. Returns nothing.
    """
    chan_info = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    combined_max = chan_info.get('combined-max', 0)
    combined_cur = chan_info['combined-count']

    if combined_max < num_queues:
        raise KsftXfailEx(
            f"Need at least {num_queues} queues, max={combined_max}")

    # Register the restore first, then apply the new count
    defer(ethtool, f"-L {cfg.ifname} combined {combined_cur}")
    ethtool(f"-L {cfg.ifname} combined {num_queues}")
172
173
def _run_gro_bin(cfg, test_name, protocol=None, num_flows=None,
                 order_check=False, verbose=False, fail=False):
    """
    Run one test of the gro binary and return the receiver process.

    The receiver is started locally in the background and the sender is run
    on the remote host once the receiver is ready. When protocol is not
    given, the environment's address family is used ("ipv4" / "ipv6").
    num_flows, order_check and verbose add the matching command line
    options; fail is handed to bkg() for the receiver.
    """
    # Deploy the binary to the remote host on first use
    if not hasattr(cfg, "bin_remote"):
        cfg.bin_local = cfg.net_lib_dir / "gro"
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

    if protocol is not None:
        ipver = "6" if protocol[-1] == "6" else "4"
    else:
        ipver = cfg.addr_ipver
        protocol = f"ipv{ipver}"

    dmac = _resolve_dmac(cfg, ipver)

    # Options shared by the sender and the receiver
    opts = [
        f"--{protocol}",
        f"--dmac {dmac}",
        f"--smac {cfg.remote_dev['address']}",
        f"--daddr {cfg.addr_v[ipver]}",
        f"--saddr {cfg.remote_addr_v[ipver]}",
        f"--test {test_name}",
    ]
    optional = (
        (num_flows, f"--num-flows {num_flows}"),
        (order_check, "--order-check"),
        (verbose, "--verbose"),
    )
    opts += [opt for enabled, opt in optional if enabled]

    shared = " ".join(opts)
    receiver = f"{cfg.bin_local} {shared} --rx --iface {cfg.ifname}"
    sender = f"{cfg.bin_remote} {shared} --iface {cfg.remote_ifname}"

    with bkg(receiver, ksft_ready=True, exit_wait=True, fail=fail) as rx_proc:
        cmd(sender, host=cfg.remote)

    return rx_proc
213
214
def _setup(cfg, mode, test_name):
    """
    Prepare both hosts for a GRO test in the given mode.

    mode selects which receive offload stays enabled on the local device:
    "sw" (software GRO), "hw" (HW GRO) or "lro". The gro binary is deployed
    and the feature state of both devices is cached on cfg on first use.
    Every change made here registers a deferred undo.
    """

    def _offloads(gro, hw_gro, lro):
        # Wanted state of the three receive offload features
        return {"generic-receive-offload": gro,
                "rx-gro-hw": hw_gro,
                "large-receive-offload": lro}

    if not hasattr(cfg, "bin_remote"):
        cfg.bin_local = cfg.net_lib_dir / "gro"
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

    if not hasattr(cfg, "feat"):
        cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
        cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}",
                                  host=cfg.remote, json=True)[0]

    # "large_*" tests need at least 4k MTU
    if test_name.startswith("large_"):
        for dev, host in ((cfg.dev, None), (cfg.remote_dev, cfg.remote)):
            _set_mtu_restore(dev, 4096, host)

    if mode == "sw":
        sysfs_dir = f"/sys/class/net/{cfg.ifname}"

        _write_defer_restore(cfg, f"{sysfs_dir}/gro_flush_timeout",
                             "200000", defer_undo=True)
        _write_defer_restore(cfg, f"{sysfs_dir}/napi_defer_hard_irqs",
                             "10", defer_undo=True)

        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(True, False, False))
    elif mode == "hw":
        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(False, True, False))

        # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO
        # will also clear HW GRO. Use a hack of installing XDP generic
        # to skip SW GRO, even when enabled.
        now = ethtool(f"-k {cfg.ifname}", json=True)[0]
        if not now["rx-gro-hw"]["active"]:
            ksft_pr("Driver clears HW GRO and SW GRO is cleared, "
                    "using generic XDP workaround")
            xdp_obj = cfg.net_lib_dir / "xdp_dummy.bpf.o"
            ip(f"link set dev {cfg.ifname} xdpgeneric obj {xdp_obj} sec xdp")
            defer(ip, f"link set dev {cfg.ifname} xdpgeneric off")

            # Attaching XDP may change features, fetch the latest state
            now = ethtool(f"-k {cfg.ifname}", json=True)[0]

            _set_ethtool_feat(cfg.ifname, now,
                              _offloads(True, True, False))
    elif mode == "lro":
        # netdevsim advertises LRO for feature inheritance testing with
        # bonding/team tests but it doesn't actually perform the offload
        cfg.require_nsim(nsim_test=False)

        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(False, False, True))

    try:
        # Disable TSO for local tests
        cfg.require_nsim()  # will raise KsftXfailEx if not running on nsim

        _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat,
                          {"tcp-segmentation-offload": False},
                          host=cfg.remote)
    except KsftXfailEx:
        # Best effort: the Xfail is not propagated to the test
        pass
285
286
def _gro_variants():
    """
    Yield (mode, protocol, test_name) for every GRO conformance test.

    Modes are iterated outermost, then protocols; for each protocol the
    tests common to all protocols come first, followed by the tests
    specific to its IP version (ipip uses the IPv4 set).
    """

    # Tests that work for all protocols
    common_tests = (
        "data_same", "data_lrg_sml", "data_sml_lrg", "data_burst",
        "ack",
        "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr",
        "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt",
        "ip_ecn", "ip_tos",
        "large_max", "large_rem",
    )

    # Tests specific to IPv4
    ipv4_tests = (
        "ip_ttl", "ip_opt", "ip_frag4",
        "ip_id_df1_inc", "ip_id_df1_fixed",
        "ip_id_df0_inc", "ip_id_df0_fixed",
        "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc",
    )

    # Tests specific to IPv6
    ipv6_tests = (
        "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff",
    )

    # Insertion order of this table is the protocol iteration order
    tests_by_protocol = {
        "ipv4": common_tests + ipv4_tests,
        "ipv6": common_tests + ipv6_tests,
        "ipip": common_tests + ipv4_tests,
    }

    for mode in ("sw", "hw", "lro"):
        for protocol, test_names in tests_by_protocol.items():
            for test_name in test_names:
                yield mode, protocol, test_name
324
325
@ksft_variants(_gro_variants())
def test(cfg, mode, protocol, test_name):
    """
    Run a single GRO test, retrying to ride out timing related failures.

    A failed "large_*" test is ignored right away when KSFT_MACHINE_SLOW is
    set in the environment.
    """

    ipver = "6" if protocol[-1] == "6" else "4"
    cfg.require_ipver(ipver)

    _setup(cfg, mode, test_name)

    # Each test is run 6 times to deflake, because given the receive timing,
    # not all packets that should coalesce will be considered in the same flow
    # on every try.
    max_retries = 6
    for attempt in range(1, max_retries + 1):
        # Only the final attempt asks bkg() to treat a failure as fatal
        last_attempt = attempt == max_retries
        rx_proc = _run_gro_bin(cfg, test_name, protocol=protocol,
                               verbose=True, fail=last_attempt)

        if rx_proc.ret == 0:
            return

        ksft_pr(rx_proc)

        slow_env = os.environ.get("KSFT_MACHINE_SLOW")
        if test_name.startswith("large_") and slow_env:
            ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
            return

        ksft_pr(f"Attempt {attempt}/{max_retries} failed, retrying...")
354
355
def _capacity_variants():
    """
    Yield the capacity test variants: every mode with every queue setup.

    Each variant is named "<mode>_<setup>" and carries the mode plus a
    callable which takes cfg and performs the queue setup.
    """
    queue_setups = {
        "isolated": _setup_isolated_queue,
        "1q": lambda cfg: _setup_queue_count(cfg, 1),
        "8q": lambda cfg: _setup_queue_count(cfg, 8),
    }
    for mode in ("sw", "hw", "lro"):
        for setup_name, setup_func in queue_setups.items():
            yield KsftNamedVariant(f"{mode}_{setup_name}", mode, setup_func)
366
367
@ksft_variants(_capacity_variants())
def test_gro_capacity(cfg, mode, setup_func):
    """
    Probe how many flows GRO can coalesce at once.

    Starts with 8 flows and doubles the count after every successful run.
    Each flow count gets up to 3 attempts; probing stops at the first count
    for which no attempt receives exactly one packet per flow.

    Variants combine mode (sw, hw, lro) with queue setup:
      - isolated: Use a single queue isolated from RSS
      - 1q: Configure NIC to use 1 queue
      - 8q: Configure NIC to use 8 queues

    Queue stats deltas are reported only when the queue setup returns a
    queue id.
    """
    max_retries = 3

    _setup(cfg, mode, "capacity")
    queue_id = setup_func(cfg)
    track_queue = queue_id is not None

    num_flows = 8
    while True:
        for attempt in range(max_retries):
            if track_queue:
                stats_before = _get_queue_stats(cfg, queue_id)

            rx_proc = _run_gro_bin(cfg, "capacity", num_flows=num_flows)
            output = rx_proc.stdout

            qstat_str = ""
            if track_queue:
                stats_after = _get_queue_stats(cfg, queue_id)
                qstat_pkts, gro_pkts = (
                    stats_after.get(key, 0) - stats_before.get(key, 0)
                    for key in ('rx-packets', 'rx-hw-gro-packets')
                )
                qstat_str = f" qstat={qstat_pkts} hw-gro={gro_pkts}"

            # Parse and print STATS line
            match = re.search(
                r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)', output)
            if match is None:
                ksft_pr(rx_proc)
                ksft_pr(f"flows={num_flows} attempt={attempt + 1}"
                        f"{qstat_str} [FAIL - can't parse stats]")
                continue

            received, wire, coalesced = (int(num) for num in match.groups())
            all_coalesced = received == num_flows
            status = "PASS" if all_coalesced else "MISS"
            ksft_pr(f"flows={num_flows} attempt={attempt + 1} "
                    f"received={received} wire={wire} "
                    f"coalesced={coalesced}{qstat_str} [{status}]")
            if all_coalesced:
                break
        else:
            # No attempt succeeded for this flow count, stop probing
            ksft_pr(f"Stopped at {num_flows} flows")
            break

        num_flows *= 2
430
431
def main() -> None:
    """ Ksft entry point: set up the environment and run all test cases """

    test_cases = [test, test_gro_capacity]

    with NetDrvEpEnv(__file__) as cfg:
        # Netlink families used by the queue setup and queue stats helpers
        cfg.ethnl = EthtoolFamily()
        cfg.netnl = NetdevFamily()
        ksft_run(cases=test_cases, args=(cfg,))
    ksft_exit()


if __name__ == "__main__":
    main()
444