xref: /linux/tools/testing/selftests/drivers/net/gro.py (revision 40286d6379aacfcc053253ef78dc78b09addffda)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
4"""
5GRO (Generic Receive Offload) conformance tests.
6
7Validates that GRO coalescing works correctly by running the gro
8binary in different configurations and checking for correct packet
9coalescing behavior.
10
11Test cases:
12  - data_same: Same size data packets coalesce
13  - data_lrg_sml: Large packet followed by smaller one coalesces
14  - data_lrg_1byte: Large packet followed by 1B one coalesces (Ethernet padding)
15  - data_sml_lrg: Small packet followed by larger one doesn't coalesce
16  - ack: Pure ACK packets do not coalesce
17  - flags_psh: Packets with PSH flag don't coalesce
18  - flags_syn: Packets with SYN flag don't coalesce
19  - flags_rst: Packets with RST flag don't coalesce
20  - flags_urg: Packets with URG flag don't coalesce
21  - flags_cwr: Packets with CWR flag don't coalesce
22  - tcp_csum: Packets with incorrect checksum don't coalesce
23  - tcp_seq: Packets with non-consecutive seqno don't coalesce
24  - tcp_ts: Packets with different timestamp options don't coalesce
25  - tcp_opt: Packets with different TCP options don't coalesce
26  - ip_ecn: Packets with different ECN don't coalesce
27  - ip_tos: Packets with different TOS don't coalesce
28  - ip_ttl: (IPv4) Packets with different TTL don't coalesce
29  - ip_opt: (IPv4) Packets with IP options don't coalesce
30  - ip_frag4: (IPv4) IPv4 fragments don't coalesce
31  - ip_id_df*: (IPv4) IP ID field coalescing tests
32  - ip_frag6: (IPv6) IPv6 fragments don't coalesce
33  - ip_v6ext_same: (IPv6) IPv6 ext header with same payload coalesces
34  - ip_v6ext_diff: (IPv6) IPv6 ext header with different payload doesn't coalesce
35  - large_max: Packets exceeding GRO_MAX_SIZE don't coalesce
36  - large_rem: Large packet remainder handling
37"""
38
39import glob
40import os
41import re
42from lib.py import ksft_run, ksft_exit, ksft_pr
43from lib.py import NetDrvEpEnv, KsftXfailEx
44from lib.py import NetdevFamily, EthtoolFamily
45from lib.py import bkg, cmd, defer, ethtool, ip
46from lib.py import ksft_variants, KsftNamedVariant
47
48
49# gro.c uses hardcoded DPORT=8000
50GRO_DPORT = 8000
51
52
53def _resolve_dmac(cfg, ipver):
54    """
55    Find the destination MAC address remote host should use to send packets
56    towards the local host. It may be a router / gateway address.
57    """
58
59    attr = "dmac" + ipver
60    # Cache the response across test cases
61    if hasattr(cfg, attr):
62        return getattr(cfg, attr)
63
64    route = ip(f"-{ipver} route get {cfg.addr_v[ipver]}",
65               json=True, host=cfg.remote)[0]
66    gw = route.get("gateway")
67    # Local L2 segment, address directly
68    if not gw:
69        setattr(cfg, attr, cfg.dev['address'])
70        return getattr(cfg, attr)
71
72    # ping to make sure neighbor is resolved,
73    # bind to an interface, for v6 the GW is likely link local
74    cmd(f"ping -c1 -W0 -I{cfg.remote_ifname} {gw}", host=cfg.remote)
75
76    neigh = ip(f"neigh get {gw} dev {cfg.remote_ifname}",
77               json=True, host=cfg.remote)[0]
78    setattr(cfg, attr, neigh['lladdr'])
79    return getattr(cfg, attr)
80
81
82def _write_defer_restore(cfg, path, val, defer_undo=False):
83    with open(path, "r", encoding="utf-8") as fp:
84        orig_val = fp.read().strip()
85        if str(val) == orig_val:
86            return
87    with open(path, "w", encoding="utf-8") as fp:
88        fp.write(val)
89    if defer_undo:
90        defer(_write_defer_restore, cfg, path, orig_val)
91
92
93def _set_mtu_restore(dev, mtu, host):
94    if dev['mtu'] < mtu:
95        ip(f"link set dev {dev['ifname']} mtu {mtu}", host=host)
96        defer(ip, f"link set dev {dev['ifname']} mtu {dev['mtu']}", host=host)
97
98
99def _set_ethtool_feat(dev, current, feats, host=None):
100    s2n = {True: "on", False: "off"}
101
102    new = ["-K", dev]
103    old = ["-K", dev]
104    no_change = True
105    for name, state in feats.items():
106        new += [name, s2n[state]]
107        old += [name, s2n[current[name]["active"]]]
108
109        if current[name]["active"] != state:
110            no_change = False
111            if current[name]["fixed"]:
112                raise KsftXfailEx(f"Device does not support {name}")
113    if no_change:
114        return
115
116    eth_cmd = ethtool(" ".join(new), host=host)
117    defer(ethtool, " ".join(old), host=host)
118
119    # If ethtool printed something kernel must have modified some features
120    if eth_cmd.stdout:
121        ksft_pr(eth_cmd)
122
123
124def _get_queue_stats(cfg, queue_id):
125    """Get stats for a specific Rx queue."""
126    cfg.wait_hw_stats_settle()
127    data = cfg.netnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]},
128                                dump=True)
129    for q in data:
130        if q.get('queue-type') == 'rx' and q.get('queue-id') == queue_id:
131            return q
132    return {}
133
134
def _setup_isolated_queue(cfg):
    """Dedicate Rx queue 1 to the test traffic and return its index.

    The queue is given weight 0 in the default RSS context and an ntuple
    filter steers TCP traffic for the local address and the GRO test
    port to it. Both changes are undone through deferred commands.

    Raises KsftXfailEx when the device exposes fewer than two Rx queues.
    """
    isolated = 1

    rx_dirs = glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")
    if len(rx_dirs) < 2:
        raise KsftXfailEx(f"Need at least 2 queues, have {len(rx_dirs)}")

    # Weight 0 takes the isolated queue out of the default RSS context
    weights = ["0" if idx == isolated else "1"
               for idx in range(len(rx_dirs))]
    ethtool(f"-X {cfg.ifname} weight " + " ".join(weights))
    defer(ethtool, f"-X {cfg.ifname} default")

    # ntuple filter sending our test traffic to the isolated queue
    flow_spec = (f"flow-type tcp{cfg.addr_ipver} "
                 f"dst-ip {cfg.addr} dst-port {GRO_DPORT} action {isolated}")
    reply = ethtool(f"-N {cfg.ifname} {flow_spec}").stdout
    # The last word of ethtool's output is used as the ID of the new rule
    rule_id = int(reply.split()[-1])
    defer(ethtool, f"-N {cfg.ifname} delete {rule_id}")

    return isolated
160
161
def _setup_queue_count(cfg, num_queues):
    """Configure the NIC to use a specific number of combined queues.

    The channel configuration is read over ethtool netlink. If the
    device reports fewer than num_queues combined channels as its
    maximum (a missing 'combined-max' counts as 0), KsftXfailEx is
    raised. Otherwise a deferred command restoring the current combined
    count is registered and the count is set to num_queues.

    Nothing is returned, i.e. callers get None.
    """
    channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    ch_max = channels.get('combined-max', 0)

    # Check the maximum before touching 'combined-count': when the device
    # reports no combined channels the count may be absent from the reply
    # as well, and the test should be XFAILed rather than die on KeyError.
    if ch_max < num_queues:
        raise KsftXfailEx(f"Need at least {num_queues} queues, max={ch_max}")

    qcnt = channels['combined-count']

    # Register the restore first so it runs even if the change below fails
    defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
    ethtool(f"-L {cfg.ifname} combined {num_queues}")
173
174
def _run_gro_bin(cfg, test_name, protocol=None, num_flows=None,
                 order_check=False, verbose=False, fail=False):
    """
    Run one test of the gro binary: receiver locally, sender on the
    remote host, and return the receiver's process object.

    The binary is deployed to the remote on first use and the paths are
    remembered on cfg. Without an explicit protocol the IP version of
    cfg is used ("ipv4" / "ipv6"); otherwise the IP version is derived
    from the last character of the protocol name. num_flows, order_check
    and verbose add the corresponding command line options when truthy.
    fail is passed through to the background receiver command.
    """
    if not hasattr(cfg, "bin_remote"):
        cfg.bin_local = cfg.net_lib_dir / "gro"
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

    if protocol is not None:
        ipver = "6" if protocol[-1] == "6" else "4"
    else:
        ipver = cfg.addr_ipver
        protocol = f"ipv{ipver}"

    dmac = _resolve_dmac(cfg, ipver)

    # Options shared by the sending and the receiving side
    opts = [
        f"--{protocol}",
        f"--dmac {dmac}",
        f"--smac {cfg.remote_dev['address']}",
        f"--daddr {cfg.addr_v[ipver]}",
        f"--saddr {cfg.remote_addr_v[ipver]}",
        f"--test {test_name}",
    ]
    optional = (
        (num_flows, f"--num-flows {num_flows}"),
        (order_check, "--order-check"),
        (verbose, "--verbose"),
    )
    opts.extend(opt for enabled, opt in optional if enabled)
    shared = " ".join(opts)

    receiver = f"{cfg.bin_local} {shared} --rx --iface {cfg.ifname}"
    sender = f"{cfg.bin_remote} {shared} --iface {cfg.remote_ifname}"

    # Receiver runs in the background while the remote transmits
    with bkg(receiver, ksft_ready=True, exit_wait=True, fail=fail) as rx_proc:
        cmd(sender, host=cfg.remote)

    return rx_proc
214
215
def _setup(cfg, mode, test_name):
    """
    Prepare both endpoints for running test_name in the given mode.

    Deploys the gro binary and records the ethtool feature state of both
    interfaces on first use, raises the MTU to 4096 for "large_*" tests
    and then configures the receive offloads according to mode:
      - "sw":  GRO on, HW GRO and LRO off, plus gro_flush_timeout and
               napi_defer_hard_irqs tuning via sysfs
      - "hw":  HW GRO on, GRO and LRO off, with a generic XDP fallback
               when that leaves HW GRO inactive
      - "lro": LRO on, GRO and HW GRO off
    Finally TSO is disabled on the remote interface, but only if
    cfg.require_nsim() does not raise KsftXfailEx.
    """

    def _offloads(sw_gro, hw_gro, lro):
        # Key order matters, it becomes the ethtool argument order
        return {"generic-receive-offload": sw_gro,
                "rx-gro-hw": hw_gro,
                "large-receive-offload": lro}

    if not hasattr(cfg, "bin_remote"):
        cfg.bin_local = cfg.net_lib_dir / "gro"
        cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)

    if not hasattr(cfg, "feat"):
        cfg.feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
        cfg.remote_feat = ethtool(f"-k {cfg.remote_ifname}",
                                  host=cfg.remote, json=True)[0]

    # "large_*" tests need at least 4k MTU
    if test_name.startswith("large_"):
        _set_mtu_restore(cfg.dev, 4096, None)
        _set_mtu_restore(cfg.remote_dev, 4096, cfg.remote)

    if mode == "sw":
        sysfs_dir = f"/sys/class/net/{cfg.ifname}"

        _write_defer_restore(cfg, f"{sysfs_dir}/gro_flush_timeout",
                             "200000", defer_undo=True)
        _write_defer_restore(cfg, f"{sysfs_dir}/napi_defer_hard_irqs",
                             "10", defer_undo=True)

        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(True, False, False))
    elif mode == "hw":
        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(False, True, False))

        # Some NICs treat HW GRO as a GRO sub-feature so disabling GRO
        # will also clear HW GRO. Use a hack of installing XDP generic
        # to skip SW GRO, even when enabled.
        live_feat = ethtool(f"-k {cfg.ifname}", json=True)[0]
        if not live_feat["rx-gro-hw"]["active"]:
            ksft_pr("Driver clears HW GRO and SW GRO is cleared, using generic XDP workaround")
            xdp_obj = cfg.net_lib_dir / "xdp_dummy.bpf.o"
            ip(f"link set dev {cfg.ifname} xdpgeneric obj {xdp_obj} sec xdp")
            defer(ip, f"link set dev {cfg.ifname} xdpgeneric off")

            # Attaching XDP may change features, fetch the latest state
            live_feat = ethtool(f"-k {cfg.ifname}", json=True)[0]

            _set_ethtool_feat(cfg.ifname, live_feat,
                              _offloads(True, True, False))
    elif mode == "lro":
        # netdevsim advertises LRO for feature inheritance testing with
        # bonding/team tests but it doesn't actually perform the offload
        cfg.require_nsim(nsim_test=False)

        _set_ethtool_feat(cfg.ifname, cfg.feat,
                          _offloads(False, False, True))

    try:
        # Disable TSO for local tests
        cfg.require_nsim()  # will raise KsftXfailEx if not running on nsim

        _set_ethtool_feat(cfg.remote_ifname, cfg.remote_feat,
                          {"tcp-segmentation-offload": False},
                          host=cfg.remote)
    except KsftXfailEx:
        # Best effort: the TSO change only applies to netdevsim runs
        pass
286
287
288def _gro_variants():
289    """Generator that yields all combinations of protocol and test types."""
290
291    # Tests that work for all protocols
292    common_tests = [
293        "data_same", "data_lrg_sml", "data_sml_lrg", "data_lrg_1byte",
294        "data_burst",
295        "ack",
296        "flags_psh", "flags_syn", "flags_rst", "flags_urg", "flags_cwr",
297        "tcp_csum", "tcp_seq", "tcp_ts", "tcp_opt",
298        "ip_ecn", "ip_tos",
299        "large_max", "large_rem",
300    ]
301
302    # Tests specific to IPv4
303    ipv4_tests = [
304        "ip_csum",
305        "ip_ttl", "ip_opt", "ip_frag4",
306        "ip_id_df1_inc", "ip_id_df1_fixed",
307        "ip_id_df0_inc", "ip_id_df0_fixed",
308        "ip_id_df1_inc_fixed", "ip_id_df1_fixed_inc",
309    ]
310
311    # Tests specific to IPv6
312    ipv6_tests = [
313        "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff",
314    ]
315
316    for mode in ["sw", "hw", "lro"]:
317        for protocol in ["ipv4", "ipv6", "ipip", "ip6ip6"]:
318            for test_name in common_tests:
319                yield mode, protocol, test_name
320
321            if protocol in ["ipv4", "ipip"]:
322                for test_name in ipv4_tests:
323                    yield mode, protocol, test_name
324            elif protocol == "ipv6":
325                for test_name in ipv6_tests:
326                    yield mode, protocol, test_name
327
328
@ksft_variants(_gro_variants())
def test(cfg, mode, protocol, test_name):
    """
    Run one GRO test case, retrying on failure.

    The receiver's exit code decides success. Only the final attempt is
    run with fail=True; earlier failures are printed and retried. A
    failing "large_*" test is ignored when KSFT_MACHINE_SLOW is set in
    the environment.
    """

    ipver = "6" if protocol[-1] == "6" else "4"
    cfg.require_ipver(ipver)

    _setup(cfg, mode, test_name)

    # Up to 6 runs per test to deflake: depending on receive timing, not
    # all packets that should coalesce end up considered in the same
    # flow on every try.
    max_retries = 6
    for attempt in range(1, max_retries + 1):
        last_attempt = attempt == max_retries
        rx_proc = _run_gro_bin(cfg, test_name, protocol=protocol,
                               verbose=True, fail=last_attempt)

        if rx_proc.ret == 0:
            return

        ksft_pr(rx_proc)

        if test_name.startswith("large_") and os.environ.get("KSFT_MACHINE_SLOW"):
            ksft_pr(f"Ignoring {protocol}/{test_name} failure due to slow environment")
            return

        ksft_pr(f"Attempt {attempt}/{max_retries} failed, retrying...")
357
358
def _capacity_variants():
    """
    Yield one named variant per (mode, queue setup) pair for the
    capacity test. Each variant is named "<mode>_<setup>" and carries
    the mode and a callable which takes cfg and applies the queue setup.
    """

    def _fixed_queue_count(count):
        # Bind count now so each variant configures its own queue count
        return lambda cfg: _setup_queue_count(cfg, count)

    queue_setups = {
        "isolated": _setup_isolated_queue,
        "1q": _fixed_queue_count(1),
        "8q": _fixed_queue_count(8),
    }

    for mode in ("sw", "hw", "lro"):
        for label, setup_func in queue_setups.items():
            yield KsftNamedVariant(f"{mode}_{label}", mode, setup_func)
369
370
@ksft_variants(_capacity_variants())
def test_gro_capacity(cfg, mode, setup_func):
    """
    Probe how many flows GRO can handle at once.

    Starts with 8 flows and doubles the count after every successful
    run. A flow count gets up to 3 attempts; a run is successful when
    the "received" value of the receiver's STATS line equals the number
    of flows. Probing stops at the first flow count that never succeeds.

    Variants combine mode (sw, hw, lro) with queue setup:
      - isolated: Use a single queue isolated from RSS
      - 1q: Configure NIC to use 1 queue
      - 8q: Configure NIC to use 8 queues

    When the queue setup returns a queue id, the change of that queue's
    rx-packets and rx-hw-gro-packets counters over each run is reported
    alongside the result.
    """
    max_retries = 3
    stats_pattern = r'STATS: received=(\d+) wire=(\d+) coalesced=(\d+)'

    _setup(cfg, mode, "capacity")
    queue_id = setup_func(cfg)
    track_queue = queue_id is not None

    num_flows = 8
    while True:
        for attempt in range(1, max_retries + 1):
            before = _get_queue_stats(cfg, queue_id) if track_queue else None

            rx_proc = _run_gro_bin(cfg, "capacity", num_flows=num_flows)
            output = rx_proc.stdout

            qstat_str = ""
            if track_queue:
                after = _get_queue_stats(cfg, queue_id)
                qstat_pkts = (after.get('rx-packets', 0) -
                              before.get('rx-packets', 0))
                gro_pkts = (after.get('rx-hw-gro-packets', 0) -
                            before.get('rx-hw-gro-packets', 0))
                qstat_str = f" qstat={qstat_pkts} hw-gro={gro_pkts}"

            # Parse and print STATS line
            match = re.search(stats_pattern, output)
            if match is None:
                ksft_pr(rx_proc)
                ksft_pr(f"flows={num_flows} attempt={attempt}"
                        f"{qstat_str} [FAIL - can't parse stats]")
                continue

            received, wire, coalesced = (int(num) for num in match.groups())
            all_seen = received == num_flows
            status = "PASS" if all_seen else "MISS"
            ksft_pr(f"flows={num_flows} attempt={attempt} "
                    f"received={received} wire={wire} "
                    f"coalesced={coalesced}{qstat_str} [{status}]")
            if all_seen:
                break
        else:
            # Every attempt at this flow count failed, probing is over
            ksft_pr(f"Stopped at {num_flows} flows")
            return

        num_flows *= 2
433
434
def main() -> None:
    """ Ksft entry point: set up the environment and run all test cases """

    cases = [test, test_gro_capacity]

    with NetDrvEpEnv(__file__) as cfg:
        # Netlink families used by the queue count and qstats helpers
        cfg.ethnl = EthtoolFamily()
        cfg.netnl = NetdevFamily()
        ksft_run(cases=cases, args=(cfg,))
    ksft_exit()


if __name__ == "__main__":
    main()
447