xref: /linux/tools/testing/selftests/drivers/net/hw/rss_ctx.py (revision daa2be74b1b2302004945b2a5e32424e177cc7da)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
import datetime
import random

from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import rand_port
from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
12
13
def _rss_key_str(key):
    """Format an RSS key as colon-separated, two-digit lowercase hex.

    @key is an iterable of integers; an empty key yields an empty string.
    """
    return ":".join(f"{octet:02x}" for octet in key)
16
17
def _rss_key_rand(length):
    """Return a list of @length random integers, each in the range 0..255."""
    key = []
    for _ in range(length):
        key.append(random.randint(0, 255))
    return key
20
21
def get_rss(cfg, context=0):
    """Fetch the RSS configuration of one RSS context via ethtool.

    Runs ``ethtool -x <ifname> context <context>`` with JSON output and
    returns the first element of the decoded reply. @context defaults to 0.
    """
    query = f"-x {cfg.ifname} context {context}"
    reply = ethtool(query, json=True)
    return reply[0]
24
25
def get_drop_err_sum(cfg):
    """Return a tuple of (summed Rx error/drop counters, Tx carrier_changes).

    The counters come from the "stats64" section of the JSON output of
    ``ip -s -s link show dev <ifname>``.
    """
    rx_fields = ('errors', 'dropped', 'over_errors', 'fifo_errors',
                 'length_errors', 'crc_errors', 'missed_errors',
                 'frame_errors')

    link = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
    total = sum(link["stats64"]["rx"][field] for field in rx_fields)
    return total, link["stats64"]["tx"]["carrier_changes"]
34
35
def ethtool_create(cfg, act, opts):
    """Run an ethtool command which creates an object, return the new ID.

    The command line is ``ethtool <act> <ifname> <opts>``. The ID is parsed
    as an integer from the last whitespace-separated token of stdout.
    """
    result = ethtool(f"{act} {cfg.ifname} {opts}")
    # Output will be something like: "New RSS context is 1" or
    # "Added rule with ID 7", the integer we want is the final word.
    words = result.stdout.split()
    return int(words[-1])
41
42
def require_ntuple(cfg):
    """Raise KsftSkipEx unless ntuple filters are active on the device.

    The state is read from the "ntuple-filters" entry of the JSON output of
    ``ethtool -k <ifname>``.
    """
    ntuple = ethtool(f"-k {cfg.ifname}", json=True)[0]["ntuple-filters"]
    if ntuple["active"]:
        return
    # ntuple is more of a capability than a config knob, don't bother
    # trying to enable it (until some driver actually needs it).
    raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(ntuple))
49
50
51# Get Rx packet counts for all queues, as a simple list of integers
52# if @prev is specified the prev counts will be subtracted
def _get_rx_cnts(cfg, prev=None):
    """Return per-queue Rx packet counts as a list indexed by queue id.

    Counts come from a netdev netlink qstats dump with "queue" scope, after
    waiting for HW stats to settle. Queue ids missing from the dump read
    as 0. When @prev is given, its value is subtracted for every queue id
    which exists in @prev, so the result is a delta.
    """
    cfg.wait_hw_stats_settle()
    dump = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
    rx_entries = [entry for entry in dump if entry['queue-type'] == "rx"]

    highest_id = max(entry["queue-id"] for entry in rx_entries)
    counts = [0] * (highest_id + 1)
    for entry in rx_entries:
        qid = entry["queue-id"]
        pkts = entry["rx-packets"]
        if prev and qid < len(prev):
            pkts -= prev[qid]
        counts[qid] = pkts
    return counts
64
65
def _send_traffic_check(cfg, port, name, params):
    """Generate traffic towards @port and check which Rx queues saw it.

    @name is only used to label the failure message. @params is a dict
    with 3 possible keys, each mapping to an iterable of queue ids:
     - "target": required, queues expected to receive the generated traffic
                 (at least 20000 packets in total)
     - "empty": optional, queues which should see no traffic at all
     - "noise": optional, queues expected to see low traffic (less than
                half of the target traffic); used for queues of the main
                context, since some background OS activity may use those
                queues while we're testing
    """

    before = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    delta = _get_rx_cnts(cfg, prev=before)

    def queue_sum(queues):
        return sum(delta[q] for q in queues)

    directed = queue_sum(params['target'])
    ksft_ge(directed, 20000, f"traffic on {name}: " + str(delta))

    noise = params.get('noise')
    if noise:
        ksft_lt(queue_sum(noise), directed / 2,
                "traffic on other queues:" + str(delta))

    empty = params.get('empty')
    if empty:
        ksft_eq(queue_sum(empty), 0,
                "traffic on inactive queues: " + str(delta))
88
89
def test_rss_key_indir(cfg):
    """Test basics like updating the main RSS key and indirection table.

    Sets a random RSS key and reads it back, restricts the indirection table
    to the first 2 queues and checks traffic only lands there, then restores
    the default table and checks traffic spreads to the other queues again.

    Raises KsftSkipEx when the device has fewer than 3 Rx queues and
    KsftFailEx when ethtool output lacks one of the expected fields.
    """

    # The final check requires traffic on queues other than the first two,
    # so at least 3 queues are needed for the test to be meaningful.
    if len(_get_rx_cnts(cfg)) < 3:
        raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")

    data = get_rss(cfg)
    want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
    for k in want_keys:
        if k not in data:
            raise KsftFailEx("ethtool results missing key: " + k)
        if not data[k]:
            raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")

    key_len = len(data['rss-hash-key'])

    # Set the key
    key = _rss_key_rand(key_len)
    ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))

    data = get_rss(cfg)
    ksft_eq(key, data['rss-hash-key'])

    # Set the indirection table
    ethtool(f"-X {cfg.ifname} equal 2")
    # Keep the handle, the table is restored explicitly half way through
    reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
    data = get_rss(cfg)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(1, max(data['rss-indirection-table']))

    # Check we only get traffic on the first 2 queues
    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)
    # 2 queues, 20k packets, must be at least 5k per queue
    ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
    ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
    # The other queues should be unused
    ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))

    # Restore, and check traffic gets spread again
    reset_indir.exec()

    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)
    # First two queues get less traffic than all the rest
    ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
138
139
def test_rss_queue_reconfigure(cfg, main_ctx=True):
    """Make sure queue changes can't override requested RSS config.

    By default main RSS table should change to include all queues.
    When user sets a specific RSS config the driver should preserve it,
    even when queue count changes. Driver should refuse to deactivate
    queues used in the user-set RSS config.

    With main_ctx=False the checks are run against a newly created
    additional RSS context (steered to with an ntuple rule) instead of
    the main context; this requires ntuple filters to be active.
    """

    if not main_ctx:
        require_ntuple(cfg)

    # Start with 4 queues, an arbitrary known number.
    try:
        qcnt = len(_get_rx_cnts(cfg))
        ethtool(f"-L {cfg.ifname} combined 4")
        defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
    except Exception as exc:
        # Deliberately not a bare except: KeyboardInterrupt / SystemExit
        # must not be reported as a skipped test.
        raise KsftSkipEx("Not enough queues for the test or qstat not supported") from exc

    if main_ctx:
        ctx_id = 0
        ctx_ref = ""
    else:
        ctx_id = ethtool_create(cfg, "-X", "context new")
        ctx_ref = f"context {ctx_id}"
        defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")

    # Indirection table should be distributing to all queues.
    data = get_rss(cfg, context=ctx_id)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(3, max(data['rss-indirection-table']))

    # Increase queues, indirection table should be distributing to all queues.
    # It's unclear whether tables of additional contexts should be reset, too.
    if main_ctx:
        ethtool(f"-L {cfg.ifname} combined 5")
        data = get_rss(cfg)
        ksft_eq(0, min(data['rss-indirection-table']))
        ksft_eq(4, max(data['rss-indirection-table']))
        ethtool(f"-L {cfg.ifname} combined 4")

    # Configure the table explicitly
    port = rand_port()
    ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
    if main_ctx:
        # Queues outside the table must see nothing at all
        other_key = 'empty'
        defer(ethtool, f"-X {cfg.ifname} default")
    else:
        # Queues 1 and 2 still belong to the main context, so they may
        # see some background traffic
        other_key = 'noise'
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
        ntuple = ethtool_create(cfg, "-N", flow)
        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")

    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
                                              other_key: (1, 2) })

    # We should be able to increase queues, but table should be left untouched
    ethtool(f"-L {cfg.ifname} combined 5")
    data = get_rss(cfg, context=ctx_id)
    ksft_eq({0, 3}, set(data['rss-indirection-table']))

    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
                                              other_key: (1, 2, 4) })

    # Setting queue count to 3 should fail, queue 3 is used
    try:
        ethtool(f"-L {cfg.ifname} combined 3")
    except CmdExitFailure:
        pass
    else:
        raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
212
213
def test_rss_resize(cfg):
    """Test resizing of the RSS table.

    Some devices dynamically increase and decrease the size of the RSS
    indirection table based on the number of enabled queues.
    When that happens driver must maintain the balance of entries
    (preferably duplicating the smaller table).

    The test configures 2 queues with weights 1:7, grows the queue count
    to the maximum and checks that the 1:7 ratio of entries is preserved.
    """

    ch_info = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    ch_max = ch_info['combined-max']
    qcnt = ch_info['combined-count']

    if ch_max < 2:
        raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")

    # Shrink to 2 queues and set up an uneven table
    ethtool(f"-L {cfg.ifname} combined 2")
    defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")

    ethtool(f"-X {cfg.ifname} weight 1 7")
    defer(ethtool, f"-X {cfg.ifname} default")

    # Grow the queue count, the table must still only point at queues 0 and 1
    ethtool(f"-L {cfg.ifname} combined {ch_max}")
    table = get_rss(cfg)['rss-indirection-table']
    ksft_eq(0, min(table))
    ksft_eq(1, max(table))

    ones = table.count(1)
    zeros = table.count(0)
    ksft_eq(7, ones / zeros, f"Table imbalance after resize: {table}")
245
246
def test_hitless_key_update(cfg):
    """Test that flows may be rehashed without impacting traffic.

    Some workloads may want to rehash the flows in response to an imbalance.
    Most effective way to do that is changing the RSS key. Check that changing
    the key does not cause link flaps or traffic disruption.

    Disrupting traffic for key update is not a bug, but makes the key
    update unusable for rehashing under load.

    The key update must complete in under 0.2 seconds and must not change
    either the Rx error/drop counter sum or the carrier change count.
    """
    data = get_rss(cfg)
    key_len = len(data['rss-hash-key'])

    key = _rss_key_rand(key_len)

    tgen = GenerateTraffic(cfg)
    try:
        # Snapshot the counters right before and right after the key update
        errors0, carrier0 = get_drop_err_sum(cfg)
        t0 = datetime.datetime.now()
        ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
        t1 = datetime.datetime.now()
        errors1, carrier1 = get_drop_err_sum(cfg)
    finally:
        # Always stop the traffic generator, even if the update failed
        tgen.wait_pkts_and_stop(5000)

    ksft_lt((t1 - t0).total_seconds(), 0.2)
    # Compare against the "before" snapshot, not the value with itself
    ksft_eq(errors1 - errors0, 0)
    ksft_eq(carrier1 - carrier0, 0)
275
276
def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
    """
    Test separating traffic into RSS contexts.
    The queues will be allocated 2 for each context:
     ctx0  ctx1  ctx2  ctx3
    [0 1] [2 3] [4 5] [6 7] ...

    @ctx_cnt is the number of additional contexts to create. When
    @create_with_cfg is true the indirection table is configured in the
    same ethtool command which creates the context, otherwise it is set by
    a separate command afterwards.

    If only some of the contexts could be created the test checks those
    and then raises KsftSkipEx.
    """

    require_ntuple(cfg)

    requested_ctx_cnt = ctx_cnt

    # Try to allocate more queues when necessary
    qcnt = len(_get_rx_cnts(cfg))
    if qcnt < 2 + 2 * ctx_cnt:
        try:
            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
        except Exception as exc:
            # Deliberately not a bare except: KeyboardInterrupt / SystemExit
            # must not be reported as a skipped test.
            raise KsftSkipEx("Not enough queues for the test") from exc

    ports = []

    # Use queues 0 and 1 for normal traffic
    ethtool(f"-X {cfg.ifname} equal 2")
    defer(ethtool, f"-X {cfg.ifname} default")

    for i in range(ctx_cnt):
        want_cfg = f"start {2 + i * 2} equal 2"
        create_cfg = want_cfg if create_with_cfg else ""

        try:
            ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
            defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
        except CmdExitFailure:
            # try to carry on and skip at the end
            if i == 0:
                raise
            ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
            ctx_cnt = i
            break

        if not create_with_cfg:
            ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")

        # Sanity check the context we just created
        data = get_rss(cfg, ctx_id)
        ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
        ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))

        # Steer a dedicated TCP destination port into this context
        ports.append(rand_port())
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
        ntuple = ethtool_create(cfg, "-N", flow)
        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")

    for i in range(ctx_cnt):
        # Queues of all the other additional contexts must stay idle
        _send_traffic_check(cfg, ports[i], f"context {i}",
                            { 'target': (2+i*2, 3+i*2),
                              'noise': (0, 1),
                              'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })

    if requested_ctx_cnt != ctx_cnt:
        raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
341
342
def test_rss_context4(cfg):
    """Run test_rss_context() with 4 additional RSS contexts."""
    test_rss_context(cfg, ctx_cnt=4)
345
346
def test_rss_context32(cfg):
    """Run test_rss_context() with 32 additional RSS contexts."""
    test_rss_context(cfg, ctx_cnt=32)
349
350
def test_rss_context4_create_with_cfg(cfg):
    """Run test_rss_context() with 4 contexts, configured at creation time."""
    test_rss_context(cfg, ctx_cnt=4, create_with_cfg=True)
353
354
def test_rss_context_queue_reconfigure(cfg):
    """Run test_rss_queue_reconfigure() against an additional RSS context."""
    test_rss_queue_reconfigure(cfg, main_ctx=False)
357
358
def test_rss_context_out_of_order(cfg, ctx_cnt=4):
    """
    Test separating traffic into RSS contexts.
    Contexts are removed in semi-random order, and steering re-tested
    to make sure removal doesn't break steering to surviving contexts.
    Test requires 3 contexts to work.

    Each context gets 2 queues, starting from queue 2, and a dedicated
    ntuple rule. Once a context is removed the traffic for its port is
    expected to land on queues 0 and 1 of the main context.
    """

    require_ntuple(cfg)

    requested_ctx_cnt = ctx_cnt

    # Try to allocate more queues when necessary
    qcnt = len(_get_rx_cnts(cfg))
    if qcnt < 2 + 2 * ctx_cnt:
        try:
            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
        except Exception as exc:
            # Deliberately not a bare except: KeyboardInterrupt / SystemExit
            # must not be reported as a skipped test.
            raise KsftSkipEx("Not enough queues for the test") from exc

    # Deferred cleanup handles, indexed by context number; an entry is
    # set to None once the cleanup has been executed by remove_ctx()
    ntuple = []
    ctx = []
    ports = []

    def remove_ctx(idx):
        # Remove the steering rule first, then the context it points to
        ntuple[idx].exec()
        ntuple[idx] = None
        ctx[idx].exec()
        ctx[idx] = None

    def check_traffic():
        for i in range(ctx_cnt):
            if ctx[i]:
                expected = {
                    'target': (2+i*2, 3+i*2),
                    'noise': (0, 1),
                    'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
                }
            else:
                # Context is gone, traffic should hit the main context
                expected = {
                    'target': (0, 1),
                    'empty':  range(2, 2+2*ctx_cnt)
                }

            _send_traffic_check(cfg, ports[i], f"context {i}", expected)

    # Use queues 0 and 1 for normal traffic
    ethtool(f"-X {cfg.ifname} equal 2")
    defer(ethtool, f"-X {cfg.ifname} default")

    for i in range(ctx_cnt):
        ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
        ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))

        ports.append(rand_port())
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
        ntuple_id = ethtool_create(cfg, "-N", flow)
        ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))

    check_traffic()

    # Remove middle context
    remove_ctx(ctx_cnt // 2)
    check_traffic()

    # Remove first context
    remove_ctx(0)
    check_traffic()

    # Remove last context
    remove_ctx(-1)
    check_traffic()

    # ctx_cnt is never reduced in this function, so this does not trigger
    # today; kept to mirror the partial-run handling of test_rss_context()
    if requested_ctx_cnt != ctx_cnt:
        raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
436
437
def test_rss_context_overlap(cfg, other_ctx=0):
    """
    Test contexts overlapping with each other.
    Use 4 queues for the main context, but only queues 2 and 3 for context 1.

    When @other_ctx is true an additional context spanning the 4 queues is
    created and used in place of the main context.
    """

    require_ntuple(cfg)

    queue_cnt = len(_get_rx_cnts(cfg))
    if queue_cnt < 4:
        try:
            ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
            ethtool(f"-L {cfg.ifname} combined 4")
            defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
        except Exception as exc:
            # Deliberately not a bare except: KeyboardInterrupt / SystemExit
            # must not be reported as a skipped test.
            raise KsftSkipEx("Not enough queues for the test") from exc

    if other_ctx == 0:
        ethtool(f"-X {cfg.ifname} equal 4")
        defer(ethtool, f"-X {cfg.ifname} default")
    else:
        # From here on other_ctx holds the ID of the wide context
        other_ctx = ethtool_create(cfg, "-X", "context new")
        ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
        defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")

    ctx_id = ethtool_create(cfg, "-X", "context new")
    ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")

    port = rand_port()
    if other_ctx:
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}"
        ntuple_id = ethtool_create(cfg, "-N", flow)
        # Keep the handle, this rule is removed before the second phase
        ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    # Test the main context
    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)

    ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
    ksft_ge(sum(cnts[ :2]),  7000, "traffic on main context (1/2): " + str(cnts))
    ksft_ge(sum(cnts[2:4]),  7000, "traffic on main context (2/2): " + str(cnts))
    if other_ctx == 0:
        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))

    # Now create a rule for context 1 and make sure traffic goes to a subset
    if other_ctx:
        ntuple.exec()
    flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
    ntuple_id = ethtool_create(cfg, "-N", flow)
    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)

    directed = sum(cnts[2:4])
    ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
    ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
    if other_ctx == 0:
        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))
500
501
def test_rss_context_overlap2(cfg):
    """Run test_rss_context_overlap() using two additional contexts."""
    test_rss_context_overlap(cfg, other_ctx=True)
504
505
def main() -> None:
    """Set up the test environment and run all test cases of this file.

    The environment is a NetDrvEpEnv created with nsim_test=False; ethtool
    and netdev netlink families are attached to it as cfg.ethnl and
    cfg.netdevnl before the cases are run.
    """
    cases = [
        test_rss_key_indir, test_rss_queue_reconfigure,
        test_rss_resize, test_hitless_key_update,
        test_rss_context, test_rss_context4, test_rss_context32,
        test_rss_context_queue_reconfigure,
        test_rss_context_overlap, test_rss_context_overlap2,
        test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
    ]

    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        cfg.ethnl = EthtoolFamily()
        cfg.netdevnl = NetdevFamily()

        ksft_run(cases, args=(cfg, ))
    ksft_exit()
519
520
# Run the tests only when this file is executed as a script.
if __name__ == "__main__":
    main()
523