xref: /linux/tools/testing/selftests/drivers/net/hw/rss_ctx.py (revision 60cb1da6ed4a62ec8331e25ad4be87115cd28feb)
1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0
3
import datetime
import random

from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx, KsftFailEx
from lib.py import rand_port
from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
12
13
14def _rss_key_str(key):
15    return ":".join(["{:02x}".format(x) for x in key])
16
17
18def _rss_key_rand(length):
19    return [random.randint(0, 255) for _ in range(length)]
20
21
22def _rss_key_check(cfg, data=None, context=0):
23    if data is None:
24        data = get_rss(cfg, context=context)
25    if 'rss-hash-key' not in data:
26        return
27    non_zero = [x for x in data['rss-hash-key'] if x != 0]
28    ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
29
30
def get_rss(cfg, context=0):
    """Query the RSS configuration of @context with `ethtool -x`.

    The command is run with JSON output and the first element of the
    result is returned.
    """
    reply = ethtool(f"-x {cfg.ifname} context {context}", json=True)
    return reply[0]
33
34
def get_drop_err_sum(cfg):
    """Collect Rx error/drop statistics of the interface under test.

    Returns a tuple of (sum of the Rx error and drop counters, carrier
    changes), both taken from the "stats64" section of the JSON output
    of `ip -s -s link show`.
    """
    rx_err_keys = ('errors', 'dropped', 'over_errors', 'fifo_errors',
                   'length_errors', 'crc_errors', 'missed_errors',
                   'frame_errors')
    link = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
    total = sum(link["stats64"]["rx"][name] for name in rx_err_keys)
    return total, link["stats64"]["tx"]["carrier_changes"]
43
44
def ethtool_create(cfg, act, opts):
    """Run an ethtool command which creates an object, return the new ID.

    The ID is taken from the last word of the command output, which is
    expected to look like "New RSS context is 1" or "Added rule with ID 7".
    """
    result = ethtool(f"{act} {cfg.ifname} {opts}")
    words = result.stdout.split()
    return int(words[-1])
50
51
def require_ntuple(cfg):
    """Skip the current test unless ntuple filters are active on the device."""
    ntuple = ethtool(f"-k {cfg.ifname}", json=True)[0]["ntuple-filters"]
    if ntuple["active"]:
        return
    # ntuple is more of a capability than a config knob, so no attempt is
    # made to enable it (until some driver actually needs that).
    raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(ntuple))
58
59
60# Get Rx packet counts for all queues, as a simple list of integers
61# if @prev is specified the prev counts will be subtracted
62def _get_rx_cnts(cfg, prev=None):
63    cfg.wait_hw_stats_settle()
64    data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
65    data = [x for x in data if x['queue-type'] == "rx"]
66    max_q = max([x["queue-id"] for x in data])
67    queue_stats = [0] * (max_q + 1)
68    for q in data:
69        queue_stats[q["queue-id"]] = q["rx-packets"]
70        if prev and q["queue-id"] < len(prev):
71            queue_stats[q["queue-id"]] -= prev[q["queue-id"]]
72    return queue_stats
73
74
def _send_traffic_check(cfg, port, name, params):
    """Send traffic to @port and check which Rx queues have received it.

    @name is only used to label the failure message.
    @params is a dict with 3 possible keys:
     - "target": required, which queues we expect to get iperf traffic
     - "empty": optional, which queues should see no traffic at all
     - "noise": optional, which queues we expect to see low traffic;
                used for queues of the main context, since some background
                OS activity may use those queues while we're testing
    The value for each is a list, or some other iterable containing queue ids.
    """
    before = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=before)

    def _pkts(queues):
        return sum(cnts[i] for i in queues)

    directed = _pkts(params['target'])
    ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))

    noise = params.get('noise')
    if noise:
        ksft_lt(_pkts(noise), directed / 2,
                "traffic on other queues:" + str(cnts))

    empty = params.get('empty')
    if empty:
        ksft_eq(_pkts(empty), 0,
                "traffic on inactive queues: " + str(cnts))
97
98
def test_rss_key_indir(cfg):
    """Test basics like updating the main RSS key and indirection table.

    Skipped when the device reports fewer than 3 Rx queues. Fails outright
    if the ethtool RSS dump lacks (or has empty) key, hash function or
    indirection table entries.
    """

    qcnt = len(_get_rx_cnts(cfg))
    if qcnt < 3:
        # The exception must be raised, merely constructing it skips nothing
        raise KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")

    data = get_rss(cfg)
    want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
    for k in want_keys:
        if k not in data:
            raise KsftFailEx("ethtool results missing key: " + k)
        if not data[k]:
            raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")

    _rss_key_check(cfg, data=data)
    key_len = len(data['rss-hash-key'])

    # Set the key
    key = _rss_key_rand(key_len)
    ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))

    data = get_rss(cfg)
    ksft_eq(key, data['rss-hash-key'])

    # Set the indirection table and the key together
    key = _rss_key_rand(key_len)
    ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
    reset_indir = defer(ethtool, f"-X {cfg.ifname} default")

    data = get_rss(cfg)
    _rss_key_check(cfg, data=data)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(2, max(data['rss-indirection-table']))

    # Reset indirection table and set the key
    key = _rss_key_rand(key_len)
    ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
    data = get_rss(cfg)
    _rss_key_check(cfg, data=data)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(qcnt - 1, max(data['rss-indirection-table']))

    # Set the indirection table
    ethtool(f"-X {cfg.ifname} equal 2")
    data = get_rss(cfg)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(1, max(data['rss-indirection-table']))

    # Check we only get traffic on the first 2 queues
    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)
    # 2 queues, 20k packets, must be at least 5k per queue
    ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
    ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
    # The other queues should be unused
    ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))

    # Restore, and check traffic gets spread again
    reset_indir.exec()

    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)
    if qcnt > 4:
        # First two queues get less traffic than all the rest
        ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
    else:
        # With 3 or 4 queues an even spread puts half or more of the traffic
        # on the first two, so the comparison above can't hold. Make sure
        # the third queue got a significant share of the packets instead.
        ksft_ge(cnts[2], 3500, "traffic distributed: " + str(cnts))
166
167
def test_rss_queue_reconfigure(cfg, main_ctx=True):
    """Make sure queue changes can't override requested RSS config.

    By default main RSS table should change to include all queues.
    When user sets a specific RSS config the driver should preserve it,
    even when queue count changes. Driver should refuse to deactivate
    queues used in the user-set RSS config.

    @main_ctx selects whether the main context (True) or a newly created
    additional context (False) is exercised. The latter steers traffic
    with an ntuple rule, so the test is skipped if ntuple is not active.
    """

    if not main_ctx:
        require_ntuple(cfg)

    # Start with 4 queues, an arbitrary known number.
    try:
        qcnt = len(_get_rx_cnts(cfg))
        ethtool(f"-L {cfg.ifname} combined 4")
        defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
    except Exception as exc:
        # Only catch Exception, so that Ctrl-C and interpreter exit requests
        # are not silently converted into a skip.
        raise KsftSkipEx("Not enough queues for the test or qstat not supported") from exc

    if main_ctx:
        ctx_id = 0
        ctx_ref = ""
    else:
        ctx_id = ethtool_create(cfg, "-X", "context new")
        ctx_ref = f"context {ctx_id}"
        defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")

    # Indirection table should be distributing to all queues.
    data = get_rss(cfg, context=ctx_id)
    ksft_eq(0, min(data['rss-indirection-table']))
    ksft_eq(3, max(data['rss-indirection-table']))

    # Increase queues, indirection table should be distributing to all queues.
    # It's unclear whether tables of additional contexts should be reset, too.
    if main_ctx:
        ethtool(f"-L {cfg.ifname} combined 5")
        data = get_rss(cfg)
        ksft_eq(0, min(data['rss-indirection-table']))
        ksft_eq(4, max(data['rss-indirection-table']))
        ethtool(f"-L {cfg.ifname} combined 4")

    # Configure the table explicitly
    port = rand_port()
    ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
    if main_ctx:
        # Nothing else should land on the queues left out of the main table
        other_key = 'empty'
        defer(ethtool, f"-X {cfg.ifname} default")
    else:
        # Queues outside the extra context still serve the main context
        other_key = 'noise'
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
        ntuple = ethtool_create(cfg, "-N", flow)
        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")

    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
                                              other_key: (1, 2) })

    # We should be able to increase queues, but table should be left untouched
    ethtool(f"-L {cfg.ifname} combined 5")
    data = get_rss(cfg, context=ctx_id)
    ksft_eq({0, 3}, set(data['rss-indirection-table']))

    _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
                                              other_key: (1, 2, 4) })

    # Setting queue count to 3 should fail, queue 3 is used
    try:
        ethtool(f"-L {cfg.ifname} combined 3")
    except CmdExitFailure:
        pass
    else:
        raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
240
241
def test_rss_resize(cfg):
    """Test resizing of the RSS table.

    Some devices dynamically grow and shrink the RSS indirection table
    depending on how many queues are enabled. When that happens the driver
    must keep the balance of the entries (preferably by duplicating the
    smaller table).

    The table is set to a 1:7 weight split over two queues, the queue count
    is raised to the maximum, and the 1:7 ratio is expected to still hold.
    """

    chans = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
    ch_max, qcnt = chans['combined-max'], chans['combined-count']

    if ch_max < 2:
        raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")

    # Go down to two queues, restore the original count on exit
    ethtool(f"-L {cfg.ifname} combined 2")
    defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")

    # Uneven weights, so that a broken resize shows up as a changed ratio
    ethtool(f"-X {cfg.ifname} weight 1 7")
    defer(ethtool, f"-X {cfg.ifname} default")

    ethtool(f"-L {cfg.ifname} combined {ch_max}")
    data = get_rss(cfg)
    indir = data['rss-indirection-table']
    ksft_eq(0, min(indir))
    ksft_eq(1, max(indir))

    ones_per_zero = indir.count(1) / indir.count(0)
    ksft_eq(7, ones_per_zero,
            f"Table imbalance after resize: {data['rss-indirection-table']}")
273
274
def test_hitless_key_update(cfg):
    """Test that flows may be rehashed without impacting traffic.

    Some workloads may want to rehash the flows in response to an imbalance.
    Most effective way to do that is changing the RSS key. Check that changing
    the key does not cause link flaps or traffic disruption.

    Disrupting traffic for key update is not a bug, but makes the key
    update unusable for rehashing under load.

    The checks are: the key update takes less than 0.2 sec, and neither the
    Rx error/drop counters nor the carrier change counter move across it.
    """
    data = get_rss(cfg)
    key_len = len(data['rss-hash-key'])

    key = _rss_key_rand(key_len)

    tgen = GenerateTraffic(cfg)
    try:
        # Sample the counters right before and right after the key update
        errors0, carrier0 = get_drop_err_sum(cfg)
        t0 = datetime.datetime.now()
        ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
        t1 = datetime.datetime.now()
        errors1, carrier1 = get_drop_err_sum(cfg)
    finally:
        # Stop the traffic generator even if the update itself failed
        tgen.wait_pkts_and_stop(5000)

    ksft_lt((t1 - t0).total_seconds(), 0.2)
    # Compare against the "before" sample
    ksft_eq(errors1 - errors0, 0)
    ksft_eq(carrier1 - carrier0, 0)
303
304
def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
    """
    Test separating traffic into RSS contexts.
    The queues will be allocated 2 for each context:
     ctx0  ctx1  ctx2  ctx3
    [0 1] [2 3] [4 5] [6 7] ...

    @ctx_cnt is the number of additional contexts to create. If the device
    can't create that many, the contexts which were created are still tested
    and the test is reported as skipped at the end.
    @create_with_cfg, when true, passes the indirection table config in the
    command creating the context, rather than in a separate follow-up command.
    """

    require_ntuple(cfg)

    requested_ctx_cnt = ctx_cnt

    # Try to allocate more queues when necessary
    qcnt = len(_get_rx_cnts(cfg))
    if qcnt < 2 + 2 * ctx_cnt:
        try:
            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
        except Exception as exc:
            # Only catch Exception, so that Ctrl-C and interpreter exit
            # requests are not silently converted into a skip.
            raise KsftSkipEx("Not enough queues for the test") from exc

    ports = []

    # Use queues 0 and 1 for normal traffic
    ethtool(f"-X {cfg.ifname} equal 2")
    defer(ethtool, f"-X {cfg.ifname} default")

    for i in range(ctx_cnt):
        want_cfg = f"start {2 + i * 2} equal 2"
        create_cfg = want_cfg if create_with_cfg else ""

        try:
            ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
            defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
        except CmdExitFailure:
            # try to carry on and skip at the end
            if i == 0:
                raise
            ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
            ctx_cnt = i
            break

        _rss_key_check(cfg, context=ctx_id)

        if not create_with_cfg:
            ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
            _rss_key_check(cfg, context=ctx_id)

        # Sanity check the context we just created
        data = get_rss(cfg, ctx_id)
        ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
        ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))

        ports.append(rand_port())
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
        ntuple = ethtool_create(cfg, "-N", flow)
        defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")

    for i in range(ctx_cnt):
        # Traffic must hit the two queues of context i, queues of the other
        # additional contexts must stay idle, main context may see noise.
        _send_traffic_check(cfg, ports[i], f"context {i}",
                            { 'target': (2+i*2, 3+i*2),
                              'noise': (0, 1),
                              'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })

    if requested_ctx_cnt != ctx_cnt:
        raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
372
373
def test_rss_context4(cfg):
    """Run test_rss_context() with 4 additional RSS contexts."""
    test_rss_context(cfg, ctx_cnt=4)
376
377
def test_rss_context32(cfg):
    """Run test_rss_context() with 32 additional RSS contexts."""
    test_rss_context(cfg, ctx_cnt=32)
380
381
def test_rss_context4_create_with_cfg(cfg):
    """Run test_rss_context() with 4 contexts, configured at creation time."""
    test_rss_context(cfg, ctx_cnt=4, create_with_cfg=True)
384
385
def test_rss_context_queue_reconfigure(cfg):
    """Run test_rss_queue_reconfigure() against an additional RSS context."""
    test_rss_queue_reconfigure(cfg, False)
388
389
def test_rss_context_out_of_order(cfg, ctx_cnt=4):
    """
    Test separating traffic into RSS contexts.
    Contexts are removed in semi-random order, and steering re-tested
    to make sure removal doesn't break steering to surviving contexts.
    Test requires 3 contexts to work.

    Each context gets 2 queues, queues 0 and 1 are left to the main context.
    Once a context and its ntuple rule are removed, traffic to its port is
    expected to land on the main context queues.
    """

    require_ntuple(cfg)

    requested_ctx_cnt = ctx_cnt

    # Try to allocate more queues when necessary
    qcnt = len(_get_rx_cnts(cfg))
    if qcnt < 2 + 2 * ctx_cnt:
        try:
            ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
            ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
            defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
        except Exception as exc:
            # Only catch Exception, so that Ctrl-C and interpreter exit
            # requests are not silently converted into a skip.
            raise KsftSkipEx("Not enough queues for the test") from exc

    # Deferred cleanup handles, indexed by context; None once executed
    ntuple = []
    ctx = []
    ports = []

    def remove_ctx(idx):
        # Delete the ntuple rule first, then the context it points to
        ntuple[idx].exec()
        ntuple[idx] = None
        ctx[idx].exec()
        ctx[idx] = None

    def check_traffic():
        for i in range(ctx_cnt):
            if ctx[i]:
                expected = {
                    'target': (2+i*2, 3+i*2),
                    'noise': (0, 1),
                    'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
                }
            else:
                # Context is gone, traffic goes to the main context
                expected = {
                    'target': (0, 1),
                    'empty':  range(2, 2+2*ctx_cnt)
                }

            _send_traffic_check(cfg, ports[i], f"context {i}", expected)

    # Use queues 0 and 1 for normal traffic
    ethtool(f"-X {cfg.ifname} equal 2")
    defer(ethtool, f"-X {cfg.ifname} default")

    for i in range(ctx_cnt):
        ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
        ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))

        ports.append(rand_port())
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
        ntuple_id = ethtool_create(cfg, "-N", flow)
        ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))

    check_traffic()

    # Remove middle context
    remove_ctx(ctx_cnt // 2)
    check_traffic()

    # Remove first context
    remove_ctx(0)
    check_traffic()

    # Remove last context
    remove_ctx(-1)
    check_traffic()

    if requested_ctx_cnt != ctx_cnt:
        raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
467
468
def test_rss_context_overlap(cfg, other_ctx=0):
    """
    Test contexts overlapping with each other.
    Use 4 queues for the main context, but only queues 2 and 3 for context 1.

    When @other_ctx is non-zero, a separate additional context spanning the
    4 queues is created and used in place of the main context.
    """

    require_ntuple(cfg)

    queue_cnt = len(_get_rx_cnts(cfg))
    if queue_cnt < 4:
        try:
            ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
            ethtool(f"-L {cfg.ifname} combined 4")
            defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
        except Exception as exc:
            # Only catch Exception, so that Ctrl-C and interpreter exit
            # requests are not silently converted into a skip.
            raise KsftSkipEx("Not enough queues for the test") from exc

    if other_ctx == 0:
        ethtool(f"-X {cfg.ifname} equal 4")
        defer(ethtool, f"-X {cfg.ifname} default")
    else:
        other_ctx = ethtool_create(cfg, "-X", "context new")
        # Register the cleanup before configuring the context, so that
        # the context does not leak if the configuration fails.
        defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
        ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")

    ctx_id = ethtool_create(cfg, "-X", "context new")
    defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
    ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")

    port = rand_port()
    if other_ctx:
        flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}"
        ntuple_id = ethtool_create(cfg, "-N", flow)
        ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    # Test the main context
    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)

    ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
    ksft_ge(sum(cnts[ :2]),  7000, "traffic on main context (1/2): " + str(cnts))
    ksft_ge(sum(cnts[2:4]),  7000, "traffic on main context (2/2): " + str(cnts))
    if other_ctx == 0:
        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))

    # Now create a rule for context 1 and make sure traffic goes to a subset
    if other_ctx:
        # Drop the rule steering to the wide context before adding the new one
        ntuple.exec()
    flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
    ntuple_id = ethtool_create(cfg, "-N", flow)
    defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")

    cnts = _get_rx_cnts(cfg)
    GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
    cnts = _get_rx_cnts(cfg, prev=cnts)

    directed = sum(cnts[2:4])
    ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
    ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
    if other_ctx == 0:
        ksft_eq(sum(cnts[4: ]),     0, "traffic on other queues: " + str(cnts))
531
532
def test_rss_context_overlap2(cfg):
    """Run test_rss_context_overlap() with two additional contexts."""
    test_rss_context_overlap(cfg, other_ctx=True)
535
536
def main() -> None:
    """Set up the test environment and run all the RSS test cases."""
    cases = [
        test_rss_key_indir, test_rss_queue_reconfigure,
        test_rss_resize, test_hitless_key_update,
        test_rss_context, test_rss_context4, test_rss_context32,
        test_rss_context_queue_reconfigure,
        test_rss_context_overlap, test_rss_context_overlap2,
        test_rss_context_out_of_order, test_rss_context4_create_with_cfg,
    ]

    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        # Netlink families used by the helpers hang off the env object
        cfg.ethnl = EthtoolFamily()
        cfg.netdevnl = NetdevFamily()

        ksft_run(cases, args=(cfg, ))
    ksft_exit()
550
551
# Run the test cases only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
554