// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of all mlx5_esw_sched_nodes. */
	struct list_head nodes;
};

static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}

static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}

static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}

static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
{
	struct mlx5_qos_domain *qos_domain;

	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
	if (!qos_domain)
		return NULL;

	mutex_init(&qos_domain->lock);
	INIT_LIST_HEAD(&qos_domain->nodes);

	return qos_domain;
}

static int esw_qos_domain_init(struct mlx5_eswitch *esw)
{
	esw->qos.domain = esw_qos_domain_alloc();

	return esw->qos.domain ? 0 : -ENOMEM;
}

static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}

enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
	SCHED_NODE_TYPE_RATE_LIMITER,
	SCHED_NODE_TYPE_VPORT_TC,
	SCHED_NODE_TYPE_VPORTS_TC_TSAR,
};

static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
	[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
	[SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
	[SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
	[SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
};

struct mlx5_esw_sched_node {
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating this node's min_rate relative to its siblings. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
	/* Level in the hierarchy. The root node level is 1. */
	u8 level;
	/* Valid only when this node represents a traffic class. */
	u8 tc;
	/* Valid only for a TC arbiter node or vport TC arbiter. */
	u32 tc_bw[DEVLINK_RATE_TCS_MAX];
};

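/* Link the node into the rate hierarchy: nodes without a parent are placed
 * on the QoS domain list at depth level 2, children are appended to their
 * parent's children list one level below it.
 */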
static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
{
	if (!node->parent) {
		/* Root children are assigned a depth level of 2. */
		node->level = 2;
		list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
	} else {
		node->level = node->parent->level + 1;
		list_add_tail(&node->entry, &node->parent->children);
	}
}

static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
{
	int num_tcs = mlx5_max_tc(dev) + 1;

	return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
}

static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	list_del_init(&node->entry);
	node->parent = parent;
	if (parent)
		node->esw = parent->esw;
	esw_qos_node_attach_to_parent(node);
}

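/* Re-parent all nodes on @nodes to @parent. If @parent is a TC arbiter
 * TSAR, also update the parent pointer of any vport found below the moved
 * nodes so that it references the TC arbiter directly.
 */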
static void esw_qos_nodes_set_parent(struct list_head *nodes,
				     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node, *tmp;

	list_for_each_entry_safe(node, tmp, nodes, entry) {
		esw_qos_node_set_parent(node, parent);
		if (!list_empty(&node->children) &&
		    parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
			struct mlx5_esw_sched_node *child;

			list_for_each_entry(child, &node->children, entry) {
				struct mlx5_vport *vport = child->vport;

				if (vport)
					vport->qos.sched_node->parent = parent;
			}
		}
	}
}

void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	if (vport->qos.sched_nodes) {
		int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
		int i;

		for (i = 0; i < num_tcs; i++)
			kfree(vport->qos.sched_nodes[i]);
		kfree(vport->qos.sched_nodes);
	}

	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}

u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return 0;

	return vport->qos.sched_node->ix;
}

struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return NULL;

	return vport->qos.sched_node->parent;
}

static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	switch (node->type) {
	case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT_TC:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n",
			 op,
			 sched_node_type_str[node->type],
			 node->vport->vport, node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		break;
	case SCHED_NODE_TYPE_RATE_LIMITER:
	case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
	case SCHED_NODE_TYPE_VPORTS_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (err=%d)\n",
			 op, sched_node_type_str[node->type], err);
		break;
	default:
		esw_warn(node->esw->dev,
			 "E-Switch %s scheduling element failed (err=%d)\n",
			 op, err);
		break;
	}
}

static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
						 &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}

static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}

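/* Modify the max_rate/bw_share of an existing scheduling element. Only the
 * fields that actually changed are included in the modify bitmask, and the
 * cached values on the node are updated once firmware accepts the command.
 */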
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}

static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node,
					     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};

	if (!mlx5_qos_element_type_supported(
			node->esw->dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate);
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT);

	return esw_qos_node_create_sched_element(node, sched_ctx, extack);
}

static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If nodes max min_rate divider is 0 but their parent has bw_share
	 * configured, then set bw_share for nodes to minimal value.
	 */

	if (parent && parent->bw_share)
		return 1;

	/* If none of the nodes has min_rate configured, a divider of 0 sets all
	 * nodes' bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}

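/* bw_share is the min_rate scaled by the divider, rounded up and clamped to
 * the [MLX5_MIN_BW_SHARE, fw_max] range. A divider of 0 yields a bw_share
 * of 0, i.e. no min-rate guarantee.
 */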
static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;
	return min_t(u32, fw_max,
		     max_t(u32,
			   DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
}

static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 bw_share;

	bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

	esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}

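/* Recompute bw_share for all nodes under @parent (or for all root-level
 * nodes of @esw when @parent is NULL), then recurse into each child list so
 * min-rate guarantees stay proportional across the whole hierarchy.
 */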
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		/* Vports TC TSARs don't have a minimum rate configured,
		 * so there's no need to update the bw_share on them.
		 */
		if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
			esw_qos_update_sched_node_bw_share(node, divider,
							   extack);
		}

		if (list_empty(&node->children))
			continue;

		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}

static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
{
	u32 total = 0;
	int i;

	for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
		total += tc_bw[i];

	/* If total is zero, tc-bw config is disabled and we shouldn't reach
	 * here.
	 */
	return WARN_ON(!total) ? 1 : total;
}

static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (min_rate == node->min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(esw, node->parent, extack);

	return 0;
}

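/* Create a DWRR TSAR scheduling element in firmware under
 * @parent_element_id and return its index in @tsar_ix.
 */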
static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
			       u32 max_rate, u32 bw_share, u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}

static int
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
		 vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}

static int
esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
				      u32 rate_limit_elem_ix,
				      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC,
			SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_tc_element, attr, vport_number,
		 vport_tc_node->vport->vport);
	MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
	MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
		 rate_limit_elem_ix);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 vport_tc_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, bw_share,
		 vport_tc_node->bw_share);

	return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	esw_qos_node_attach_to_parent(node);
	if (!parent) {
		/* The caller is responsible for inserting the node into the
		 * parent list if necessary. This function can also be used with
		 * a NULL parent, which doesn't necessarily indicate that it
		 * refers to the root scheduling element.
		 */
		list_del_init(&node->entry);
	}

	return node;
}

static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}

static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}

static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
					 u8 tc, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = parent->esw->dev;
	struct mlx5_esw_sched_node *vports_tc_node;
	void *attr;
	int err;

	if (!mlx5_qos_element_type_supported(
			dev,
			SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
			SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
					      SCHED_NODE_TYPE_VPORTS_TC_TSAR,
					      parent);
	if (!vports_tc_node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
		return -ENOMEM;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
	MLX5_SET(tsar_element, attr, traffic_class, tc);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
						extack);
	if (err)
		goto err_create_sched_element;

	vports_tc_node->tc = tc;

	return 0;

err_create_sched_element:
	__esw_qos_free_node(vports_tc_node);
	return err;
}

static void
esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw)
{
	memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw));
}

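/* Apply the per-TC bandwidth configuration in @tc_bw to every vports TC
 * TSAR under the arbiter: each TC's share is scaled by fw_max_bw_share and
 * normalized against the sum of all TC shares.
 */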
static void
esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	struct mlx5_esw_sched_node *vports_tc_node;
	u32 divider, fw_max_bw_share;

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	divider = esw_qos_calculate_tc_bw_divider(tc_bw);
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		u8 tc = vports_tc_node->tc;
		u32 bw_share;

		tc_arbiter_node->tc_bw[tc] = tc_bw[tc];
		bw_share = tc_bw[tc] * fw_max_bw_share;
		bw_share = esw_qos_calc_bw_share(bw_share, divider,
						 fw_max_bw_share);
		esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
	}
}

static void
esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *tmp;

	list_for_each_entry_safe(vports_tc_node, tmp,
				 &tc_arbiter_node->children, entry)
		esw_qos_destroy_node(vports_tc_node, extack);
}

static int
esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
			       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	int err, i, num_tcs = esw_qos_num_tcs(esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
		if (err)
			goto err_tc_node_create;
	}

	return 0;

err_tc_node_create:
	esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
	return err;
}

static int esw_qos_create_tc_arbiter_sched_elem(
		struct mlx5_esw_sched_node *tc_arbiter_node,
		struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	u32 tsar_parent_ix;
	void *attr;

	if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
					  TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
					  SCHEDULING_HIERARCHY_E_SWITCH)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch TC Arbiter scheduling element is not supported");
		return -EOPNOTSUPP;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
	tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
			 tc_arbiter_node->esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 tsar_parent_ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
		 tc_arbiter_node->max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share,
		 tc_arbiter_node->bw_share);

	return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
					     0, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	list_add_tail(&node->entry, &esw->qos.domain->nodes);
	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}

static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		esw_qos_destroy_vports_tc_nodes(node, extack);

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}

static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
					     &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	refcount_set(&esw->qos.refcnt, 1);

	return 0;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() set refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
				       struct netlink_ext_ack *extack)
{
	/* Clean up all Vports TC nodes within the TC arbiter node. */
	esw_qos_destroy_vports_tc_nodes(node, extack);
	/* Destroy the scheduling element for the TC arbiter node itself. */
	esw_qos_node_destroy_sched_element(node, extack);
}

static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	u32 curr_ix = node->ix;
	int err;

	err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
	if (err)
		return err;
	/* Initialize the vports TC nodes within created TC arbiter TSAR. */
	err = esw_qos_create_vports_tc_nodes(node, extack);
	if (err)
		goto err_vports_tc_nodes;

	node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;

	return 0;

err_vports_tc_nodes:
	/* If initialization fails, clean up the scheduling element
	 * for the TC arbiter node.
	 */
	esw_qos_node_destroy_sched_element(node, NULL);
	node->ix = curr_ix;
	return err;
}

static int
esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
				   u32 rate_limit_elem_ix,
				   struct mlx5_esw_sched_node *vports_tc_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *vport_tc_node;
	u8 tc = vports_tc_node->tc;
	int err;

	vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
					     SCHED_NODE_TYPE_VPORT_TC,
					     vports_tc_node);
	if (!vport_tc_node)
		return -ENOMEM;

	vport_tc_node->min_rate = vport_node->min_rate;
	vport_tc_node->tc = tc;
	vport_tc_node->vport = vport;
	err = esw_qos_vport_tc_create_sched_element(vport_tc_node,
						    rate_limit_elem_ix,
						    extack);
	if (err)
		goto err_out;

	vport->qos.sched_nodes[tc] = vport_tc_node;

	return 0;
err_out:
	__esw_qos_free_node(vport_tc_node);
	return err;
}

static void
esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport,
					struct netlink_ext_ack *extack)
{
	int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		if (vport->qos.sched_nodes[i]) {
			__esw_qos_destroy_node(vport->qos.sched_nodes[i],
					       extack);
		}
	}

	kfree(vport->qos.sched_nodes);
	vport->qos.sched_nodes = NULL;
}

static int
esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport,
				       enum sched_node_type type,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node;
	int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
	u32 rate_limit_elem_ix;

	vport->qos.sched_nodes = kcalloc(num_tcs,
					 sizeof(struct mlx5_esw_sched_node *),
					 GFP_KERNEL);
	if (!vport->qos.sched_nodes) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Allocating the vport TC scheduling elements failed.");
		return -ENOMEM;
	}

	rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			     vport_node->ix : 0;
	tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			  vport_node->parent : vport_node;
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		err = esw_qos_create_vport_tc_sched_node(vport,
							 rate_limit_elem_ix,
							 vports_tc_node,
							 extack);
		if (err)
			goto err_create_vport_tc;
	}

	return 0;

err_create_vport_tc:
	esw_qos_destroy_vport_tc_sched_elements(vport, NULL);

	return err;
}

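/* Enable per-TC scheduling for a vport. With SCHED_NODE_TYPE_RATE_LIMITER
 * the vport node becomes a standalone rate limiter referenced by vport TC
 * elements created under the parent node's TC arbiter; otherwise the vport
 * node itself is turned into a TC arbiter TSAR holding its own vports TC
 * nodes.
 */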
static int
esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
			struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	int err;

	if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		int new_level, max_level;

		/* Increase the parent's level by 2 to account for both the
		 * TC arbiter and the vports TC scheduling element.
		 */
		new_level = (parent ? parent->level : 2) + 2;
		max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
					      log_esw_max_sched_depth);
		if (new_level > max_level) {
			NL_SET_ERR_MSG_MOD(extack,
					   "TC arbitration on leafs is not supported beyond max scheduling depth");
			return -EOPNOTSUPP;
		}
	}

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (type == SCHED_NODE_TYPE_RATE_LIMITER)
		err = esw_qos_create_rate_limit_element(vport_node, extack);
	else
		err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack);
	if (err)
		return err;

	/* Rate limiters impact multiple nodes not directly connected to them
	 * and are not direct members of the QoS hierarchy.
	 * Unlink it from the parent to reflect that.
	 */
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		list_del_init(&vport_node->entry);
		vport_node->level = 0;
	}

	err = esw_qos_create_vport_tc_sched_elements(vport, type, extack);
	if (err)
		goto err_sched_nodes;

	return 0;

err_sched_nodes:
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		esw_qos_node_destroy_sched_element(vport_node, NULL);
		esw_qos_node_attach_to_parent(vport_node);
	} else {
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
	}
	return err;
}

static void esw_qos_vport_tc_disable(struct mlx5_vport *vport,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	esw_qos_destroy_vport_tc_sched_elements(vport, extack);

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack);
}

static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
					  u32 min_rate,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
						min_rate, extack);
		if (err)
			goto err_out;
	}
	vport_node->min_rate = min_rate;

	return 0;
err_out:
	for (--i; i >= 0; i--) {
		esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
					  vport_node->min_rate, extack);
	}
	return err;
}

static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	if (curr_type == SCHED_NODE_TYPE_VPORT)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_vport_tc_disable(vport, extack);

	vport_node->bw_share = 0;
	memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw));
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}

static int esw_qos_vport_enable(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport_node, parent);
	if (type == SCHED_NODE_TYPE_VPORT)
		err = esw_qos_vport_create_sched_element(vport_node, extack);
	else
		err = esw_qos_vport_tc_enable(vport, type, extack);
	if (err)
		return err;

	vport_node->type = type;
	esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
					vport_node->bw_share);

	return 0;
}

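/* Allocate the vport's scheduling node, take a reference on the E-Switch
 * QoS state and enable scheduling of the given type under @parent (or at
 * the root when @parent is NULL). Everything is rolled back on failure.
 */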
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	struct mlx5_eswitch *parent_esw;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent_esw = parent ? parent->esw : esw;
	sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
	if (!sched_node) {
		esw_qos_put(esw);
		return -ENOMEM;
	}
	if (!parent)
		list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		__esw_qos_free_node(sched_node);
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}

static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

	esw_assert_qos_lock_held(esw);
	if (!vport->qos.sched_node)
		return;

	esw_qos_vport_disable(vport, NULL);
	mlx5_esw_qos_vport_qos_free(vport);
	esw_qos_put(esw);
}

void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *parent;

	lockdep_assert_held(&esw->state_lock);
	esw_qos_lock(esw);
	if (!vport->qos.sched_node)
		goto unlock;

	parent = vport->qos.sched_node->parent;
	WARN(parent, "Disabling QoS on port before detaching it from node");

	mlx5_esw_qos_vport_disable_locked(vport);
unlock:
	esw_qos_unlock(esw);
}

static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
						 extack);
	else
		return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share,
						 extack);
}

static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
						 extack);
	else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER)
		return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack);
	else
		return esw_qos_set_node_min_rate(vport_node, min_rate, extack);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}

bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	bool enabled;

	esw_qos_lock(esw);
	enabled = !!vport->qos.sched_node;
	if (enabled) {
		*max_rate = vport->qos.sched_node->max_rate;
		*min_rate = vport->qos.sched_node->min_rate;
	}
	esw_qos_unlock(esw);
	return enabled;
}

static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
					enum sched_node_type new_type,
					struct netlink_ext_ack *extack)
{
	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR &&
	    new_type == SCHED_NODE_TYPE_RATE_LIMITER) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from vport-level TC arbitration to node-level TC arbitration");
		return -EOPNOTSUPP;
	}

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER &&
	    new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from node-level TC arbitration to vport-level TC arbitration");
		return -EOPNOTSUPP;
	}

	return 0;
}

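/* Move a vport to a new scheduling type and/or parent by tearing down its
 * current scheduling state and enabling it again. When the vport keeps TC
 * arbitration across the move, its per-TC bandwidth shares are saved first
 * and reapplied afterwards. On failure the previous configuration is
 * restored.
 */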
static int esw_qos_vport_update(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *curr_parent = vport_node->parent;
	enum sched_node_type curr_type = vport_node->type;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);
	if (curr_type == type && curr_parent == parent)
		return 0;

	err = esw_qos_vport_tc_check_type(curr_type, type, extack);
	if (err)
		return err;

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
		esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw);

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
		extack = NULL;
	}

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
		esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw,
						 extack);
	}

	return err;
}

static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
					struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *curr_parent;
	enum sched_node_type type;

	esw_assert_qos_lock_held(esw);
	curr_parent = vport->qos.sched_node->parent;
	if (curr_parent == parent)
		return 0;

	/* Set vport QoS type based on parent node type if different from
	 * default QoS; otherwise, use the vport's current QoS type.
	 */
	if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_RATE_LIMITER;
	else if (curr_parent &&
		 curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_VPORT;
	else
		type = vport->qos.sched_node->type;

	return esw_qos_vport_update(vport, type, parent, extack);
}

static void
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
				  struct mlx5_esw_sched_node *node,
				  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;

	vports_tc_node = list_first_entry(&tc_arbiter_node->children,
					  struct mlx5_esw_sched_node,
					  entry);

	list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
				 entry)
		esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
}

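/* Convert a TC arbiter node back into a plain vports TSAR: create a new
 * DWRR scheduling element for @node and re-parent every vport that was
 * scheduled per TC under @tc_arbiter_node so it becomes a regular vport
 * member of @node.
 */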
static int esw_qos_switch_tc_arbiter_node_to_vports(
		struct mlx5_esw_sched_node *tc_arbiter_node,
		struct mlx5_esw_sched_node *node,
		struct netlink_ext_ack *extack)
{
	u32 parent_tsar_ix = node->parent ?
			     node->parent->ix : node->esw->qos.root_tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
					     node->max_rate, node->bw_share,
					     &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create scheduling element for vports node when disabling vports TC QoS");
		return err;
	}

	node->type = SCHED_NODE_TYPE_VPORTS_TSAR;

	/* Disable TC QoS for vports in the arbiter node. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);

	return 0;
}

static int esw_qos_switch_vports_node_to_tc_arbiter(
		struct mlx5_esw_sched_node *node,
		struct mlx5_esw_sched_node *tc_arbiter_node,
		struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node, *tmp;
	struct mlx5_vport *vport;
	int err;

	/* Enable TC QoS for each vport in the node. */
	list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
		vport = vport_node->vport;
		err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
						  extack);
		if (err)
			goto err_out;
	}

	/* Destroy the current vports node TSAR. */
	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err)
		goto err_out;

	return 0;
err_out:
	/* Restore vports back into the node if an error occurs. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);

	return err;
}

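/* Allocate a detached shadow node that takes over @curr_node's firmware
 * element index, type and children. The original node can then be reused
 * for a different TSAR type while the shadow preserves enough state to
 * restore or release the old configuration.
 */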
static struct mlx5_esw_sched_node *
esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
{
	struct mlx5_esw_sched_node *new_node;

	new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
					curr_node->type, NULL);
	if (!new_node)
		return ERR_PTR(-ENOMEM);

	esw_qos_nodes_set_parent(&curr_node->children, new_node);
	return new_node;
}

static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node;
	int err;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Allocate a new rate node to hold the current state, which will allow
	 * for restoring the vports back to this node after disabling TC
	 * arbitration.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
		return PTR_ERR(curr_node);
	}

	/* Disable TC QoS for all vports, and assign them back to the node. */
	err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
	if (err)
		goto err_out;

	/* Clean up the TC arbiter node after disabling TC QoS for vports. */
	esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
	goto out;
err_out:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node, *child;
	int err, new_level, max_level;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Increase the hierarchy level by one to account for the additional
	 * vports TC scheduling node, and verify that the new level does not
	 * exceed the maximum allowed depth.
	 */
	new_level = node->level + 1;
	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_MOD(extack,
				   "TC arbitration on nodes is not supported beyond max scheduling depth");
		return -EOPNOTSUPP;
	}

	/* Ensure the node does not contain non-leaf children before assigning
	 * TC bandwidth.
	 */
	if (!list_empty(&node->children)) {
		list_for_each_entry(child, &node->children, entry) {
			if (!child->vport) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Cannot configure TC bandwidth on a node with non-leaf children");
				return -EOPNOTSUPP;
			}
		}
	}

	/* Allocate a new node that will store the information of the current
	 * node. This will be used later to restore the node if necessary.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
		return PTR_ERR(curr_node);
	}

	/* Initialize the TC arbiter node for QoS management.
	 * This step prepares the node for handling Traffic Class arbitration.
	 */
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err)
		goto err_setup;

	/* Enable TC QoS for each vport within the current node. */
	err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
	if (err)
		goto err_switch_vports;
	goto out;

err_switch_vports:
	esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
	node->ix = curr_node->ix;
	node->type = curr_node->type;
err_setup:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Converts the bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place. If the converted rate exceeds the
 * link speed or is not a whole number of Mbps, an error is returned.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
                                        u64 *rate, struct netlink_ext_ack *extack)
{
        u32 link_speed_max, remainder;
        u64 value;
        int err;

        value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
        if (remainder) {
                pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
                       name, *rate);
                NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
                return -EINVAL;
        }

        err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
        if (err)
                return err;

        err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
        if (err)
                return err;

        *rate = value;
        return 0;
}

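/* Reject any bandwidth request for traffic classes beyond the number of TCs
 * supported by the device; only the first esw_qos_num_tcs() entries may be
 * non-zero.
 */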
static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
                                               u32 *tc_bw)
{
        int i, num_tcs = esw_qos_num_tcs(esw->dev);

        for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
                if (tc_bw[i])
                        return false;
        }

        return true;
}

static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
                                                     u32 *tc_bw)
{
        struct mlx5_esw_sched_node *node = vport->qos.sched_node;
        struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

        esw = (node && node->parent) ? node->parent->esw : esw;

        return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
}

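/* TC bandwidth is considered disabled when every per-TC share is zero. */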
static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
        int i;

        for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
                if (tc_bw[i])
                        return false;
        }

        return true;
}

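/* Tear down the vport's QoS state once it no longer carries any
 * configuration: no parent node, no max/min rate and no TC bandwidth.
 */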
static void esw_vport_qos_prune_empty(struct mlx5_vport *vport)
{
        struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

        esw_assert_qos_lock_held(vport->dev->priv.eswitch);
        if (!vport_node)
                return;

        if (vport_node->parent || vport_node->max_rate ||
            vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw))
                return;

        mlx5_esw_qos_vport_disable_locked(vport);
}

int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
        if (esw->qos.domain)
                return 0; /* Nothing to change. */

        return esw_qos_domain_init(esw);
}

void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
{
        if (esw->qos.domain)
                esw_qos_domain_release(esw);
}

/* Eswitch devlink rate API */

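/* The callbacks below back the devlink-rate interface. As a rough sketch of
 * how they are exercised from userspace (device name and exact iproute2
 * syntax are illustrative and may vary by version):
 *
 *   devlink port function rate add pci/0000:03:00.0/group1
 *   devlink port function rate set pci/0000:03:00.0/group1 tx_max 50Gbit
 *   devlink port function rate set pci/0000:03:00.0/1 parent group1 tx_share 10Gbit
 */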
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
        if (err)
                goto out;
        esw_vport_qos_prune_empty(vport);
out:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
        if (err)
                goto out;
        esw_vport_qos_prune_empty(vport);
out:
        esw_qos_unlock(esw);
        return err;
}

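/* Set per-TC bandwidth shares on a vport. An all-zero tc_bw array disables
 * TC arbitration and reverts the vport to a plain vport scheduling element;
 * otherwise the vport is switched to (or kept on) a TC arbiter node and the
 * new shares are applied.
 */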
int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
                                         void *priv,
                                         u32 *tc_bw,
                                         struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *vport_node;
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        bool disable;
        int err = 0;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        disable = esw_qos_tc_bw_disabled(tc_bw);
        esw_qos_lock(esw);

        if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "E-Switch traffic classes number is not supported");
                err = -EOPNOTSUPP;
                goto unlock;
        }

        vport_node = vport->qos.sched_node;
        if (disable && !vport_node)
                goto unlock;

        if (disable) {
                if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
                        err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
                                                   vport_node->parent, extack);
                esw_vport_qos_prune_empty(vport);
                goto unlock;
        }

        if (!vport_node) {
                err = mlx5_esw_qos_vport_enable(vport,
                                                SCHED_NODE_TYPE_TC_ARBITER_TSAR,
                                                NULL, 0, 0, extack);
                vport_node = vport->qos.sched_node;
        } else {
                err = esw_qos_vport_update(vport,
                                           SCHED_NODE_TYPE_TC_ARBITER_TSAR,
                                           vport_node->parent, extack);
        }
        if (!err)
                esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
                                         void *priv,
                                         u32 *tc_bw,
                                         struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        bool disable;
        int err;

        if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "E-Switch traffic classes number is not supported");
                return -EOPNOTSUPP;
        }

        disable = esw_qos_tc_bw_disabled(tc_bw);
        esw_qos_lock(esw);
        if (disable) {
                err = esw_qos_node_disable_tc_arbitration(node, extack);
                goto unlock;
        }

        err = esw_qos_node_enable_tc_arbitration(node, extack);
        if (!err)
                esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = esw_qos_set_node_min_rate(node, tx_share, extack);
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node;
        struct mlx5_eswitch *esw;
        int err = 0;

        esw = mlx5_devlink_eswitch_get(rate_node->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);

        esw_qos_lock(esw);
        if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Rate node creation supported only in switchdev mode");
                err = -EOPNOTSUPP;
                goto unlock;
        }

        node = esw_qos_create_vports_sched_node(esw, extack);
        if (IS_ERR(node)) {
                err = PTR_ERR(node);
                goto unlock;
        }

        *priv = node;
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;

        esw_qos_lock(esw);
        __esw_qos_destroy_node(node, extack);
        esw_qos_put(esw);
        esw_qos_unlock(esw);
        return 0;
}

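/* Move a vport under a new parent node. A vport with no QoS state yet is
 * enabled directly under the parent, as a rate limiter when the parent is a
 * TC arbiter and as a plain vport element otherwise; a vport that already
 * has a scheduling node is simply re-parented.
 */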
int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
                                     struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
        int err = 0;

        if (parent && parent->esw != esw) {
                NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
                return -EOPNOTSUPP;
        }

        esw_qos_lock(esw);
        if (!vport->qos.sched_node && parent) {
                enum sched_node_type type;

                type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
                       SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
                err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
                                                extack);
        } else if (vport->qos.sched_node) {
                err = esw_qos_vport_update_parent(vport, parent, extack);
        }
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
                                          struct devlink_rate *parent,
                                          void *priv, void *parent_priv,
                                          struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL;
        struct mlx5_vport *vport = priv;
        int err;

        err = mlx5_esw_qos_vport_update_parent(vport, node, extack);
        if (!err) {
                struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

                esw_qos_lock(esw);
                esw_vport_qos_prune_empty(vport);
                esw_qos_unlock(esw);
        }

        return err;
}

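/* A node is empty when it has no children. A TC arbiter always carries its
 * per-TC TSAR children, so recurse into the first child to check whether any
 * rate objects are attached below it.
 */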
static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
{
        if (list_empty(&node->children))
                return true;

        if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
                return false;

        node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
                                entry);

        return esw_qos_is_node_empty(node);
}

static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
                                      struct mlx5_esw_sched_node *parent,
                                      struct netlink_ext_ack *extack)
{
        u8 new_level, max_level;

        if (parent && parent->esw != node->esw) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot assign node to another E-Switch");
                return -EOPNOTSUPP;
        }

        if (!esw_qos_is_node_empty(node)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot reassign a node that contains rate objects");
                return -EOPNOTSUPP;
        }

        if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot attach a node to a parent with TC bandwidth configured");
                return -EOPNOTSUPP;
        }

        new_level = parent ? parent->level + 1 : 2;
        if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                /* Increase by one to account for the vports TC scheduling
                 * element.
                 */
                new_level += 1;
        }

        max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
        if (new_level > max_level) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Node hierarchy depth exceeds the maximum supported level");
                return -EOPNOTSUPP;
        }

        return 0;
}

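/* Re-parent a TC arbiter node: tear down its scheduling elements, attach it
 * to the new parent and rebuild. If the rebuild fails, try to restore it
 * under the previous parent; the saved TC bandwidth shares are then
 * reapplied to whichever hierarchy is in place (unless the restore itself
 * fails).
 */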
static int
esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
                                      struct mlx5_esw_sched_node *parent,
                                      struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent = node->parent;
        u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
        struct mlx5_eswitch *esw = node->esw;
        int err;

        esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
        esw_qos_tc_arbiter_scheduling_teardown(node, extack);
        esw_qos_node_set_parent(node, parent);
        err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
        if (err) {
                esw_qos_node_set_parent(node, curr_parent);
                if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
                        esw_warn(esw->dev, "Node restore QoS failed\n");
                        return err;
                }
        }
        esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);

        return err;
}

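/* Re-parent a vports TSAR. The scheduling element is destroyed and recreated
 * under the new parent (or under the root TSAR when no parent is given); if
 * the new element cannot be created, recreate it under the current parent so
 * the node keeps working.
 */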
static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
                                             struct mlx5_esw_sched_node *parent,
                                             struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent = node->parent;
        struct mlx5_eswitch *esw = node->esw;
        u32 parent_ix, curr_parent_ix;
        int err;

        parent_ix = parent ? parent->ix : esw->qos.root_tsar_ix;
        /* The node may currently sit directly under the root TSAR, in which
         * case curr_parent is NULL; restore under the root in that case.
         */
        curr_parent_ix = curr_parent ? curr_parent->ix : esw->qos.root_tsar_ix;
        mlx5_destroy_scheduling_element_cmd(esw->dev,
                                            SCHEDULING_HIERARCHY_E_SWITCH,
                                            node->ix);
        err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
                                             node->max_rate, 0, &node->ix);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Failed to create a node under the new hierarchy.");
                if (esw_qos_create_node_sched_elem(esw->dev, curr_parent_ix,
                                                   node->max_rate,
                                                   node->bw_share,
                                                   &node->ix))
                        esw_warn(esw->dev, "Node restore QoS failed\n");

                return err;
        }
        esw_qos_node_set_parent(node, parent);
        node->bw_share = 0;

        return 0;
}

static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
                                           struct mlx5_esw_sched_node *parent,
                                           struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        curr_parent = node->parent;
        if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                err = esw_qos_tc_arbiter_node_update_parent(node, parent,
                                                            extack);
        } else {
                err = esw_qos_vports_node_update_parent(node, parent, extack);
        }

        if (err)
                goto out;

        esw_qos_normalize_min_rate(esw, curr_parent, extack);
        esw_qos_normalize_min_rate(esw, parent, extack);

out:
        esw_qos_unlock(esw);

        return err;
}

int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
                                          struct devlink_rate *parent,
                                          void *priv, void *parent_priv,
                                          struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv, *parent_node;

        if (!parent)
                return mlx5_esw_qos_node_update_parent(node, NULL, extack);

        parent_node = parent_priv;
        return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
}
