1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10
11 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
12 #define MLX5_MIN_BW_SHARE 1
13
14 /* Holds rate nodes associated with an E-Switch. */
15 struct mlx5_qos_domain {
16 /* Serializes access to all qos changes in the qos domain. */
17 struct mutex lock;
18 /* List of all mlx5_esw_sched_nodes. */
19 struct list_head nodes;
20 };
21
static void esw_qos_lock(struct mlx5_eswitch *esw)
23 {
24 mutex_lock(&esw->qos.domain->lock);
25 }
26
static void esw_qos_unlock(struct mlx5_eswitch *esw)
28 {
29 mutex_unlock(&esw->qos.domain->lock);
30 }
31
static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
33 {
34 lockdep_assert_held(&esw->qos.domain->lock);
35 }
36
static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
38 {
39 struct mlx5_qos_domain *qos_domain;
40
41 qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
42 if (!qos_domain)
43 return NULL;
44
45 mutex_init(&qos_domain->lock);
46 INIT_LIST_HEAD(&qos_domain->nodes);
47
48 return qos_domain;
49 }
50
static int esw_qos_domain_init(struct mlx5_eswitch *esw)
52 {
53 esw->qos.domain = esw_qos_domain_alloc();
54
55 return esw->qos.domain ? 0 : -ENOMEM;
56 }
57
static void esw_qos_domain_release(struct mlx5_eswitch *esw)
59 {
60 kfree(esw->qos.domain);
61 esw->qos.domain = NULL;
62 }
63
64 enum sched_node_type {
65 SCHED_NODE_TYPE_VPORTS_TSAR,
66 SCHED_NODE_TYPE_VPORT,
67 SCHED_NODE_TYPE_TC_ARBITER_TSAR,
68 SCHED_NODE_TYPE_RATE_LIMITER,
69 SCHED_NODE_TYPE_VPORT_TC,
70 SCHED_NODE_TYPE_VPORTS_TC_TSAR,
71 };
72
73 static const char * const sched_node_type_str[] = {
74 [SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
75 [SCHED_NODE_TYPE_VPORT] = "vport",
76 [SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
77 [SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
78 [SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
79 [SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
80 };
81
82 struct mlx5_esw_sched_node {
83 u32 ix;
84 /* Bandwidth parameters. */
85 u32 max_rate;
86 u32 min_rate;
	/* A computed value indicating relative min_rate among the node's children. */
88 u32 bw_share;
89 /* The parent node in the rate hierarchy. */
90 struct mlx5_esw_sched_node *parent;
91 /* Entry in the parent node's children list. */
92 struct list_head entry;
93 /* The type of this node in the rate hierarchy. */
94 enum sched_node_type type;
95 /* The eswitch this node belongs to. */
96 struct mlx5_eswitch *esw;
97 /* The children nodes of this node, empty list for leaf nodes. */
98 struct list_head children;
99 /* Valid only if this node is associated with a vport. */
100 struct mlx5_vport *vport;
101 /* Level in the hierarchy. The root node level is 1. */
102 u8 level;
103 /* Valid only when this node represents a traffic class. */
104 u8 tc;
105 /* Valid only for a TC arbiter node or vport TC arbiter. */
106 u32 tc_bw[DEVLINK_RATE_TCS_MAX];
107 };
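/*
 * Illustrative shape of the resulting rate hierarchy (a sketch, not an
 * exhaustive list of valid topologies):
 *
 *   root TSAR (level 1)
 *   ├── vports TSAR node (level 2)
 *   │   └── vport node (leaf)
 *   └── TC arbiter TSAR node (level 2)
 *       └── vports TC TSAR, one per traffic class
 *           └── vport TC node (leaf, one per vport per TC)
 */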
108
static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
110 {
111 if (!node->parent) {
112 /* Root children are assigned a depth level of 2. */
113 node->level = 2;
114 list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
115 } else {
116 node->level = node->parent->level + 1;
117 list_add_tail(&node->entry, &node->parent->children);
118 }
119 }
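/*
 * Resulting level numbering, following the code above: the root TSAR is
 * level 1, nodes attached directly to the QoS domain are level 2, and each
 * further nesting level adds one.
 */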
120
static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
122 {
123 int num_tcs = mlx5_max_tc(dev) + 1;
124
125 return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
126 }
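/*
 * Example: if mlx5_max_tc() reports 7, the device supports eight traffic
 * classes; the result is still capped at DEVLINK_RATE_TCS_MAX, the largest
 * number of traffic classes the devlink rate API can express.
 */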
127
128 static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
130 {
131 list_del_init(&node->entry);
132 node->parent = parent;
133 if (parent)
134 node->esw = parent->esw;
135 esw_qos_node_attach_to_parent(node);
136 }
137
static void esw_qos_nodes_set_parent(struct list_head *nodes,
139 struct mlx5_esw_sched_node *parent)
140 {
141 struct mlx5_esw_sched_node *node, *tmp;
142
143 list_for_each_entry_safe(node, tmp, nodes, entry) {
144 esw_qos_node_set_parent(node, parent);
145 if (!list_empty(&node->children) &&
146 parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
147 struct mlx5_esw_sched_node *child;
148
149 list_for_each_entry(child, &node->children, entry) {
150 struct mlx5_vport *vport = child->vport;
151
152 if (vport)
153 vport->qos.sched_node->parent = parent;
154 }
155 }
156 }
157 }
158
void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
160 {
161 if (vport->qos.sched_nodes) {
162 int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
163 int i;
164
165 for (i = 0; i < num_tcs; i++)
166 kfree(vport->qos.sched_nodes[i]);
167 kfree(vport->qos.sched_nodes);
168 }
169
170 kfree(vport->qos.sched_node);
171 memset(&vport->qos, 0, sizeof(vport->qos));
172 }
173
u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
175 {
176 if (!vport->qos.sched_node)
177 return 0;
178
179 return vport->qos.sched_node->ix;
180 }
181
182 struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
184 {
185 if (!vport->qos.sched_node)
186 return NULL;
187
188 return vport->qos.sched_node->parent;
189 }
190
static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
192 {
193 switch (node->type) {
194 case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
195 esw_warn(node->esw->dev,
196 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n",
197 op, sched_node_type_str[node->type], node->tc, err);
198 break;
199 case SCHED_NODE_TYPE_VPORT_TC:
200 esw_warn(node->esw->dev,
201 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n",
202 op,
203 sched_node_type_str[node->type],
204 node->vport->vport, node->tc, err);
205 break;
206 case SCHED_NODE_TYPE_VPORT:
207 esw_warn(node->esw->dev,
208 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
209 op, sched_node_type_str[node->type], node->vport->vport, err);
210 break;
211 case SCHED_NODE_TYPE_RATE_LIMITER:
212 case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
213 case SCHED_NODE_TYPE_VPORTS_TSAR:
214 esw_warn(node->esw->dev,
215 "E-Switch %s %s scheduling element failed (err=%d)\n",
216 op, sched_node_type_str[node->type], err);
217 break;
218 default:
219 esw_warn(node->esw->dev,
220 "E-Switch %s scheduling element failed (err=%d)\n",
221 op, err);
222 break;
223 }
224 }
225
static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
227 struct netlink_ext_ack *extack)
228 {
229 int err;
230
231 err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
232 &node->ix);
233 if (err) {
234 esw_qos_sched_elem_warn(node, err, "create");
235 NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
236 }
237
238 return err;
239 }
240
static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
242 struct netlink_ext_ack *extack)
243 {
244 int err;
245
246 err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
247 SCHEDULING_HIERARCHY_E_SWITCH,
248 node->ix);
249 if (err) {
250 esw_qos_sched_elem_warn(node, err, "destroy");
251 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
252 }
253
254 return err;
255 }
256
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
258 struct netlink_ext_ack *extack)
259 {
260 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
261 struct mlx5_core_dev *dev = node->esw->dev;
262 u32 bitmask = 0;
263 int err;
264
265 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
266 return -EOPNOTSUPP;
267
268 if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
269 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
270 return -EOPNOTSUPP;
271
272 if (node->max_rate == max_rate && node->bw_share == bw_share)
273 return 0;
274
275 if (node->max_rate != max_rate) {
276 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
277 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
278 }
279 if (node->bw_share != bw_share) {
280 MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
281 bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
282 }
283
284 err = mlx5_modify_scheduling_element_cmd(dev,
285 SCHEDULING_HIERARCHY_E_SWITCH,
286 sched_ctx,
287 node->ix,
288 bitmask);
289 if (err) {
290 esw_qos_sched_elem_warn(node, err, "modify");
291 NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");
292
293 return err;
294 }
295
296 node->max_rate = max_rate;
297 node->bw_share = bw_share;
298 if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
299 trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
300 else if (node->type == SCHED_NODE_TYPE_VPORT)
301 trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);
302
303 return 0;
304 }
305
static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node,
307 struct netlink_ext_ack *extack)
308 {
309 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
310
311 if (!mlx5_qos_element_type_supported(
312 node->esw->dev,
313 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT,
314 SCHEDULING_HIERARCHY_E_SWITCH))
315 return -EOPNOTSUPP;
316
317 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate);
318 MLX5_SET(scheduling_context, sched_ctx, element_type,
319 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT);
320
321 return esw_qos_node_create_sched_element(node, sched_ctx, extack);
322 }
323
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
325 struct mlx5_esw_sched_node *parent)
326 {
327 struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
328 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
329 struct mlx5_esw_sched_node *node;
330 u32 max_guarantee = 0;
331
332 /* Find max min_rate across all nodes.
333 * This will correspond to fw_max_bw_share in the final bw_share calculation.
334 */
335 list_for_each_entry(node, nodes, entry) {
336 if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
337 node->min_rate > max_guarantee)
338 max_guarantee = node->min_rate;
339 }
340
341 if (max_guarantee)
342 return max_t(u32, max_guarantee / fw_max_bw_share, 1);
343
	/* If the nodes' min_rate divider is 0 but their parent has bw_share
	 * configured, set bw_share for the nodes to the minimal value.
	 */
347
348 if (parent && parent->bw_share)
349 return 1;
350
	/* If no node has a min_rate configured, a divider of 0 sets all
	 * nodes' bw_share to 0, effectively disabling min-rate guarantees.
	 */
354 return 0;
355 }
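/*
 * Illustrative numbers (assumed, not taken from this file): if the largest
 * min_rate among the sibling nodes is 10000 Mbps and max_tsar_bw_share is
 * 100, the divider is max(10000 / 100, 1) = 100.
 */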
356
static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
358 {
359 if (!divider)
360 return 0;
361 return min_t(u32, fw_max,
362 max_t(u32,
363 DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
364 }
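/*
 * Continuing the illustrative numbers above: a sibling with min_rate
 * 2500 Mbps and divider 100 gets
 * bw_share = min(100, max(DIV_ROUND_UP(2500, 100), MLX5_MIN_BW_SHARE)) = 25.
 */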
365
static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
367 u32 divider,
368 struct netlink_ext_ack *extack)
369 {
370 u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
371 u32 bw_share;
372
373 bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);
374
375 esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
376 }
377
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
379 struct mlx5_esw_sched_node *parent,
380 struct netlink_ext_ack *extack)
381 {
382 struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
383 u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
384 struct mlx5_esw_sched_node *node;
385
386 list_for_each_entry(node, nodes, entry) {
387 if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
388 continue;
389
390 /* Vports TC TSARs don't have a minimum rate configured,
391 * so there's no need to update the bw_share on them.
392 */
393 if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
394 esw_qos_update_sched_node_bw_share(node, divider,
395 extack);
396 }
397
398 if (list_empty(&node->children))
399 continue;
400
401 esw_qos_normalize_min_rate(node->esw, node, extack);
402 }
403 }
404
static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
406 {
407 u32 total = 0;
408 int i;
409
410 for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
411 total += tc_bw[i];
412
413 /* If total is zero, tc-bw config is disabled and we shouldn't reach
414 * here.
415 */
416 return WARN_ON(!total) ? 1 : total;
417 }
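/*
 * Example (assumed shares): tc_bw = {20, 30, 50, 0, ...} gives a divider of
 * 100; an all-zero tc_bw means tc-bw is disabled and is not expected to
 * reach this helper.
 */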
418
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
420 u32 min_rate, struct netlink_ext_ack *extack)
421 {
422 struct mlx5_eswitch *esw = node->esw;
423
424 if (min_rate == node->min_rate)
425 return 0;
426
427 node->min_rate = min_rate;
428 esw_qos_normalize_min_rate(esw, node->parent, extack);
429
430 return 0;
431 }
432
433 static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
435 u32 max_rate, u32 bw_share, u32 *tsar_ix)
436 {
437 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
438 void *attr;
439
440 if (!mlx5_qos_element_type_supported(dev,
441 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
442 SCHEDULING_HIERARCHY_E_SWITCH) ||
443 !mlx5_qos_tsar_type_supported(dev,
444 TSAR_ELEMENT_TSAR_TYPE_DWRR,
445 SCHEDULING_HIERARCHY_E_SWITCH))
446 return -EOPNOTSUPP;
447
448 MLX5_SET(scheduling_context, tsar_ctx, element_type,
449 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
450 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
451 parent_element_id);
452 MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
453 MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
454 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
455 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
456
457 return mlx5_create_scheduling_element_cmd(dev,
458 SCHEDULING_HIERARCHY_E_SWITCH,
459 tsar_ctx,
460 tsar_ix);
461 }
462
463 static int
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
465 struct netlink_ext_ack *extack)
466 {
467 struct mlx5_esw_sched_node *parent = vport_node->parent;
468 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
469 struct mlx5_core_dev *dev = vport_node->esw->dev;
470 void *attr;
471
472 if (!mlx5_qos_element_type_supported(
473 dev,
474 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
475 SCHEDULING_HIERARCHY_E_SWITCH))
476 return -EOPNOTSUPP;
477
478 MLX5_SET(scheduling_context, sched_ctx, element_type,
479 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
480 attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
481 MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
482 MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
483 parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
484 MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
485 vport_node->max_rate);
486
487 return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
488 }
489
490 static int
esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
492 u32 rate_limit_elem_ix,
493 struct netlink_ext_ack *extack)
494 {
495 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
496 struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
497 void *attr;
498
499 if (!mlx5_qos_element_type_supported(
500 dev,
501 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC,
502 SCHEDULING_HIERARCHY_E_SWITCH))
503 return -EOPNOTSUPP;
504
505 MLX5_SET(scheduling_context, sched_ctx, element_type,
506 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
507 attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
508 MLX5_SET(vport_tc_element, attr, vport_number,
509 vport_tc_node->vport->vport);
510 MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
511 MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
512 rate_limit_elem_ix);
513 MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
514 vport_tc_node->parent->ix);
515 MLX5_SET(scheduling_context, sched_ctx, bw_share,
516 vport_tc_node->bw_share);
517
518 return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
519 extack);
520 }
521
522 static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
524 struct mlx5_esw_sched_node *parent)
525 {
526 struct mlx5_esw_sched_node *node;
527
528 node = kzalloc(sizeof(*node), GFP_KERNEL);
529 if (!node)
530 return NULL;
531
532 node->esw = esw;
533 node->ix = tsar_ix;
534 node->type = type;
535 node->parent = parent;
536 INIT_LIST_HEAD(&node->children);
537 esw_qos_node_attach_to_parent(node);
538 if (!parent) {
539 /* The caller is responsible for inserting the node into the
540 * parent list if necessary. This function can also be used with
541 * a NULL parent, which doesn't necessarily indicate that it
542 * refers to the root scheduling element.
543 */
544 list_del_init(&node->entry);
545 }
546
547 return node;
548 }
549
static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
551 {
552 list_del(&node->entry);
553 kfree(node);
554 }
555
static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
557 {
558 esw_qos_node_destroy_sched_element(node, extack);
559 __esw_qos_free_node(node);
560 }
561
static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
563 u8 tc, struct netlink_ext_ack *extack)
564 {
565 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
566 struct mlx5_core_dev *dev = parent->esw->dev;
567 struct mlx5_esw_sched_node *vports_tc_node;
568 void *attr;
569 int err;
570
571 if (!mlx5_qos_element_type_supported(
572 dev,
573 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
574 SCHEDULING_HIERARCHY_E_SWITCH) ||
575 !mlx5_qos_tsar_type_supported(dev,
576 TSAR_ELEMENT_TSAR_TYPE_DWRR,
577 SCHEDULING_HIERARCHY_E_SWITCH))
578 return -EOPNOTSUPP;
579
580 vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
581 SCHED_NODE_TYPE_VPORTS_TC_TSAR,
582 parent);
583 if (!vports_tc_node) {
584 NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
585 esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
586 return -ENOMEM;
587 }
588
589 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
590 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
591 MLX5_SET(tsar_element, attr, traffic_class, tc);
592 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
593 MLX5_SET(scheduling_context, tsar_ctx, element_type,
594 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
595
596 err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
597 extack);
598 if (err)
599 goto err_create_sched_element;
600
601 vports_tc_node->tc = tc;
602
603 return 0;
604
605 err_create_sched_element:
606 __esw_qos_free_node(vports_tc_node);
607 return err;
608 }
609
610 static void
esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
612 u32 *tc_bw)
613 {
614 memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw));
615 }
616
617 static void
esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
619 u32 *tc_bw, struct netlink_ext_ack *extack)
620 {
621 struct mlx5_eswitch *esw = tc_arbiter_node->esw;
622 struct mlx5_esw_sched_node *vports_tc_node;
623 u32 divider, fw_max_bw_share;
624
625 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
626 divider = esw_qos_calculate_tc_bw_divider(tc_bw);
627 list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
628 u8 tc = vports_tc_node->tc;
629 u32 bw_share;
630
631 tc_arbiter_node->tc_bw[tc] = tc_bw[tc];
632 bw_share = tc_bw[tc] * fw_max_bw_share;
633 bw_share = esw_qos_calc_bw_share(bw_share, divider,
634 fw_max_bw_share);
635 esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
636 }
637 }
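/*
 * Illustrative example (assumed values): with tc_bw = {20, 80}, divider = 100
 * and max_tsar_bw_share = 100, TC0 gets
 * esw_qos_calc_bw_share(20 * 100, 100, 100) = 20 and TC1 gets 80, so the
 * vports TC TSARs split the bandwidth 20/80.
 */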
638
639 static void
esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
641 struct netlink_ext_ack *extack)
642 {
643 struct mlx5_esw_sched_node *vports_tc_node, *tmp;
644
645 list_for_each_entry_safe(vports_tc_node, tmp,
646 &tc_arbiter_node->children, entry)
647 esw_qos_destroy_node(vports_tc_node, extack);
648 }
649
650 static int
esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
652 struct netlink_ext_ack *extack)
653 {
654 struct mlx5_eswitch *esw = tc_arbiter_node->esw;
655 int err, i, num_tcs = esw_qos_num_tcs(esw->dev);
656
657 for (i = 0; i < num_tcs; i++) {
658 err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
659 if (err)
660 goto err_tc_node_create;
661 }
662
663 return 0;
664
665 err_tc_node_create:
666 esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
667 return err;
668 }
669
static int esw_qos_create_tc_arbiter_sched_elem(
671 struct mlx5_esw_sched_node *tc_arbiter_node,
672 struct netlink_ext_ack *extack)
673 {
674 u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
675 u32 tsar_parent_ix;
676 void *attr;
677
678 if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
679 TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
680 SCHEDULING_HIERARCHY_E_SWITCH)) {
681 NL_SET_ERR_MSG_MOD(extack,
682 "E-Switch TC Arbiter scheduling element is not supported");
683 return -EOPNOTSUPP;
684 }
685
686 attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
687 MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
688 tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
689 tc_arbiter_node->esw->qos.root_tsar_ix;
690 MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
691 tsar_parent_ix);
692 MLX5_SET(scheduling_context, tsar_ctx, element_type,
693 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
694 MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
695 tc_arbiter_node->max_rate);
696 MLX5_SET(scheduling_context, tsar_ctx, bw_share,
697 tc_arbiter_node->bw_share);
698
699 return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
700 extack);
701 }
702
703 static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
705 struct netlink_ext_ack *extack)
706 {
707 struct mlx5_esw_sched_node *node;
708 u32 tsar_ix;
709 int err;
710
711 err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
712 0, &tsar_ix);
713 if (err) {
714 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
715 return ERR_PTR(err);
716 }
717
718 node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
719 if (!node) {
720 NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
721 err = -ENOMEM;
722 goto err_alloc_node;
723 }
724
725 list_add_tail(&node->entry, &esw->qos.domain->nodes);
726 esw_qos_normalize_min_rate(esw, NULL, extack);
727 trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
728
729 return node;
730
731 err_alloc_node:
732 if (mlx5_destroy_scheduling_element_cmd(esw->dev,
733 SCHEDULING_HIERARCHY_E_SWITCH,
734 tsar_ix))
735 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
736 return ERR_PTR(err);
737 }
738
739 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
740 static void esw_qos_put(struct mlx5_eswitch *esw);
741
742 static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
744 {
745 struct mlx5_esw_sched_node *node;
746 int err;
747
748 esw_assert_qos_lock_held(esw);
749 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
750 return ERR_PTR(-EOPNOTSUPP);
751
752 err = esw_qos_get(esw, extack);
753 if (err)
754 return ERR_PTR(err);
755
756 node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
757 if (IS_ERR(node))
758 esw_qos_put(esw);
759
760 return node;
761 }
762
static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
764 {
765 struct mlx5_eswitch *esw = node->esw;
766
767 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
768 esw_qos_destroy_vports_tc_nodes(node, extack);
769
770 trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
771 esw_qos_destroy_node(node, extack);
772 esw_qos_normalize_min_rate(esw, NULL, extack);
773 }
774
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
776 {
777 struct mlx5_core_dev *dev = esw->dev;
778 int err;
779
780 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
781 return -EOPNOTSUPP;
782
783 err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
784 &esw->qos.root_tsar_ix);
785 if (err) {
786 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
787 return err;
788 }
789
790 refcount_set(&esw->qos.refcnt, 1);
791
792 return 0;
793 }
794
static void esw_qos_destroy(struct mlx5_eswitch *esw)
796 {
797 int err;
798
799 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
800 SCHEDULING_HIERARCHY_E_SWITCH,
801 esw->qos.root_tsar_ix);
802 if (err)
803 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
804 }
805
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
807 {
808 int err = 0;
809
810 esw_assert_qos_lock_held(esw);
811 if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() sets the refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
815 err = esw_qos_create(esw, extack);
816 }
817
818 return err;
819 }
820
static void esw_qos_put(struct mlx5_eswitch *esw)
822 {
823 esw_assert_qos_lock_held(esw);
824 if (refcount_dec_and_test(&esw->qos.refcnt))
825 esw_qos_destroy(esw);
826 }
827
828 static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
830 struct netlink_ext_ack *extack)
831 {
832 /* Clean up all Vports TC nodes within the TC arbiter node. */
833 esw_qos_destroy_vports_tc_nodes(node, extack);
834 /* Destroy the scheduling element for the TC arbiter node itself. */
835 esw_qos_node_destroy_sched_element(node, extack);
836 }
837
static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
839 struct netlink_ext_ack *extack)
840 {
841 u32 curr_ix = node->ix;
842 int err;
843
844 err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
845 if (err)
846 return err;
847 /* Initialize the vports TC nodes within created TC arbiter TSAR. */
848 err = esw_qos_create_vports_tc_nodes(node, extack);
849 if (err)
850 goto err_vports_tc_nodes;
851
852 node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;
853
854 return 0;
855
856 err_vports_tc_nodes:
857 /* If initialization fails, clean up the scheduling element
858 * for the TC arbiter node.
859 */
860 esw_qos_node_destroy_sched_element(node, NULL);
861 node->ix = curr_ix;
862 return err;
863 }
864
865 static int
esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
867 u32 rate_limit_elem_ix,
868 struct mlx5_esw_sched_node *vports_tc_node,
869 struct netlink_ext_ack *extack)
870 {
871 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
872 struct mlx5_esw_sched_node *vport_tc_node;
873 u8 tc = vports_tc_node->tc;
874 int err;
875
876 vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
877 SCHED_NODE_TYPE_VPORT_TC,
878 vports_tc_node);
879 if (!vport_tc_node)
880 return -ENOMEM;
881
882 vport_tc_node->min_rate = vport_node->min_rate;
883 vport_tc_node->tc = tc;
884 vport_tc_node->vport = vport;
885 err = esw_qos_vport_tc_create_sched_element(vport_tc_node,
886 rate_limit_elem_ix,
887 extack);
888 if (err)
889 goto err_out;
890
891 vport->qos.sched_nodes[tc] = vport_tc_node;
892
893 return 0;
894 err_out:
895 __esw_qos_free_node(vport_tc_node);
896 return err;
897 }
898
899 static void
esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport,
901 struct netlink_ext_ack *extack)
902 {
903 int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
904
905 for (i = 0; i < num_tcs; i++) {
906 if (vport->qos.sched_nodes[i]) {
907 __esw_qos_destroy_node(vport->qos.sched_nodes[i],
908 extack);
909 }
910 }
911
912 kfree(vport->qos.sched_nodes);
913 vport->qos.sched_nodes = NULL;
914 }
915
916 static int
esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport,
918 enum sched_node_type type,
919 struct netlink_ext_ack *extack)
920 {
921 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
922 struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node;
923 int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
924 u32 rate_limit_elem_ix;
925
926 vport->qos.sched_nodes = kcalloc(num_tcs,
927 sizeof(struct mlx5_esw_sched_node *),
928 GFP_KERNEL);
929 if (!vport->qos.sched_nodes) {
930 NL_SET_ERR_MSG_MOD(extack,
931 "Allocating the vport TC scheduling elements failed.");
932 return -ENOMEM;
933 }
934
935 rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ?
936 vport_node->ix : 0;
937 tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ?
938 vport_node->parent : vport_node;
939 list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
940 err = esw_qos_create_vport_tc_sched_node(vport,
941 rate_limit_elem_ix,
942 vports_tc_node,
943 extack);
944 if (err)
945 goto err_create_vport_tc;
946 }
947
948 return 0;
949
950 err_create_vport_tc:
951 esw_qos_destroy_vport_tc_sched_elements(vport, NULL);
952
953 return err;
954 }
955
956 static int
esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
958 struct netlink_ext_ack *extack)
959 {
960 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
961 struct mlx5_esw_sched_node *parent = vport_node->parent;
962 int err;
963
964 if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
965 int new_level, max_level;
966
967 /* Increase the parent's level by 2 to account for both the
968 * TC arbiter and the vports TC scheduling element.
969 */
970 new_level = (parent ? parent->level : 2) + 2;
971 max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
972 log_esw_max_sched_depth);
973 if (new_level > max_level) {
974 NL_SET_ERR_MSG_MOD(extack,
975 "TC arbitration on leafs is not supported beyond max scheduling depth");
976 return -EOPNOTSUPP;
977 }
978 }
979
980 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
981
982 if (type == SCHED_NODE_TYPE_RATE_LIMITER)
983 err = esw_qos_create_rate_limit_element(vport_node, extack);
984 else
985 err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack);
986 if (err)
987 return err;
988
	/* A rate limiter affects multiple nodes that are not directly
	 * connected to it and is not a direct member of the QoS hierarchy.
	 * Unlink the vport node from its parent to reflect that.
	 */
993 if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
994 list_del_init(&vport_node->entry);
995 vport_node->level = 0;
996 }
997
998 err = esw_qos_create_vport_tc_sched_elements(vport, type, extack);
999 if (err)
1000 goto err_sched_nodes;
1001
1002 return 0;
1003
1004 err_sched_nodes:
1005 if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
1006 esw_qos_node_destroy_sched_element(vport_node, NULL);
1007 esw_qos_node_attach_to_parent(vport_node);
1008 } else {
1009 esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
1010 }
1011 return err;
1012 }
1013
static void esw_qos_vport_tc_disable(struct mlx5_vport *vport,
1015 struct netlink_ext_ack *extack)
1016 {
1017 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1018 enum sched_node_type curr_type = vport_node->type;
1019
1020 esw_qos_destroy_vport_tc_sched_elements(vport, extack);
1021
1022 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER)
1023 esw_qos_node_destroy_sched_element(vport_node, extack);
1024 else
1025 esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack);
1026 }
1027
static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
1029 u32 min_rate,
1030 struct netlink_ext_ack *extack)
1031 {
1032 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1033 int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
1034
1035 for (i = 0; i < num_tcs; i++) {
1036 err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
1037 min_rate, extack);
1038 if (err)
1039 goto err_out;
1040 }
1041 vport_node->min_rate = min_rate;
1042
1043 return 0;
1044 err_out:
1045 for (--i; i >= 0; i--) {
1046 esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
1047 vport_node->min_rate, extack);
1048 }
1049 return err;
1050 }
1051
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
1053 {
1054 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1055 enum sched_node_type curr_type = vport_node->type;
1056
1057 if (curr_type == SCHED_NODE_TYPE_VPORT)
1058 esw_qos_node_destroy_sched_element(vport_node, extack);
1059 else
1060 esw_qos_vport_tc_disable(vport, extack);
1061
1062 vport_node->bw_share = 0;
1063 memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw));
1064 list_del_init(&vport_node->entry);
1065 esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);
1066
1067 trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
1068 }
1069
static int esw_qos_vport_enable(struct mlx5_vport *vport,
1071 enum sched_node_type type,
1072 struct mlx5_esw_sched_node *parent,
1073 struct netlink_ext_ack *extack)
1074 {
1075 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1076 int err;
1077
1078 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1079
1080 esw_qos_node_set_parent(vport_node, parent);
1081 if (type == SCHED_NODE_TYPE_VPORT)
1082 err = esw_qos_vport_create_sched_element(vport_node, extack);
1083 else
1084 err = esw_qos_vport_tc_enable(vport, type, extack);
1085 if (err)
1086 return err;
1087
1088 vport_node->type = type;
1089 esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
1090 trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
1091 vport_node->bw_share);
1092
1093 return 0;
1094 }
1095
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
1097 struct mlx5_esw_sched_node *parent, u32 max_rate,
1098 u32 min_rate, struct netlink_ext_ack *extack)
1099 {
1100 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1101 struct mlx5_esw_sched_node *sched_node;
1102 struct mlx5_eswitch *parent_esw;
1103 int err;
1104
1105 esw_assert_qos_lock_held(esw);
1106 err = esw_qos_get(esw, extack);
1107 if (err)
1108 return err;
1109
1110 parent_esw = parent ? parent->esw : esw;
1111 sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
1112 if (!sched_node) {
1113 esw_qos_put(esw);
1114 return -ENOMEM;
1115 }
1116 if (!parent)
1117 list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);
1118
1119 sched_node->max_rate = max_rate;
1120 sched_node->min_rate = min_rate;
1121 sched_node->vport = vport;
1122 vport->qos.sched_node = sched_node;
1123 err = esw_qos_vport_enable(vport, type, parent, extack);
1124 if (err) {
1125 __esw_qos_free_node(sched_node);
1126 esw_qos_put(esw);
1127 vport->qos.sched_node = NULL;
1128 }
1129
1130 return err;
1131 }
1132
static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport)
1134 {
1135 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1136
1137 esw_assert_qos_lock_held(esw);
1138 if (!vport->qos.sched_node)
1139 return;
1140
1141 esw_qos_vport_disable(vport, NULL);
1142 mlx5_esw_qos_vport_qos_free(vport);
1143 esw_qos_put(esw);
1144 }
1145
void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
1147 {
1148 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1149 struct mlx5_esw_sched_node *parent;
1150
1151 lockdep_assert_held(&esw->state_lock);
1152 esw_qos_lock(esw);
1153 if (!vport->qos.sched_node)
1154 goto unlock;
1155
1156 parent = vport->qos.sched_node->parent;
1157 WARN(parent, "Disabling QoS on port before detaching it from node");
1158
1159 mlx5_esw_qos_vport_disable_locked(vport);
1160 unlock:
1161 esw_qos_unlock(esw);
1162 }
1163
static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
1165 struct netlink_ext_ack *extack)
1166 {
1167 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1168
1169 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1170
1171 if (!vport_node)
1172 return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
1173 extack);
1174 else
1175 return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share,
1176 extack);
1177 }
1178
static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
1180 struct netlink_ext_ack *extack)
1181 {
1182 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1183
1184 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1185
1186 if (!vport_node)
1187 return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
1188 extack);
1189 else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER)
1190 return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack);
1191 else
1192 return esw_qos_set_node_min_rate(vport_node, min_rate, extack);
1193 }
1194
int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
1196 {
1197 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1198 int err;
1199
1200 esw_qos_lock(esw);
1201 err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
1202 if (!err)
1203 err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
1204 esw_qos_unlock(esw);
1205 return err;
1206 }
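/*
 * Usage sketch (hypothetical values): a caller that wants to guarantee
 * 1000 Mbps and cap a vport at 10000 Mbps would call
 *
 *	err = mlx5_esw_qos_set_vport_rate(vport, 10000, 1000);
 *
 * Both rates are in Mbps; a value of 0 leaves the corresponding limit or
 * guarantee unset.
 */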
1207
bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
1209 {
1210 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1211 bool enabled;
1212
1213 esw_qos_lock(esw);
1214 enabled = !!vport->qos.sched_node;
1215 if (enabled) {
1216 *max_rate = vport->qos.sched_node->max_rate;
1217 *min_rate = vport->qos.sched_node->min_rate;
1218 }
1219 esw_qos_unlock(esw);
1220 return enabled;
1221 }
1222
static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
1224 enum sched_node_type new_type,
1225 struct netlink_ext_ack *extack)
1226 {
1227 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR &&
1228 new_type == SCHED_NODE_TYPE_RATE_LIMITER) {
1229 NL_SET_ERR_MSG_MOD(extack,
1230 "Cannot switch from vport-level TC arbitration to node-level TC arbitration");
1231 return -EOPNOTSUPP;
1232 }
1233
1234 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER &&
1235 new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1236 NL_SET_ERR_MSG_MOD(extack,
1237 "Cannot switch from node-level TC arbitration to vport-level TC arbitration");
1238 return -EOPNOTSUPP;
1239 }
1240
1241 return 0;
1242 }
1243
static int esw_qos_vport_update(struct mlx5_vport *vport,
1245 enum sched_node_type type,
1246 struct mlx5_esw_sched_node *parent,
1247 struct netlink_ext_ack *extack)
1248 {
1249 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1250 struct mlx5_esw_sched_node *curr_parent = vport_node->parent;
1251 enum sched_node_type curr_type = vport_node->type;
1252 u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
1253 int err;
1254
1255 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1256 if (curr_type == type && curr_parent == parent)
1257 return 0;
1258
1259 err = esw_qos_vport_tc_check_type(curr_type, type, extack);
1260 if (err)
1261 return err;
1262
1263 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
1264 esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw);
1265
1266 esw_qos_vport_disable(vport, extack);
1267
1268 err = esw_qos_vport_enable(vport, type, parent, extack);
1269 if (err) {
1270 esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
1271 extack = NULL;
1272 }
1273
1274 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
1275 esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw,
1276 extack);
1277 }
1278
1279 return err;
1280 }
1281
static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
1283 struct netlink_ext_ack *extack)
1284 {
1285 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1286 struct mlx5_esw_sched_node *curr_parent;
1287 enum sched_node_type type;
1288
1289 esw_assert_qos_lock_held(esw);
1290 curr_parent = vport->qos.sched_node->parent;
1291 if (curr_parent == parent)
1292 return 0;
1293
1294 /* Set vport QoS type based on parent node type if different from
1295 * default QoS; otherwise, use the vport's current QoS type.
1296 */
1297 if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1298 type = SCHED_NODE_TYPE_RATE_LIMITER;
1299 else if (curr_parent &&
1300 curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1301 type = SCHED_NODE_TYPE_VPORT;
1302 else
1303 type = vport->qos.sched_node->type;
1304
1305 return esw_qos_vport_update(vport, type, parent, extack);
1306 }
1307
1308 static void
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
1310 struct mlx5_esw_sched_node *node,
1311 struct netlink_ext_ack *extack)
1312 {
1313 struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;
1314
1315 vports_tc_node = list_first_entry(&tc_arbiter_node->children,
1316 struct mlx5_esw_sched_node,
1317 entry);
1318
1319 list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
1320 entry)
1321 esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
1322 }
1323
static int esw_qos_switch_tc_arbiter_node_to_vports(
1325 struct mlx5_esw_sched_node *tc_arbiter_node,
1326 struct mlx5_esw_sched_node *node,
1327 struct netlink_ext_ack *extack)
1328 {
1329 u32 parent_tsar_ix = node->parent ?
1330 node->parent->ix : node->esw->qos.root_tsar_ix;
1331 int err;
1332
1333 err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
1334 node->max_rate, node->bw_share,
1335 &node->ix);
1336 if (err) {
1337 NL_SET_ERR_MSG_MOD(extack,
1338 "Failed to create scheduling element for vports node when disabling vports TC QoS");
1339 return err;
1340 }
1341
1342 node->type = SCHED_NODE_TYPE_VPORTS_TSAR;
1343
1344 /* Disable TC QoS for vports in the arbiter node. */
1345 esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);
1346
1347 return 0;
1348 }
1349
static int esw_qos_switch_vports_node_to_tc_arbiter(
1351 struct mlx5_esw_sched_node *node,
1352 struct mlx5_esw_sched_node *tc_arbiter_node,
1353 struct netlink_ext_ack *extack)
1354 {
1355 struct mlx5_esw_sched_node *vport_node, *tmp;
1356 struct mlx5_vport *vport;
1357 int err;
1358
1359 /* Enable TC QoS for each vport in the node. */
1360 list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
1361 vport = vport_node->vport;
1362 err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
1363 extack);
1364 if (err)
1365 goto err_out;
1366 }
1367
1368 /* Destroy the current vports node TSAR. */
1369 err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
1370 SCHEDULING_HIERARCHY_E_SWITCH,
1371 node->ix);
1372 if (err)
1373 goto err_out;
1374
1375 return 0;
1376 err_out:
1377 /* Restore vports back into the node if an error occurs. */
1378 esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);
1379
1380 return err;
1381 }
1382
1383 static struct mlx5_esw_sched_node *
esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
1385 {
1386 struct mlx5_esw_sched_node *new_node;
1387
1388 new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
1389 curr_node->type, NULL);
1390 if (!new_node)
1391 return ERR_PTR(-ENOMEM);
1392
1393 esw_qos_nodes_set_parent(&curr_node->children, new_node);
1394 return new_node;
1395 }
1396
static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
1398 struct netlink_ext_ack *extack)
1399 {
1400 struct mlx5_esw_sched_node *curr_node;
1401 int err;
1402
1403 if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1404 return 0;
1405
1406 /* Allocate a new rate node to hold the current state, which will allow
1407 * for restoring the vports back to this node after disabling TC
1408 * arbitration.
1409 */
1410 curr_node = esw_qos_move_node(node);
1411 if (IS_ERR(curr_node)) {
1412 NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
1413 return PTR_ERR(curr_node);
1414 }
1415
1416 /* Disable TC QoS for all vports, and assign them back to the node. */
1417 err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
1418 if (err)
1419 goto err_out;
1420
1421 /* Clean up the TC arbiter node after disabling TC QoS for vports. */
1422 esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
1423 goto out;
1424 err_out:
1425 esw_qos_nodes_set_parent(&curr_node->children, node);
1426 out:
1427 __esw_qos_free_node(curr_node);
1428 return err;
1429 }
1430
static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
1432 struct netlink_ext_ack *extack)
1433 {
1434 struct mlx5_esw_sched_node *curr_node, *child;
1435 int err, new_level, max_level;
1436
1437 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1438 return 0;
1439
1440 /* Increase the hierarchy level by one to account for the additional
1441 * vports TC scheduling node, and verify that the new level does not
1442 * exceed the maximum allowed depth.
1443 */
1444 new_level = node->level + 1;
1445 max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
1446 if (new_level > max_level) {
1447 NL_SET_ERR_MSG_MOD(extack,
1448 "TC arbitration on nodes is not supported beyond max scheduling depth");
1449 return -EOPNOTSUPP;
1450 }
1451
1452 /* Ensure the node does not contain non-leaf children before assigning
1453 * TC bandwidth.
1454 */
1455 if (!list_empty(&node->children)) {
1456 list_for_each_entry(child, &node->children, entry) {
1457 if (!child->vport) {
1458 NL_SET_ERR_MSG_MOD(extack,
1459 "Cannot configure TC bandwidth on a node with non-leaf children");
1460 return -EOPNOTSUPP;
1461 }
1462 }
1463 }
1464
1465 /* Allocate a new node that will store the information of the current
1466 * node. This will be used later to restore the node if necessary.
1467 */
1468 curr_node = esw_qos_move_node(node);
1469 if (IS_ERR(curr_node)) {
1470 NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
1471 return PTR_ERR(curr_node);
1472 }
1473
1474 /* Initialize the TC arbiter node for QoS management.
1475 * This step prepares the node for handling Traffic Class arbitration.
1476 */
1477 err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
1478 if (err)
1479 goto err_setup;
1480
1481 /* Enable TC QoS for each vport within the current node. */
1482 err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
1483 if (err)
1484 goto err_switch_vports;
1485 goto out;
1486
1487 err_switch_vports:
1488 esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
1489 node->ix = curr_node->ix;
1490 node->type = curr_node->type;
1491 err_setup:
1492 esw_qos_nodes_set_parent(&curr_node->children, node);
1493 out:
1494 __esw_qos_free_node(curr_node);
1495 return err;
1496 }
1497
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
1499 {
1500 struct ethtool_link_ksettings lksettings;
1501 struct net_device *slave, *master;
1502 u32 speed = SPEED_UNKNOWN;
1503
1504 /* Lock ensures a stable reference to master and slave netdevice
1505 * while port speed of master is queried.
1506 */
1507 ASSERT_RTNL();
1508
1509 slave = mlx5_uplink_netdev_get(mdev);
1510 if (!slave)
1511 goto out;
1512
1513 master = netdev_master_upper_dev_get(slave);
1514 if (master && !__ethtool_get_link_ksettings(master, &lksettings))
1515 speed = lksettings.base.speed;
1516
1517 out:
1518 mlx5_uplink_netdev_put(mdev, slave);
1519 return speed;
1520 }
1521
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
1523 bool hold_rtnl_lock, struct netlink_ext_ack *extack)
1524 {
1525 int err;
1526
1527 if (!mlx5_lag_is_active(mdev))
1528 goto skip_lag;
1529
1530 if (hold_rtnl_lock)
1531 rtnl_lock();
1532
1533 *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);
1534
1535 if (hold_rtnl_lock)
1536 rtnl_unlock();
1537
1538 if (*link_speed_max != (u32)SPEED_UNKNOWN)
1539 return 0;
1540
1541 skip_lag:
1542 err = mlx5_port_max_linkspeed(mdev, link_speed_max);
1543 if (err)
1544 NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
1545
1546 return err;
1547 }
1548
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
1550 const char *name, u32 link_speed_max,
1551 u64 value, struct netlink_ext_ack *extack)
1552 {
1553 if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceeds link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceeds link maximum speed");
1557 return -EINVAL;
1558 }
1559
1560 return 0;
1561 }
1562
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
1564 {
1565 struct mlx5_vport *vport;
1566 u32 link_speed_max;
1567 int err;
1568
1569 vport = mlx5_eswitch_get_vport(esw, vport_num);
1570 if (IS_ERR(vport))
1571 return PTR_ERR(vport);
1572
1573 if (rate_mbps) {
1574 err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
1575 if (err)
1576 return err;
1577
1578 err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
1579 link_speed_max, rate_mbps, NULL);
1580 if (err)
1581 return err;
1582 }
1583
1584 esw_qos_lock(esw);
1585 err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
1586 esw_qos_unlock(esw);
1587
1588 return err;
1589 }
1590
1591 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
1592
/* Converts a bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place. Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
1598 u64 *rate, struct netlink_ext_ack *extack)
1599 {
1600 u32 link_speed_max, remainder;
1601 u64 value;
1602 int err;
1603
1604 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
1605 if (remainder) {
1606 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
1607 name, *rate);
1608 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
1609 return -EINVAL;
1610 }
1611
1612 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
1613 if (err)
1614 return err;
1615
1616 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
1617 if (err)
1618 return err;
1619
1620 *rate = value;
1621 return 0;
1622 }
1623
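/* Return false if bandwidth is requested for a traffic class index beyond
 * the number of TCs supported by the device.
 */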
static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
                                               u32 *tc_bw)
{
        int i, num_tcs = esw_qos_num_tcs(esw->dev);

        for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
                if (tc_bw[i])
                        return false;
        }

        return true;
}

static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
                                                     u32 *tc_bw)
{
        struct mlx5_esw_sched_node *node = vport->qos.sched_node;
        struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

        esw = (node && node->parent) ? node->parent->esw : esw;

        return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
}

static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
        int i;

        for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
                if (tc_bw[i])
                        return false;
        }

        return true;
}

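/* Release the vport's scheduling node once it no longer carries any QoS
 * configuration: no parent node, no max/min rate and no per-TC bandwidth.
 */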
static void esw_vport_qos_prune_empty(struct mlx5_vport *vport)
{
        struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

        esw_assert_qos_lock_held(vport->dev->priv.eswitch);
        if (!vport_node)
                return;

        if (vport_node->parent || vport_node->max_rate ||
            vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw))
                return;

        mlx5_esw_qos_vport_disable_locked(vport);
}

int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
        if (esw->qos.domain)
                return 0;  /* Nothing to change. */

        return esw_qos_domain_init(esw);
}

void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
{
        if (esw->qos.domain)
                esw_qos_domain_release(esw);
}

/* Eswitch devlink rate API */

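/* These callbacks implement the devlink-rate ops for the E-Switch. From user
 * space they are typically exercised through iproute2, e.g. (illustrative
 * invocations; exact object names and syntax depend on the iproute2 version
 * and device):
 *
 *   devlink port function rate set pci/0000:03:00.0/1 tx_max 10Gbit
 *   devlink port function rate add pci/0000:03:00.0/group1 tx_share 1Gbit
 *   devlink port function rate set pci/0000:03:00.0/1 parent group1
 */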
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
        if (err)
                goto out;
        esw_vport_qos_prune_empty(vport);
out:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        int err;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
        if (err)
                goto out;
        esw_vport_qos_prune_empty(vport);
out:
        esw_qos_unlock(esw);
        return err;
}

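/* Configure per-traffic-class bandwidth shares on a vport. An all-zero tc_bw
 * array disables TC arbitration and reverts the vport to a plain vport
 * scheduling element; otherwise the vport is switched to (or kept on) a TC
 * arbiter TSAR and the requested shares are applied.
 */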
int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
                                         void *priv,
                                         u32 *tc_bw,
                                         struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *vport_node;
        struct mlx5_vport *vport = priv;
        struct mlx5_eswitch *esw;
        bool disable;
        int err = 0;

        esw = vport->dev->priv.eswitch;
        if (!mlx5_esw_allowed(esw))
                return -EPERM;

        disable = esw_qos_tc_bw_disabled(tc_bw);
        esw_qos_lock(esw);

        if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "E-Switch traffic classes number is not supported");
                err = -EOPNOTSUPP;
                goto unlock;
        }

        vport_node = vport->qos.sched_node;
        if (disable && !vport_node)
                goto unlock;

        if (disable) {
                if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
                        err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
                                                   vport_node->parent, extack);
                esw_vport_qos_prune_empty(vport);
                goto unlock;
        }

        if (!vport_node) {
                err = mlx5_esw_qos_vport_enable(vport,
                                                SCHED_NODE_TYPE_TC_ARBITER_TSAR,
                                                NULL, 0, 0, extack);
                vport_node = vport->qos.sched_node;
        } else {
                err = esw_qos_vport_update(vport,
                                           SCHED_NODE_TYPE_TC_ARBITER_TSAR,
                                           vport_node->parent, extack);
        }
        if (!err)
                esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
                                         void *priv,
                                         u32 *tc_bw,
                                         struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        bool disable;
        int err;

        if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "E-Switch traffic classes number is not supported");
                return -EOPNOTSUPP;
        }

        disable = esw_qos_tc_bw_disabled(tc_bw);
        esw_qos_lock(esw);
        if (disable) {
                err = esw_qos_node_disable_tc_arbitration(node, extack);
                goto unlock;
        }

        err = esw_qos_node_enable_tc_arbitration(node, extack);
        if (!err)
                esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
                                            u64 tx_share, struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = esw_qos_set_node_min_rate(node, tx_share, extack);
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
                                          u64 tx_max, struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node;
        struct mlx5_eswitch *esw;
        int err = 0;

        esw = mlx5_devlink_eswitch_get(rate_node->devlink);
        if (IS_ERR(esw))
                return PTR_ERR(esw);

        esw_qos_lock(esw);
        if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Rate node creation supported only in switchdev mode");
                err = -EOPNOTSUPP;
                goto unlock;
        }

        node = esw_qos_create_vports_sched_node(esw, extack);
        if (IS_ERR(node)) {
                err = PTR_ERR(node);
                goto unlock;
        }

        *priv = node;
unlock:
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
                                   struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv;
        struct mlx5_eswitch *esw = node->esw;

        esw_qos_lock(esw);
        __esw_qos_destroy_node(node, extack);
        esw_qos_put(esw);
        esw_qos_unlock(esw);
        return 0;
}

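/* Move a vport under @parent, or back directly under the E-Switch root when
 * @parent is NULL. A vport with no QoS state yet is enabled on demand; when
 * the new parent is a TC arbiter TSAR, the vport is attached as a rate
 * limiter element rather than a plain vport element.
 */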
int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
                                     struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
        int err = 0;

        if (parent && parent->esw != esw) {
                NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
                return -EOPNOTSUPP;
        }

        esw_qos_lock(esw);
        if (!vport->qos.sched_node && parent) {
                enum sched_node_type type;

                type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
                       SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
                err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
                                                extack);
        } else if (vport->qos.sched_node) {
                err = esw_qos_vport_update_parent(vport, parent, extack);
        }
        esw_qos_unlock(esw);
        return err;
}

int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
                                          struct devlink_rate *parent,
                                          void *priv, void *parent_priv,
                                          struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL;
        struct mlx5_vport *vport = priv;
        int err;

        err = mlx5_esw_qos_vport_update_parent(vport, node, extack);
        if (!err) {
                struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

                esw_qos_lock(esw);
                esw_vport_qos_prune_empty(vport);
                esw_qos_unlock(esw);
        }

        return err;
}

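/* A node counts as empty when it has no children. A TC arbiter TSAR also
 * counts as empty when its first child (a vports TC TSAR) is itself empty.
 */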
static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
{
        if (list_empty(&node->children))
                return true;

        if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
                return false;

        node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
                                entry);

        return esw_qos_is_node_empty(node);
}

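/* Hierarchy depth check example (illustrative numbers): with
 * log_esw_max_sched_depth == 3, max_level is 1 << 3 == 8. A plain vports
 * node attached under a parent at level 7 lands at level 8 and is accepted,
 * while a TC arbiter node under the same parent needs level 9 (one extra
 * level for its vports TC TSARs) and is rejected.
 */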
static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
                                      struct mlx5_esw_sched_node *parent,
                                      struct netlink_ext_ack *extack)
{
        u8 new_level, max_level;

        if (parent && parent->esw != node->esw) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot assign node to another E-Switch");
                return -EOPNOTSUPP;
        }

        if (!esw_qos_is_node_empty(node)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot reassign a node that contains rate objects");
                return -EOPNOTSUPP;
        }

        if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Cannot attach a node to a parent with TC bandwidth configured");
                return -EOPNOTSUPP;
        }

        new_level = parent ? parent->level + 1 : 2;
        if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                /* Increase by one to account for the vports TC scheduling
                 * element.
                 */
                new_level += 1;
        }

        max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
        if (new_level > max_level) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Node hierarchy depth exceeds the maximum supported level");
                return -EOPNOTSUPP;
        }

        return 0;
}

static int
esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
                                      struct mlx5_esw_sched_node *parent,
                                      struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent = node->parent;
        u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
        struct mlx5_eswitch *esw = node->esw;
        int err;

        esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
        esw_qos_tc_arbiter_scheduling_teardown(node, extack);
        esw_qos_node_set_parent(node, parent);
        err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
        if (err) {
                esw_qos_node_set_parent(node, curr_parent);
                if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
                        esw_warn(esw->dev, "Node restore QoS failed\n");
                        return err;
                }
        }
        esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);

        return err;
}

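/* Re-parent a vports TSAR by destroying its scheduling element and recreating
 * it under the new parent (or directly under the root TSAR when @parent is
 * NULL). If recreation fails, an attempt is made to restore the element under
 * the previous parent.
 */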
static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
                                             struct mlx5_esw_sched_node *parent,
                                             struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent = node->parent;
        struct mlx5_eswitch *esw = node->esw;
        u32 parent_ix;
        int err;

        parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
        mlx5_destroy_scheduling_element_cmd(esw->dev,
                                            SCHEDULING_HIERARCHY_E_SWITCH,
                                            node->ix);
        err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
                                             node->max_rate, 0, &node->ix);
        if (err) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Failed to create a node under the new hierarchy.");
                /* Restore under the previous parent; a NULL previous parent
                 * means the node sat directly under the root TSAR.
                 */
                parent_ix = curr_parent ? curr_parent->ix :
                            node->esw->qos.root_tsar_ix;
                if (esw_qos_create_node_sched_elem(esw->dev, parent_ix,
                                                   node->max_rate,
                                                   node->bw_share,
                                                   &node->ix))
                        esw_warn(esw->dev, "Node restore QoS failed\n");

                return err;
        }
        esw_qos_node_set_parent(node, parent);
        node->bw_share = 0;

        return 0;
}

static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
                                           struct mlx5_esw_sched_node *parent,
                                           struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *curr_parent;
        struct mlx5_eswitch *esw = node->esw;
        int err;

        err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
        if (err)
                return err;

        esw_qos_lock(esw);
        curr_parent = node->parent;
        if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
                err = esw_qos_tc_arbiter_node_update_parent(node, parent,
                                                            extack);
        } else {
                err = esw_qos_vports_node_update_parent(node, parent, extack);
        }

        if (err)
                goto out;

        esw_qos_normalize_min_rate(esw, curr_parent, extack);
        esw_qos_normalize_min_rate(esw, parent, extack);

out:
        esw_qos_unlock(esw);

        return err;
}

int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
                                          struct devlink_rate *parent,
                                          void *priv, void *parent_priv,
                                          struct netlink_ext_ack *extack)
{
        struct mlx5_esw_sched_node *node = priv, *parent_node;

        if (!parent)
                return mlx5_esw_qos_node_update_parent(node, NULL, extack);

        parent_node = parent_priv;
        return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
}
