// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of all mlx5_esw_sched_nodes. */
	struct list_head nodes;
};

static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}

static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}

static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}

static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
{
	struct mlx5_qos_domain *qos_domain;

	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
	if (!qos_domain)
		return NULL;

	mutex_init(&qos_domain->lock);
	INIT_LIST_HEAD(&qos_domain->nodes);

	return qos_domain;
}

static int esw_qos_domain_init(struct mlx5_eswitch *esw)
{
	esw->qos.domain = esw_qos_domain_alloc();

	return esw->qos.domain ? 0 : -ENOMEM;
}

static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}

enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
	SCHED_NODE_TYPE_RATE_LIMITER,
	SCHED_NODE_TYPE_VPORT_TC,
	SCHED_NODE_TYPE_VPORTS_TC_TSAR,
};

static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
	[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
	[SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
	[SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
	[SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
};

struct mlx5_esw_sched_node {
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
	/* Level in the hierarchy. The root node level is 1. */
	u8 level;
	/* Valid only when this node represents a traffic class. */
	u8 tc;
	/* Valid only for a TC arbiter node or vport TC arbiter. */
	u32 tc_bw[DEVLINK_RATE_TCS_MAX];
};

static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
{
	if (!node->parent) {
		/* Root children are assigned a depth level of 2. */
		node->level = 2;
		list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
	} else {
		node->level = node->parent->level + 1;
		list_add_tail(&node->entry, &node->parent->children);
	}
}

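/* Number of traffic classes supported by the device, capped to the number
 * of TCs the devlink rate API can expose (DEVLINK_RATE_TCS_MAX).
 */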
static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
{
	int num_tcs = mlx5_max_tc(dev) + 1;

	return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
}

static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	list_del_init(&node->entry);
	node->parent = parent;
	if (parent)
		node->esw = parent->esw;
	esw_qos_node_attach_to_parent(node);
}

static void esw_qos_nodes_set_parent(struct list_head *nodes,
				     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node, *tmp;

	list_for_each_entry_safe(node, tmp, nodes, entry) {
		esw_qos_node_set_parent(node, parent);
		if (!list_empty(&node->children) &&
		    parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
			struct mlx5_esw_sched_node *child;

			list_for_each_entry(child, &node->children, entry) {
				struct mlx5_vport *vport = child->vport;

				if (vport)
					vport->qos.sched_node->parent = parent;
			}
		}
	}
}

void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	if (vport->qos.sched_nodes) {
		int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
		int i;

		for (i = 0; i < num_tcs; i++)
			kfree(vport->qos.sched_nodes[i]);
		kfree(vport->qos.sched_nodes);
	}

	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}

u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return 0;

	return vport->qos.sched_node->ix;
}

struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
{
	if (!vport->qos.sched_node)
		return NULL;

	return vport->qos.sched_node->parent;
}

static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	switch (node->type) {
	case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT_TC:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n",
			 op,
			 sched_node_type_str[node->type],
			 node->vport->vport, node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		break;
	case SCHED_NODE_TYPE_RATE_LIMITER:
	case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
	case SCHED_NODE_TYPE_VPORTS_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (err=%d)\n",
			 op, sched_node_type_str[node->type], err);
		break;
	default:
		esw_warn(node->esw->dev,
			 "E-Switch %s scheduling element failed (err=%d)\n",
			 op, err);
		break;
	}
}

static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
						 &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}

static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}

static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}

static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node,
					     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};

	if (!mlx5_qos_element_type_supported(
		node->esw->dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate);
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT);

	return esw_qos_node_create_sched_element(node, sched_ctx, extack);
}

static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If the nodes' max min_rate divider is 0 but their parent has
	 * bw_share configured, set bw_share for the nodes to the minimal
	 * value.
	 */
	if (parent && parent->bw_share)
		return 1;

	/* If none of the nodes has min_rate configured, a divider of 0 sets
	 * all nodes' bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}

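/* Scale a rate value by the divider into a firmware bw_share, clamped to
 * the [MLX5_MIN_BW_SHARE, fw_max] range. A zero divider yields 0, which
 * disables min-rate guarantees on the element.
 */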
static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
{
	if (!divider)
		return 0;
	return min_t(u32, fw_max,
		     max_t(u32,
			   DIV_ROUND_UP(value, divider), MLX5_MIN_BW_SHARE));
}

static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 bw_share;

	bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

	esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}

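/* Recompute bw_share for every node under @parent (or for all top-level
 * nodes of the domain when @parent is NULL) so that min_rate guarantees
 * stay proportional, then recurse into nodes that have children.
 */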
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		/* Vports TC TSARs don't have a minimum rate configured,
		 * so there's no need to update the bw_share on them.
		 */
		if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
			esw_qos_update_sched_node_bw_share(node, divider,
							   extack);
		}

		if (list_empty(&node->children))
			continue;

		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}

static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
{
	u32 total = 0;
	int i;

	for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++)
		total += tc_bw[i];

	/* If total is zero, tc-bw config is disabled and we shouldn't reach
	 * here.
	 */
	return WARN_ON(!total) ? 1 : total;
}

static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (min_rate == node->min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(esw, node->parent, extack);

	return 0;
}

static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
			       u32 max_rate, u32 bw_share, u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}

static int
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
		 vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}

static int
esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
				      u32 rate_limit_elem_ix,
				      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_tc_element, attr, vport_number,
		 vport_tc_node->vport->vport);
	MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
	MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
		 rate_limit_elem_ix);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 vport_tc_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, bw_share,
		 vport_tc_node->bw_share);

	return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	esw_qos_node_attach_to_parent(node);
	if (!parent) {
		/* The caller is responsible for inserting the node into the
		 * parent list if necessary. This function can also be used with
		 * a NULL parent, which doesn't necessarily indicate that it
		 * refers to the root scheduling element.
		 */
		list_del_init(&node->entry);
	}

	return node;
}

static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}

static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}

static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
					 u8 tc, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = parent->esw->dev;
	struct mlx5_esw_sched_node *vports_tc_node;
	void *attr;
	int err;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
		SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
					      SCHED_NODE_TYPE_VPORTS_TC_TSAR,
					      parent);
	if (!vports_tc_node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
		return -ENOMEM;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
	MLX5_SET(tsar_element, attr, traffic_class, tc);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
						extack);
	if (err)
		goto err_create_sched_element;

	vports_tc_node->tc = tc;

	return 0;

err_create_sched_element:
	__esw_qos_free_node(vports_tc_node);
	return err;
}

static void
esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw)
{
	memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw));
}

static void
esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	struct mlx5_esw_sched_node *vports_tc_node;
	u32 divider, fw_max_bw_share;

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	divider = esw_qos_calculate_tc_bw_divider(tc_bw);
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		u8 tc = vports_tc_node->tc;
		u32 bw_share;

		tc_arbiter_node->tc_bw[tc] = tc_bw[tc];
		bw_share = tc_bw[tc] * fw_max_bw_share;
		bw_share = esw_qos_calc_bw_share(bw_share, divider,
						 fw_max_bw_share);
		esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
	}
}

static void
esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *tmp;

	list_for_each_entry_safe(vports_tc_node, tmp,
				 &tc_arbiter_node->children, entry)
		esw_qos_destroy_node(vports_tc_node, extack);
}

static int
esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
			       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	int err, i, num_tcs = esw_qos_num_tcs(esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
		if (err)
			goto err_tc_node_create;
	}

	return 0;

err_tc_node_create:
	esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
	return err;
}

static int esw_qos_create_tc_arbiter_sched_elem(
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	u32 tsar_parent_ix;
	void *attr;

	if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
					  TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
					  SCHEDULING_HIERARCHY_E_SWITCH)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch TC Arbiter scheduling element is not supported");
		return -EOPNOTSUPP;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
	tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
			 tc_arbiter_node->esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 tsar_parent_ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
		 tc_arbiter_node->max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share,
		 tc_arbiter_node->bw_share);

	return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
						 extack);
}

static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
					     0, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	list_add_tail(&node->entry, &esw->qos.domain->nodes);
	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}

static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		esw_qos_destroy_vports_tc_nodes(node, extack);

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}

static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
					     &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	refcount_set(&esw->qos.refcnt, 1);

	return 0;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() sets the refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
				       struct netlink_ext_ack *extack)
{
	/* Clean up all Vports TC nodes within the TC arbiter node. */
	esw_qos_destroy_vports_tc_nodes(node, extack);
	/* Destroy the scheduling element for the TC arbiter node itself. */
	esw_qos_node_destroy_sched_element(node, extack);
}

static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	u32 curr_ix = node->ix;
	int err;

	err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
	if (err)
		return err;
	/* Initialize the vports TC nodes within created TC arbiter TSAR. */
	err = esw_qos_create_vports_tc_nodes(node, extack);
	if (err)
		goto err_vports_tc_nodes;

	node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;

	return 0;

err_vports_tc_nodes:
	/* If initialization fails, clean up the scheduling element
	 * for the TC arbiter node.
	 */
	esw_qos_node_destroy_sched_element(node, NULL);
	node->ix = curr_ix;
	return err;
}

static int
esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
				   u32 rate_limit_elem_ix,
				   struct mlx5_esw_sched_node *vports_tc_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *vport_tc_node;
	u8 tc = vports_tc_node->tc;
	int err;

	vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
					     SCHED_NODE_TYPE_VPORT_TC,
					     vports_tc_node);
	if (!vport_tc_node)
		return -ENOMEM;

	vport_tc_node->min_rate = vport_node->min_rate;
	vport_tc_node->tc = tc;
	vport_tc_node->vport = vport;
	err = esw_qos_vport_tc_create_sched_element(vport_tc_node,
						    rate_limit_elem_ix,
						    extack);
	if (err)
		goto err_out;

	vport->qos.sched_nodes[tc] = vport_tc_node;

	return 0;
err_out:
	__esw_qos_free_node(vport_tc_node);
	return err;
}

static void
esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport,
					struct netlink_ext_ack *extack)
{
	int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		if (vport->qos.sched_nodes[i]) {
			__esw_qos_destroy_node(vport->qos.sched_nodes[i],
					       extack);
		}
	}

	kfree(vport->qos.sched_nodes);
	vport->qos.sched_nodes = NULL;
}

static int
esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport,
				       enum sched_node_type type,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node;
	int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
	u32 rate_limit_elem_ix;

	vport->qos.sched_nodes = kcalloc(num_tcs,
					 sizeof(struct mlx5_esw_sched_node *),
					 GFP_KERNEL);
	if (!vport->qos.sched_nodes) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Allocating the vport TC scheduling elements failed.");
		return -ENOMEM;
	}

	rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			     vport_node->ix : 0;
	tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			  vport_node->parent : vport_node;
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		err = esw_qos_create_vport_tc_sched_node(vport,
							 rate_limit_elem_ix,
							 vports_tc_node,
							 extack);
		if (err)
			goto err_create_vport_tc;
	}

	return 0;

err_create_vport_tc:
	esw_qos_destroy_vport_tc_sched_elements(vport, NULL);

	return err;
}

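/* Switch a vport to per-TC scheduling: create either a rate limiter element
 * (used when the parent node performs the TC arbitration) or a vport-level
 * TC arbiter, then create the vport's per-TC scheduling elements.
 */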
static int
esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
			struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	int err;

	if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		int new_level, max_level;

		/* Increase the parent's level by 2 to account for both the
		 * TC arbiter and the vports TC scheduling element.
		 */
		new_level = (parent ? parent->level : 2) + 2;
		max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
					      log_esw_max_sched_depth);
		if (new_level > max_level) {
			NL_SET_ERR_MSG_FMT_MOD(extack,
					       "TC arbitration on leafs is not supported beyond max depth %d",
					       max_level);
			return -EOPNOTSUPP;
		}
	}

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (type == SCHED_NODE_TYPE_RATE_LIMITER)
		err = esw_qos_create_rate_limit_element(vport_node, extack);
	else
		err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack);
	if (err)
		return err;

	/* Rate limiters impact multiple nodes not directly connected to them
	 * and are not direct members of the QoS hierarchy.
	 * Unlink the vport node from its parent to reflect that.
	 */
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		list_del_init(&vport_node->entry);
		vport_node->level = 0;
	}

	err = esw_qos_create_vport_tc_sched_elements(vport, type, extack);
	if (err)
		goto err_sched_nodes;

	return 0;

err_sched_nodes:
	if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
		esw_qos_node_destroy_sched_element(vport_node, NULL);
		esw_qos_node_attach_to_parent(vport_node);
	} else {
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
	}
	return err;
}

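/* Tear down the vport's per-TC scheduling elements together with the rate
 * limiter or TC arbiter element that was created for them.
 */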
static void esw_qos_vport_tc_disable(struct mlx5_vport *vport,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	esw_qos_destroy_vport_tc_sched_elements(vport, extack);

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack);
}

static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
					  u32 min_rate,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
						min_rate, extack);
		if (err)
			goto err_out;
	}
	vport_node->min_rate = min_rate;

	return 0;
err_out:
	for (--i; i >= 0; i--) {
		esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
					  vport_node->min_rate, extack);
	}
	return err;
}

static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	if (curr_type == SCHED_NODE_TYPE_VPORT)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_vport_tc_disable(vport, extack);

	vport_node->bw_share = 0;
	memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw));
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}

static int esw_qos_vport_enable(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport_node, parent);
	if (type == SCHED_NODE_TYPE_VPORT)
		err = esw_qos_vport_create_sched_element(vport_node, extack);
	else
		err = esw_qos_vport_tc_enable(vport, type, extack);
	if (err)
		return err;

	vport_node->type = type;
	esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
					vport_node->bw_share);

	return 0;
}

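/* Allocate a scheduling node for the vport and enable QoS on it, taking a
 * reference on the E-Switch QoS domain. On failure the node is freed and
 * the reference is dropped again.
 */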
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	struct mlx5_eswitch *parent_esw;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent_esw = parent ? parent->esw : esw;
	sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
	if (!sched_node) {
		esw_qos_put(esw);
		return -ENOMEM;
	}
	if (!parent)
		list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		__esw_qos_free_node(sched_node);
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}

static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

	esw_assert_qos_lock_held(esw);
	if (!vport->qos.sched_node)
		return;

	esw_qos_vport_disable(vport, NULL);
	mlx5_esw_qos_vport_qos_free(vport);
	esw_qos_put(esw);
}

void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *parent;

	lockdep_assert_held(&esw->state_lock);
	esw_qos_lock(esw);
	if (!vport->qos.sched_node)
		goto unlock;

	parent = vport->qos.sched_node->parent;
	WARN(parent, "Disabling QoS on port before detaching it from node");

	mlx5_esw_qos_vport_disable_locked(vport);
unlock:
	esw_qos_unlock(esw);
}

static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
						 extack);
	else
		return esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share,
						 extack);
}

static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!vport_node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
						 extack);
	else if (vport_node->type == SCHED_NODE_TYPE_RATE_LIMITER)
		return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack);
	else
		return esw_qos_set_node_min_rate(vport_node, min_rate, extack);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}

bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	bool enabled;

	esw_qos_lock(esw);
	enabled = !!vport->qos.sched_node;
	if (enabled) {
		*max_rate = vport->qos.sched_node->max_rate;
		*min_rate = vport->qos.sched_node->min_rate;
	}
	esw_qos_unlock(esw);
	return enabled;
}

static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
				       enum sched_node_type new_type,
				       struct netlink_ext_ack *extack)
{
	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR &&
	    new_type == SCHED_NODE_TYPE_RATE_LIMITER) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from vport-level TC arbitration to node-level TC arbitration");
		return -EOPNOTSUPP;
	}

	if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER &&
	    new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot switch from node-level TC arbitration to vport-level TC arbitration");
		return -EOPNOTSUPP;
	}

	return 0;
}

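/* Re-create the vport QoS configuration with a new type and/or parent,
 * restoring the previous configuration if enabling with the new settings
 * fails.
 */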
static int esw_qos_vport_update(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *curr_parent = vport_node->parent;
	enum sched_node_type curr_type = vport_node->type;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);
	if (curr_type == type && curr_parent == parent)
		return 0;

	err = esw_qos_vport_tc_check_type(curr_type, type, extack);
	if (err)
		return err;

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
		esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw);

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
		extack = NULL;
	}

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
		esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw,
						 extack);
	}

	return err;
}

static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *curr_parent;
	enum sched_node_type type;

	esw_assert_qos_lock_held(esw);
	curr_parent = vport->qos.sched_node->parent;
	if (curr_parent == parent)
		return 0;

	/* Set vport QoS type based on parent node type if different from
	 * default QoS; otherwise, use the vport's current QoS type.
	 */
	if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_RATE_LIMITER;
	else if (curr_parent &&
		 curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		type = SCHED_NODE_TYPE_VPORT;
	else
		type = vport->qos.sched_node->type;

	return esw_qos_vport_update(vport, type, parent, extack);
}

static void
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
				  struct mlx5_esw_sched_node *node,
				  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;

	vports_tc_node = list_first_entry(&tc_arbiter_node->children,
					  struct mlx5_esw_sched_node,
					  entry);

	list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
				 entry)
		esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
}

static int esw_qos_switch_tc_arbiter_node_to_vports(
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct mlx5_esw_sched_node *node,
	struct netlink_ext_ack *extack)
{
	u32 parent_tsar_ix = node->parent ?
			     node->parent->ix : node->esw->qos.root_tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
					     node->max_rate, node->bw_share,
					     &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create scheduling element for vports node when disabling vports TC QoS");
		return err;
	}

	node->type = SCHED_NODE_TYPE_VPORTS_TSAR;

	/* Disable TC QoS for vports in the arbiter node. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);

	return 0;
}

static int esw_qos_switch_vports_node_to_tc_arbiter(
	struct mlx5_esw_sched_node *node,
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node, *tmp;
	struct mlx5_vport *vport;
	int err;

	/* Enable TC QoS for each vport in the node. */
	list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
		vport = vport_node->vport;
		err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
						  extack);
		if (err)
			goto err_out;
	}

	/* Destroy the current vports node TSAR. */
	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err)
		goto err_out;

	return 0;
err_out:
	/* Restore vports back into the node if an error occurs. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);

	return err;
}

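/* Allocate a detached node that takes over the FW element index, type and
 * children of @curr_node; used to preserve the current state while the
 * original node is converted to a different scheduling type.
 */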
static struct mlx5_esw_sched_node *
esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
{
	struct mlx5_esw_sched_node *new_node;

	new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
					curr_node->type, NULL);
	if (!new_node)
		return ERR_PTR(-ENOMEM);

	esw_qos_nodes_set_parent(&curr_node->children, new_node);
	return new_node;
}

static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node;
	int err;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Allocate a new rate node to hold the current state, which will allow
	 * for restoring the vports back to this node after disabling TC
	 * arbitration.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
		return PTR_ERR(curr_node);
	}

	/* Disable TC QoS for all vports, and assign them back to the node. */
	err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
	if (err)
		goto err_out;

	/* Clean up the TC arbiter node after disabling TC QoS for vports. */
	esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
	goto out;
err_out:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node, *child;
	int err, new_level, max_level;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Increase the hierarchy level by one to account for the additional
	 * vports TC scheduling node, and verify that the new level does not
	 * exceed the maximum allowed depth.
	 */
	new_level = node->level + 1;
	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "TC arbitration on nodes is not supported beyond max depth %d",
				       max_level);
		return -EOPNOTSUPP;
	}

	/* Ensure the node does not contain non-leaf children before assigning
	 * TC bandwidth.
	 */
	if (!list_empty(&node->children)) {
		list_for_each_entry(child, &node->children, entry) {
			if (!child->vport) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Cannot configure TC bandwidth on a node with non-leaf children");
				return -EOPNOTSUPP;
			}
		}
	}

	/* Allocate a new node that will store the information of the current
	 * node. This will be used later to restore the node if necessary.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
		return PTR_ERR(curr_node);
	}

	/* Initialize the TC arbiter node for QoS management.
	 * This step prepares the node for handling Traffic Class arbitration.
	 */
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err)
		goto err_setup;

	/* Enable TC QoS for each vport within the current node. */
	err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
	if (err)
		goto err_switch_vports;
	goto out;

err_switch_vports:
	esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
	node->ix = curr_node->ix;
	node->type = curr_node->type;
err_setup:
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}

static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	mlx5_uplink_netdev_put(mdev, slave);
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}

#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

1595 /* Converts bytes per second value passed in a pointer into megabits per
1596 * second, rewriting last. If converted rate exceed link speed or is not a
1597 * fraction of Mbps - returns error.
1598 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
	if (err)
		return err;

	*rate = value;
	return 0;
}

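/* Reject TC bandwidth requests that assign a non-zero share to a traffic
 * class index beyond the number of traffic classes the device exposes; only
 * the first esw_qos_num_tcs() entries of tc_bw may be used.
 */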
static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
					       u32 *tc_bw)
{
	int i, num_tcs = esw_qos_num_tcs(esw->dev);

	for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
		if (tc_bw[i])
			return false;
	}

	return true;
}

static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
						     u32 *tc_bw)
{
	struct mlx5_esw_sched_node *node = vport->qos.sched_node;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

	esw = (node && node->parent) ? node->parent->esw : esw;

	return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
}

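/* An all-zero tc_bw array is how devlink signals that per-TC bandwidth
 * arbitration should be disabled for the rate object.
 */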
static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
	int i;

	for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
		if (tc_bw[i])
			return false;
	}

	return true;
}

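/* Release a vport's scheduling element once it no longer carries any QoS
 * state: no parent node, no max/min rate and no TC bandwidth shares. This
 * keeps vports without QoS configuration out of the scheduling hierarchy.
 */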
static void esw_vport_qos_prune_empty(struct mlx5_vport *vport)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);
	if (!vport_node)
		return;

	if (vport_node->parent || vport_node->max_rate ||
	    vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw))
		return;

	mlx5_esw_qos_vport_disable_locked(vport);
}

int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
	if (esw->qos.domain)
		return 0; /* Nothing to change. */

	return esw_qos_domain_init(esw);
}

void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
{
	if (esw->qos.domain)
		esw_qos_domain_release(esw);
}

/* Eswitch devlink rate API */

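/* The callbacks below back the devlink rate objects for E-Switch vports
 * (leaves) and rate nodes (groups). Illustrative usage, assuming the
 * standard iproute2 devlink rate syntax (device and group names are
 * examples only):
 *
 *   devlink port function rate add pci/0000:03:00.0/mygroup
 *   devlink port function rate set pci/0000:03:00.0/mygroup tx_max 10Gbit
 *   devlink port function rate set pci/0000:03:00.0/1 parent mygroup
 */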
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
	if (err)
		goto out;
	esw_vport_qos_prune_empty(vport);
out:
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
	if (err)
		goto out;
	esw_vport_qos_prune_empty(vport);
out:
	esw_qos_unlock(esw);
	return err;
}

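/* Configure per-traffic-class bandwidth shares on a vport. A non-zero tc_bw
 * switches the vport's scheduling element to a TC arbiter TSAR (enabling QoS
 * for the vport first if needed); an all-zero tc_bw reverts it to a plain
 * vport element and prunes it if no other QoS state remains.
 */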
int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
					 void *priv,
					 u32 *tc_bw,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node;
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	bool disable;
	int err = 0;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	disable = esw_qos_tc_bw_disabled(tc_bw);
	esw_qos_lock(esw);

	if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch traffic classes number is not supported");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	vport_node = vport->qos.sched_node;
	if (disable && !vport_node)
		goto unlock;

	if (disable) {
		if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
			err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
						   vport_node->parent, extack);
		esw_vport_qos_prune_empty(vport);
		goto unlock;
	}

	if (!vport_node) {
		err = mlx5_esw_qos_vport_enable(vport,
						SCHED_NODE_TYPE_TC_ARBITER_TSAR,
						NULL, 0, 0, extack);
		vport_node = vport->qos.sched_node;
	} else {
		err = esw_qos_vport_update(vport,
					   SCHED_NODE_TYPE_TC_ARBITER_TSAR,
					   vport_node->parent, extack);
	}
	if (!err)
		esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
unlock:
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
					 void *priv,
					 u32 *tc_bw,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	bool disable;
	int err;

	if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch traffic classes number is not supported");
		return -EOPNOTSUPP;
	}

	disable = esw_qos_tc_bw_disabled(tc_bw);
	esw_qos_lock(esw);
	if (disable) {
		err = esw_qos_node_disable_tc_arbitration(node, extack);
		goto unlock;
	}

	err = esw_qos_node_enable_tc_arbitration(node, extack);
	if (!err)
		esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
unlock:
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_set_node_min_rate(node, tx_share, extack);
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
	esw_qos_unlock(esw);
	return err;
}

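/* Create a new rate node (a vports TSAR under the root). Rate nodes can only
 * be created while the E-Switch is in switchdev (offloads) mode.
 */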
int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	esw_qos_lock(esw);
	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rate node creation supported only in switchdev mode");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	node = esw_qos_create_vports_sched_node(esw, extack);
	if (IS_ERR(node)) {
		err = PTR_ERR(node);
		goto unlock;
	}

	*priv = node;
unlock:
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;

	esw_qos_lock(esw);
	__esw_qos_destroy_node(node, extack);
	esw_qos_put(esw);
	esw_qos_unlock(esw);
	return 0;
}

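/* Move a vport under a new parent node, or to the root when parent is NULL.
 * If the vport has no scheduling element yet, one is created on demand: a
 * rate limiter element when the new parent is a TC arbiter TSAR, a plain
 * vport element otherwise. Cross E-Switch parents are rejected.
 */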
int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err = 0;

	if (parent && parent->esw != esw) {
		NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
		return -EOPNOTSUPP;
	}

	esw_qos_lock(esw);
	if (!vport->qos.sched_node && parent) {
		enum sched_node_type type;

		type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
		       SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
		err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
						extack);
	} else if (vport->qos.sched_node) {
		err = esw_qos_vport_update_parent(vport, parent, extack);
	}
	esw_qos_unlock(esw);
	return err;
}

int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL;
	struct mlx5_vport *vport = priv;
	int err;

	err = mlx5_esw_qos_vport_update_parent(vport, node, extack);
	if (!err) {
		struct mlx5_eswitch *esw = vport->dev->priv.eswitch;

		esw_qos_lock(esw);
		esw_vport_qos_prune_empty(vport);
		esw_qos_unlock(esw);
	}

	return err;
}

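/* A node with no children is empty. A TC arbiter TSAR always has per-TC
 * child TSARs, so it is considered empty when those children carry no rate
 * objects themselves (checked via the first per-TC child).
 */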
static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
{
	if (list_empty(&node->children))
		return true;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return false;

	node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
				entry);

	return esw_qos_is_node_empty(node);
}

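/* Validate that a rate node may be re-parented: the target parent must belong
 * to the same E-Switch and must not itself use TC bandwidth arbitration, the
 * node must not contain rate objects, and the resulting hierarchy depth must
 * stay within the device's log_esw_max_sched_depth capability.
 */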
static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	u8 new_level, max_level;

	if (parent && parent->esw != node->esw) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot assign node to another E-Switch");
		return -EOPNOTSUPP;
	}

	if (!esw_qos_is_node_empty(node)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot reassign a node that contains rate objects");
		return -EOPNOTSUPP;
	}

	if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Cannot attach a node to a parent with TC bandwidth configured");
		return -EOPNOTSUPP;
	}

	new_level = parent ? parent->level + 1 : 2;
	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		/* Increase by one to account for the vports TC scheduling
		 * element.
		 */
		new_level += 1;
	}

	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "Node hierarchy depth %d exceeds the maximum supported level %d",
				       new_level, max_level);
		return -EOPNOTSUPP;
	}

	return 0;
}

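/* Re-parent a TC arbiter node by tearing down its TC scheduling elements and
 * rebuilding them under the new parent, preserving the configured per-TC
 * bandwidth shares. On failure the node is restored under its old parent.
 */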
static int
esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	struct mlx5_eswitch *esw = node->esw;
	int err;

	esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
	esw_qos_tc_arbiter_scheduling_teardown(node, extack);
	esw_qos_node_set_parent(node, parent);
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err) {
		esw_qos_node_set_parent(node, curr_parent);
		if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
			esw_warn(esw->dev, "Node restore QoS failed\n");
			return err;
		}
	}
	esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);

	return err;
}

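/* Re-parent a vports TSAR by destroying its scheduling element and recreating
 * it under the new parent (or the root TSAR when parent is NULL). On failure
 * the element is recreated under the previous parent.
 */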
static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
					     struct mlx5_esw_sched_node *parent,
					     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	struct mlx5_eswitch *esw = node->esw;
	u32 parent_ix;
	int err;

	parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
	mlx5_destroy_scheduling_element_cmd(esw->dev,
					    SCHEDULING_HIERARCHY_E_SWITCH,
					    node->ix);
	err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
					     node->max_rate, 0, &node->ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Failed to create a node under the new hierarchy.");
		if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix,
						   node->max_rate,
						   node->bw_share,
						   &node->ix))
			esw_warn(esw->dev, "Node restore QoS failed\n");

		return err;
	}
	esw_qos_node_set_parent(node, parent);
	node->bw_share = 0;

	return 0;
}

static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
					   struct mlx5_esw_sched_node *parent,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	curr_parent = node->parent;
	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
		err = esw_qos_tc_arbiter_node_update_parent(node, parent,
							    extack);
	} else {
		err = esw_qos_vports_node_update_parent(node, parent, extack);
	}

	if (err)
		goto out;

	esw_qos_normalize_min_rate(esw, curr_parent, extack);
	esw_qos_normalize_min_rate(esw, parent, extack);

out:
	esw_qos_unlock(esw);

	return err;
}

int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
					  struct devlink_rate *parent,
					  void *priv, void *parent_priv,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv, *parent_node;

	if (!parent)
		return mlx5_esw_qos_node_update_parent(node, NULL, extack);

	parent_node = parent_priv;
	return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
}
