1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10
11 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
12 #define MLX5_MIN_BW_SHARE 1
13
/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of all mlx5_esw_sched_nodes (roots of the rate hierarchy). */
	struct list_head nodes;
};
21
/* Acquire the qos domain lock; must be held across any qos hierarchy change. */
static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}
26
/* Release the qos domain lock taken by esw_qos_lock(). */
static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}
31
/* Lockdep assertion that the qos domain lock is held by the caller. */
static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}
36
esw_qos_domain_alloc(void)37 static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
38 {
39 struct mlx5_qos_domain *qos_domain;
40
41 qos_domain = kzalloc_obj(*qos_domain);
42 if (!qos_domain)
43 return NULL;
44
45 mutex_init(&qos_domain->lock);
46 INIT_LIST_HEAD(&qos_domain->nodes);
47
48 return qos_domain;
49 }
50
esw_qos_domain_init(struct mlx5_eswitch * esw)51 static int esw_qos_domain_init(struct mlx5_eswitch *esw)
52 {
53 esw->qos.domain = esw_qos_domain_alloc();
54
55 return esw->qos.domain ? 0 : -ENOMEM;
56 }
57
/* Free the eswitch's qos domain and clear the pointer. */
static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}
63
/* Kinds of scheduling elements that make up the E-Switch rate hierarchy.
 * Names correspond to sched_node_type_str[] below.
 */
enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
	SCHED_NODE_TYPE_TC_ARBITER_TSAR,
	SCHED_NODE_TYPE_RATE_LIMITER,
	SCHED_NODE_TYPE_VPORT_TC,
	SCHED_NODE_TYPE_VPORTS_TC_TSAR,
};
72
/* Human-readable node-type names, indexed by enum sched_node_type;
 * used in warning messages.
 */
static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
	[SCHED_NODE_TYPE_TC_ARBITER_TSAR] = "TC Arbiter TSAR",
	[SCHED_NODE_TYPE_RATE_LIMITER] = "Rate Limiter",
	[SCHED_NODE_TYPE_VPORT_TC] = "vport TC",
	[SCHED_NODE_TYPE_VPORTS_TC_TSAR] = "vports TC TSAR",
};
81
/* One node of the E-Switch rate hierarchy (TSAR, vport, vport TC, ...). */
struct mlx5_esw_sched_node {
	/* Index of the firmware scheduling element backing this node. */
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
	/* Level in the hierarchy. The root node level is 1. */
	u8 level;
	/* Valid only when this node represents a traffic class. */
	u8 tc;
	/* Valid only for a TC arbiter node or vport TC arbiter. */
	u32 tc_bw[DEVLINK_RATE_TCS_MAX];
};
108
esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node * node)109 static void esw_qos_node_attach_to_parent(struct mlx5_esw_sched_node *node)
110 {
111 if (!node->parent) {
112 /* Root children are assigned a depth level of 2. */
113 node->level = 2;
114 list_add_tail(&node->entry, &node->esw->qos.domain->nodes);
115 } else {
116 node->level = node->parent->level + 1;
117 list_add_tail(&node->entry, &node->parent->children);
118 }
119 }
120
esw_qos_num_tcs(struct mlx5_core_dev * dev)121 static int esw_qos_num_tcs(struct mlx5_core_dev *dev)
122 {
123 int num_tcs = mlx5_max_tc(dev) + 1;
124
125 return num_tcs < DEVLINK_RATE_TCS_MAX ? num_tcs : DEVLINK_RATE_TCS_MAX;
126 }
127
/* Re-home @node under @parent (or the domain root list when @parent is NULL),
 * updating its owning eswitch, level and sibling-list membership.
 */
static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	/* Unlink from the previous sibling list before re-attaching. */
	list_del_init(&node->entry);
	node->parent = parent;
	if (parent)
		node->esw = parent->esw;
	esw_qos_node_attach_to_parent(node);
}
137
/* Move every node on @nodes under @parent. When the new parent is a TC
 * arbiter TSAR, the vport children hanging off each moved node get their
 * sched_node parent pointers updated as well, keeping the vport view of
 * the hierarchy coherent.
 */
static void esw_qos_nodes_set_parent(struct list_head *nodes,
				     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node, *tmp;

	list_for_each_entry_safe(node, tmp, nodes, entry) {
		esw_qos_node_set_parent(node, parent);
		if (!list_empty(&node->children) &&
		    parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
			struct mlx5_esw_sched_node *child;

			list_for_each_entry(child, &node->children, entry) {
				struct mlx5_vport *vport = child->vport;

				if (vport)
					vport->qos.sched_node->parent = parent;
			}
		}
	}
}
158
/* Free all qos state attached to @vport: the per-TC scheduling nodes (when
 * TC scheduling was enabled) and the vport's own scheduling node, then zero
 * the whole qos struct.
 */
void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	if (vport->qos.sched_nodes) {
		int num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);
		int i;

		for (i = 0; i < num_tcs; i++)
			kfree(vport->qos.sched_nodes[i]);
		kfree(vport->qos.sched_nodes);
	}

	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}
173
mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport * vport)174 u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
175 {
176 if (!vport->qos.sched_node)
177 return 0;
178
179 return vport->qos.sched_node->ix;
180 }
181
182 struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport * vport)183 mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
184 {
185 if (!vport->qos.sched_node)
186 return NULL;
187
188 return vport->qos.sched_node->parent;
189 }
190
/* Log a failed firmware scheduling-element operation (@op) on @node,
 * including whatever identifying context (vport number, tc) the node type
 * carries.
 */
static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	switch (node->type) {
	case SCHED_NODE_TYPE_VPORTS_TC_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (tc=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT_TC:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,tc=%d,err=%d)\n",
			 op,
			 sched_node_type_str[node->type],
			 node->vport->vport, node->tc, err);
		break;
	case SCHED_NODE_TYPE_VPORT:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		break;
	case SCHED_NODE_TYPE_RATE_LIMITER:
	case SCHED_NODE_TYPE_TC_ARBITER_TSAR:
	case SCHED_NODE_TYPE_VPORTS_TSAR:
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (err=%d)\n",
			 op, sched_node_type_str[node->type], err);
		break;
	default:
		/* Unknown node type: no per-type details to report. */
		esw_warn(node->esw->dev,
			 "E-Switch %s scheduling element failed (err=%d)\n",
			 op, err);
		break;
	}
}
225
/* Create the firmware scheduling element described by @ctx for @node,
 * storing the returned element index in node->ix.
 * Returns 0 on success or the firmware error code (also reported via
 * @extack).
 */
static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
						 &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}
240
/* Destroy the firmware scheduling element backing @node (node->ix).
 * Returns 0 on success or the firmware error code (also reported via
 * @extack).
 */
static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}
256
/* Push @max_rate/@bw_share to the firmware element backing @node.
 * Only the fields that actually changed are set in the modify bitmask, and
 * a full no-op returns 0 without issuing a command. On success the node's
 * cached values are updated and a tracepoint is emitted for TSAR/vport
 * nodes.
 */
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	/* bw_share needs dedicated firmware support with a sane max share. */
	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	/* Cache the values that are now live in firmware. */
	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}
305
/* Create a standalone rate-limit scheduling element for @node, programmed
 * with the node's max_rate. Returns 0 on success, -EOPNOTSUPP when the
 * device lacks the element type, or a firmware error code.
 */
static int esw_qos_create_rate_limit_element(struct mlx5_esw_sched_node *node,
					     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};

	if (!mlx5_qos_element_type_supported(
		node->esw->dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, node->max_rate);
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT);

	return esw_qos_node_create_sched_element(node, sched_ctx, extack);
}
323
/* Compute the divider that maps min_rate values to firmware bw_share for
 * all children of @parent (or all root nodes of @esw when @parent is NULL).
 */
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If no node has min_rate configured, a divider of 0 sets all
	 * nodes' bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}
349
/* Map @value through @divider to a bw_share clamped to
 * [MLX5_MIN_BW_SHARE, fw_max]; a zero divider yields 0 (min rate disabled).
 */
static u32 esw_qos_calc_bw_share(u32 value, u32 divider, u32 fw_max)
{
	u32 share;

	if (!divider)
		return 0;

	share = DIV_ROUND_UP(value, divider);
	if (share < MLX5_MIN_BW_SHARE)
		share = MLX5_MIN_BW_SHARE;
	if (share > fw_max)
		share = fw_max;

	return share;
}
358
/* Recompute @node's bw_share from its min_rate and @divider and push the
 * result (together with the unchanged max_rate) to firmware.
 */
static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max);

	esw_qos_sched_elem_config(node, node->max_rate, share, extack);
}
370
/* Recompute bw_share for every node under @parent (or every root node of
 * @esw when @parent is NULL) and recurse into nodes that have children.
 * Nodes belonging to other eswitches and the root TSAR itself are skipped.
 */
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		/* Vports TC TSARs don't have a minimum rate configured,
		 * so there's no need to update the bw_share on them.
		 */
		if (node->type != SCHED_NODE_TYPE_VPORTS_TC_TSAR) {
			esw_qos_update_sched_node_bw_share(node, divider,
							   extack);
		}

		if (list_empty(&node->children))
			continue;

		/* Normalize the subtree with its own divider. */
		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}
397
/* Sum the per-TC bandwidth values to obtain the divider used when scaling
 * each TC's share. Returns 1 (and warns) on a zero total, which indicates
 * tc-bw is disabled and this path should not have been reached.
 */
static u32 esw_qos_calculate_tc_bw_divider(u32 *tc_bw)
{
	u32 sum = 0;
	int tc;

	for (tc = 0; tc < DEVLINK_RATE_TCS_MAX; tc++)
		sum += tc_bw[tc];

	if (WARN_ON(!sum))
		return 1;

	return sum;
}
411
/* Update @node's minimum rate and re-normalize bw_share across its
 * siblings. Returns 0 (also when the value is unchanged).
 */
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	if (node->min_rate == min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(node->esw, node->parent, extack);

	return 0;
}
425
/* Create a DWRR TSAR scheduling element under @parent_element_id with the
 * given rate limits, returning its index in @tsar_ix.
 * Returns 0 on success, -EOPNOTSUPP when the device lacks TSAR/DWRR
 * support, or a firmware error code.
 */
static int
esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
			       u32 max_rate, u32 bw_share, u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share, bw_share);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}
455
/* Create the vport scheduling element for @vport_node under its parent
 * (or the eswitch root TSAR when it has no parent), programmed with the
 * node's max_rate.
 */
static int
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *parent = vport_node->parent;
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
		 vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}
482
/* Create the vport-TC scheduling element for @vport_tc_node under its
 * parent vports-TC TSAR, bound to the rate-limit element
 * @rate_limit_elem_ix (0 for none) and carrying the node's bw_share.
 */
static int
esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
				      u32 rate_limit_elem_ix,
				      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC,
		SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_tc_element, attr, vport_number,
		 vport_tc_node->vport->vport);
	MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
	MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
		 rate_limit_elem_ix);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
		 vport_tc_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, bw_share,
		 vport_tc_node->bw_share);

	return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
						 extack);
}
514
/* Allocate an in-memory rate node of @type with element index @tsar_ix and
 * attach it under @parent. Returns NULL on allocation failure. No firmware
 * element is created here.
 */
static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct mlx5_esw_sched_node *node;

	node = kzalloc_obj(*node);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	esw_qos_node_attach_to_parent(node);
	if (!parent) {
		/* The caller is responsible for inserting the node into the
		 * parent list if necessary. This function can also be used with
		 * a NULL parent, which doesn't necessarily indicate that it
		 * refers to the root scheduling element.
		 */
		list_del_init(&node->entry);
	}

	return node;
}
542
/* Unlink @node from its sibling list and free it (no firmware cleanup). */
static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}
548
/* Destroy @node's firmware scheduling element, then free the node. */
static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}
554
/* Create a vports-TC DWRR TSAR for traffic class @tc under @parent: allocate
 * the in-memory node, create its firmware element, and record the tc.
 * On firmware failure the node is freed again.
 */
static int esw_qos_create_vports_tc_node(struct mlx5_esw_sched_node *parent,
					 u8 tc, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = parent->esw->dev;
	struct mlx5_esw_sched_node *vports_tc_node;
	void *attr;
	int err;

	if (!mlx5_qos_element_type_supported(
		dev,
		SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
		SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	vports_tc_node = __esw_qos_alloc_node(parent->esw, 0,
					      SCHED_NODE_TYPE_VPORTS_TC_TSAR,
					      parent);
	if (!vports_tc_node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		esw_warn(dev, "Failed to alloc vports TC node (tc=%d)\n", tc);
		return -ENOMEM;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);
	MLX5_SET(tsar_element, attr, traffic_class, tc);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent->ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	err = esw_qos_node_create_sched_element(vports_tc_node, tsar_ctx,
						extack);
	if (err)
		goto err_create_sched_element;

	vports_tc_node->tc = tc;

	return 0;

err_create_sched_element:
	__esw_qos_free_node(vports_tc_node);
	return err;
}
602
/* Copy the cached per-TC bandwidth shares of @tc_arbiter_node into @tc_bw
 * (which must hold DEVLINK_RATE_TCS_MAX entries).
 */
static void
esw_qos_tc_arbiter_get_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw)
{
	memcpy(tc_bw, tc_arbiter_node->tc_bw, sizeof(tc_arbiter_node->tc_bw));
}
609
/* Apply the per-TC bandwidth split @tc_bw to @tc_arbiter_node: cache each
 * TC's value and program the corresponding vports-TC TSAR with a bw_share
 * scaled to the firmware's max_tsar_bw_share.
 */
static void
esw_qos_set_tc_arbiter_bw_shares(struct mlx5_esw_sched_node *tc_arbiter_node,
				 u32 *tc_bw, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	struct mlx5_esw_sched_node *vports_tc_node;
	u32 divider, fw_max_bw_share;

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	divider = esw_qos_calculate_tc_bw_divider(tc_bw);
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		u8 tc = vports_tc_node->tc;
		u32 bw_share;

		tc_arbiter_node->tc_bw[tc] = tc_bw[tc];
		/* Scale to firmware units before dividing by the total. */
		bw_share = tc_bw[tc] * fw_max_bw_share;
		bw_share = esw_qos_calc_bw_share(bw_share, divider,
						 fw_max_bw_share);
		esw_qos_sched_elem_config(vports_tc_node, 0, bw_share, extack);
	}
}
631
/* Destroy every vports-TC TSAR child of @tc_arbiter_node (firmware element
 * and in-memory node).
 */
static void
esw_qos_destroy_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vports_tc_node, *tmp;

	list_for_each_entry_safe(vports_tc_node, tmp,
				 &tc_arbiter_node->children, entry)
		esw_qos_destroy_node(vports_tc_node, extack);
}
642
/* Create one vports-TC TSAR per supported traffic class under
 * @tc_arbiter_node. On failure, all already-created TC nodes are torn down.
 */
static int
esw_qos_create_vports_tc_nodes(struct mlx5_esw_sched_node *tc_arbiter_node,
			       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = tc_arbiter_node->esw;
	int err, i, num_tcs = esw_qos_num_tcs(esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_create_vports_tc_node(tc_arbiter_node, i, extack);
		if (err)
			goto err_tc_node_create;
	}

	return 0;

err_tc_node_create:
	esw_qos_destroy_vports_tc_nodes(tc_arbiter_node, NULL);
	return err;
}
662
/* Create the TC-arbiter TSAR firmware element for @tc_arbiter_node under
 * its parent (or the eswitch root TSAR), carrying the node's max_rate and
 * bw_share. Returns -EOPNOTSUPP when the device lacks TC-arbiter TSARs.
 */
static int esw_qos_create_tc_arbiter_sched_elem(
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	u32 tsar_parent_ix;
	void *attr;

	if (!mlx5_qos_tsar_type_supported(tc_arbiter_node->esw->dev,
					  TSAR_ELEMENT_TSAR_TYPE_TC_ARB,
					  SCHEDULING_HIERARCHY_E_SWITCH)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch TC Arbiter scheduling element is not supported");
		return -EOPNOTSUPP;
	}

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_TC_ARB);
	tsar_parent_ix = tc_arbiter_node->parent ? tc_arbiter_node->parent->ix :
			 tc_arbiter_node->esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 tsar_parent_ix);
	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, max_average_bw,
		 tc_arbiter_node->max_rate);
	MLX5_SET(scheduling_context, tsar_ctx, bw_share,
		 tc_arbiter_node->bw_share);

	return esw_qos_node_create_sched_element(tc_arbiter_node, tsar_ctx,
						 extack);
}
695
/* Create a vports TSAR node under the eswitch root: firmware TSAR first,
 * then the in-memory node, which is linked into the domain's root list and
 * folded into min-rate normalization. On node allocation failure the
 * firmware TSAR is destroyed again.
 */
static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, 0,
					     0, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	list_add_tail(&node->entry, &esw->qos.domain->nodes);
	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}
731
732 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
733 static void esw_qos_put(struct mlx5_eswitch *esw);
734
/* Locked entry point: take a qos reference and create a vports TSAR node at
 * the root of the hierarchy. Requires the qos domain lock and scheduler
 * depth support; drops the reference on failure.
 */
static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}
755
/* Destroy @node (including its vports-TC children when it is a TC arbiter)
 * and re-normalize min rates among the remaining root nodes.
 */
static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		esw_qos_destroy_vports_tc_nodes(node, extack);

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}
767
/* First-use initialization of eswitch qos: create the root TSAR and set the
 * refcount to 1. Called from esw_qos_get() when the refcount is zero.
 */
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	err = esw_qos_create_node_sched_elem(esw->dev, 0, 0, 0,
					     &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	refcount_set(&esw->qos.refcnt, 1);

	return 0;
}
787
/* Tear down eswitch qos by destroying the root TSAR; failures are only
 * logged since there is no caller-visible recovery.
 */
static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}
798
/* Take a reference on eswitch qos, lazily creating the root TSAR on first
 * use. Requires the qos domain lock.
 */
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() set refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}
813
/* Drop a reference taken by esw_qos_get(); the last put destroys the root
 * TSAR. Requires the qos domain lock.
 */
static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}
820
/* Undo esw_qos_tc_arbiter_scheduling_setup(): remove the per-TC children
 * and the TC arbiter's own firmware element (the node itself stays
 * allocated).
 */
static void
esw_qos_tc_arbiter_scheduling_teardown(struct mlx5_esw_sched_node *node,
				       struct netlink_ext_ack *extack)
{
	/* Clean up all Vports TC nodes within the TC arbiter node. */
	esw_qos_destroy_vports_tc_nodes(node, extack);
	/* Destroy the scheduling element for the TC arbiter node itself. */
	esw_qos_node_destroy_sched_element(node, extack);
}
830
/* Turn @node into a TC arbiter: create the TC-arbiter firmware element and
 * its per-TC vports-TC TSARs, then flip the node type. On failure the
 * firmware element is destroyed and the node's original element index is
 * restored.
 */
static int esw_qos_tc_arbiter_scheduling_setup(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	/* Saved so node->ix can be restored if setup fails partway. */
	u32 curr_ix = node->ix;
	int err;

	err = esw_qos_create_tc_arbiter_sched_elem(node, extack);
	if (err)
		return err;
	/* Initialize the vports TC nodes within created TC arbiter TSAR. */
	err = esw_qos_create_vports_tc_nodes(node, extack);
	if (err)
		goto err_vports_tc_nodes;

	node->type = SCHED_NODE_TYPE_TC_ARBITER_TSAR;

	return 0;

err_vports_tc_nodes:
	/* If initialization fails, clean up the scheduling element
	 * for the TC arbiter node.
	 */
	esw_qos_node_destroy_sched_element(node, NULL);
	node->ix = curr_ix;
	return err;
}
857
/* Create one vport-TC node for @vport under @vports_tc_node (which fixes
 * the traffic class), bound to rate-limit element @rate_limit_elem_ix, and
 * record it in vport->qos.sched_nodes[tc]. Frees the node on firmware
 * failure.
 */
static int
esw_qos_create_vport_tc_sched_node(struct mlx5_vport *vport,
				   u32 rate_limit_elem_ix,
				   struct mlx5_esw_sched_node *vports_tc_node,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *vport_tc_node;
	u8 tc = vports_tc_node->tc;
	int err;

	vport_tc_node = __esw_qos_alloc_node(vport_node->esw, 0,
					     SCHED_NODE_TYPE_VPORT_TC,
					     vports_tc_node);
	if (!vport_tc_node)
		return -ENOMEM;

	/* Inherit the vport's min_rate so bw_share normalization carries over. */
	vport_tc_node->min_rate = vport_node->min_rate;
	vport_tc_node->tc = tc;
	vport_tc_node->vport = vport;
	err = esw_qos_vport_tc_create_sched_element(vport_tc_node,
						    rate_limit_elem_ix,
						    extack);
	if (err)
		goto err_out;

	vport->qos.sched_nodes[tc] = vport_tc_node;

	return 0;
err_out:
	__esw_qos_free_node(vport_tc_node);
	return err;
}
891
/* Destroy all of @vport's per-TC scheduling nodes and free the
 * sched_nodes array.
 */
static void
esw_qos_destroy_vport_tc_sched_elements(struct mlx5_vport *vport,
					struct netlink_ext_ack *extack)
{
	int i, num_tcs = esw_qos_num_tcs(vport->qos.sched_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		/* Entries may be NULL when a previous create failed partway. */
		if (vport->qos.sched_nodes[i]) {
			__esw_qos_destroy_node(vport->qos.sched_nodes[i],
					       extack);
		}
	}

	kfree(vport->qos.sched_nodes);
	vport->qos.sched_nodes = NULL;
}
908
/* Allocate @vport's per-TC node array and create one vport-TC element under
 * each vports-TC TSAR of the governing TC arbiter. For
 * SCHED_NODE_TYPE_RATE_LIMITER the vport's own element acts as the rate
 * limiter and the arbiter is the vport node's parent; otherwise the vport
 * node itself is the arbiter. Cleans up on partial failure.
 */
static int
esw_qos_create_vport_tc_sched_elements(struct mlx5_vport *vport,
				       enum sched_node_type type,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *tc_arbiter_node, *vports_tc_node;
	int err, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);
	u32 rate_limit_elem_ix;

	vport->qos.sched_nodes = kzalloc_objs(struct mlx5_esw_sched_node *,
					      num_tcs);
	if (!vport->qos.sched_nodes) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Allocating the vport TC scheduling elements failed.");
		return -ENOMEM;
	}

	rate_limit_elem_ix = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			     vport_node->ix : 0;
	tc_arbiter_node = type == SCHED_NODE_TYPE_RATE_LIMITER ?
			  vport_node->parent : vport_node;
	list_for_each_entry(vports_tc_node, &tc_arbiter_node->children, entry) {
		err = esw_qos_create_vport_tc_sched_node(vport,
							 rate_limit_elem_ix,
							 vports_tc_node,
							 extack);
		if (err)
			goto err_create_vport_tc;
	}

	return 0;

err_create_vport_tc:
	esw_qos_destroy_vport_tc_sched_elements(vport, NULL);

	return err;
}
947
948 static int
esw_qos_vport_tc_enable(struct mlx5_vport * vport,enum sched_node_type type,struct netlink_ext_ack * extack)949 esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type,
950 struct netlink_ext_ack *extack)
951 {
952 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
953 struct mlx5_esw_sched_node *parent = vport_node->parent;
954 int err;
955
956 if (type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
957 int new_level, max_level;
958
959 /* Increase the parent's level by 2 to account for both the
960 * TC arbiter and the vports TC scheduling element.
961 */
962 new_level = (parent ? parent->level : 2) + 2;
963 max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev,
964 log_esw_max_sched_depth);
965 if (new_level > max_level) {
966 NL_SET_ERR_MSG_FMT_MOD(extack,
967 "TC arbitration on leafs is not supported beyond max depth %d",
968 max_level);
969 return -EOPNOTSUPP;
970 }
971 }
972
973 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
974
975 if (type == SCHED_NODE_TYPE_RATE_LIMITER)
976 err = esw_qos_create_rate_limit_element(vport_node, extack);
977 else
978 err = esw_qos_tc_arbiter_scheduling_setup(vport_node, extack);
979 if (err)
980 return err;
981
982 /* Rate limiters impact multiple nodes not directly connected to them
983 * and are not direct members of the QoS hierarchy.
984 * Unlink it from the parent to reflect that.
985 */
986 if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
987 list_del_init(&vport_node->entry);
988 vport_node->level = 0;
989 }
990
991 err = esw_qos_create_vport_tc_sched_elements(vport, type, extack);
992 if (err)
993 goto err_sched_nodes;
994
995 return 0;
996
997 err_sched_nodes:
998 if (type == SCHED_NODE_TYPE_RATE_LIMITER) {
999 esw_qos_node_destroy_sched_element(vport_node, NULL);
1000 esw_qos_node_attach_to_parent(vport_node);
1001 } else {
1002 esw_qos_tc_arbiter_scheduling_teardown(vport_node, NULL);
1003 }
1004 return err;
1005 }
1006
esw_qos_vport_tc_disable(struct mlx5_vport * vport,struct netlink_ext_ack * extack)1007 static void esw_qos_vport_tc_disable(struct mlx5_vport *vport,
1008 struct netlink_ext_ack *extack)
1009 {
1010 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1011 enum sched_node_type curr_type = vport_node->type;
1012
1013 esw_qos_destroy_vport_tc_sched_elements(vport, extack);
1014
1015 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER)
1016 esw_qos_node_destroy_sched_element(vport_node, extack);
1017 else
1018 esw_qos_tc_arbiter_scheduling_teardown(vport_node, extack);
1019 }
1020
/* Apply @min_rate to every per-TC scheduling node of @vport.
 *
 * The vport node's own min_rate is committed only after all TCs were
 * configured successfully; on partial failure the TCs updated so far are
 * rolled back to the previously committed value.
 */
static int esw_qos_set_vport_tcs_min_rate(struct mlx5_vport *vport,
					  u32 min_rate,
					  struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err, i, num_tcs = esw_qos_num_tcs(vport_node->esw->dev);

	for (i = 0; i < num_tcs; i++) {
		err = esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
						min_rate, extack);
		if (err)
			goto err_out;
	}
	vport_node->min_rate = min_rate;

	return 0;
err_out:
	/* Restore the old min_rate on the TCs already updated (walk back
	 * from the failing index).
	 */
	for (--i; i >= 0; i--) {
		esw_qos_set_node_min_rate(vport->qos.sched_nodes[i],
					  vport_node->min_rate, extack);
	}
	return err;
}
1044
/* Tear down @vport's QoS scheduling state.
 *
 * Destroys either the plain vport element or the whole per-TC hierarchy,
 * clears cached bandwidth settings, detaches the node from its parent's
 * child list and re-normalizes the remaining siblings' min-rate shares.
 */
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	enum sched_node_type curr_type = vport_node->type;

	if (curr_type == SCHED_NODE_TYPE_VPORT)
		esw_qos_node_destroy_sched_element(vport_node, extack);
	else
		esw_qos_vport_tc_disable(vport, extack);

	vport_node->bw_share = 0;
	memset(vport_node->tc_bw, 0, sizeof(vport_node->tc_bw));
	/* Unlink before normalizing so this node no longer contributes. */
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(vport_node->esw, vport_node->parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}
1062
/* Create @vport's scheduling element(s) under @parent.
 *
 * For SCHED_NODE_TYPE_VPORT a single vport element is created; other types
 * go through the per-TC enable path. On success the node type is committed
 * and min-rate shares under @parent are re-normalized.
 */
static int esw_qos_vport_enable(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport_node, parent);
	if (type == SCHED_NODE_TYPE_VPORT)
		err = esw_qos_vport_create_sched_element(vport_node, extack);
	else
		err = esw_qos_vport_tc_enable(vport, type, extack);
	if (err)
		return err;

	/* Commit the type only once the HW element(s) exist. */
	vport_node->type = type;
	esw_qos_normalize_min_rate(vport_node->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport, vport_node->max_rate,
					vport_node->bw_share);

	return 0;
}
1088
/* Allocate a sched node for @vport and enable QoS on it.
 *
 * Takes a reference on the e-switch QoS (dropped again on failure).
 * Parentless nodes are tracked on the QoS domain's node list. On failure
 * the node is freed and vport->qos.sched_node is reset to NULL.
 */
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	struct mlx5_eswitch *parent_esw;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	/* A node placed under a parent belongs to the parent's e-switch. */
	parent_esw = parent ? parent->esw : esw;
	sched_node = __esw_qos_alloc_node(parent_esw, 0, type, parent);
	if (!sched_node) {
		esw_qos_put(esw);
		return -ENOMEM;
	}
	if (!parent)
		list_add_tail(&sched_node->entry, &esw->qos.domain->nodes);

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		__esw_qos_free_node(sched_node);
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}
1125
mlx5_esw_qos_vport_disable_locked(struct mlx5_vport * vport)1126 static void mlx5_esw_qos_vport_disable_locked(struct mlx5_vport *vport)
1127 {
1128 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1129
1130 esw_assert_qos_lock_held(esw);
1131 if (!vport->qos.sched_node)
1132 return;
1133
1134 esw_qos_vport_disable(vport, NULL);
1135 mlx5_esw_qos_vport_qos_free(vport);
1136 esw_qos_put(esw);
1137 }
1138
mlx5_esw_qos_vport_disable(struct mlx5_vport * vport)1139 void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
1140 {
1141 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1142 struct mlx5_esw_sched_node *parent;
1143
1144 lockdep_assert_held(&esw->state_lock);
1145 esw_qos_lock(esw);
1146 if (!vport->qos.sched_node)
1147 goto unlock;
1148
1149 parent = vport->qos.sched_node->parent;
1150 WARN(parent, "Disabling QoS on port before detaching it from node");
1151
1152 mlx5_esw_qos_vport_disable_locked(vport);
1153 unlock:
1154 esw_qos_unlock(esw);
1155 }
1156
/* Set the vport's max TX rate; setting a rate on an unconfigured vport
 * implicitly enables QoS for it.
 */
static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT,
						 NULL, max_rate, 0, extack);

	return esw_qos_sched_elem_config(node, max_rate, node->bw_share,
					 extack);
}
1171
/* Set the vport's min TX rate; setting a rate on an unconfigured vport
 * implicitly enables QoS for it.
 */
static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (!node)
		return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT,
						 NULL, 0, min_rate, extack);

	/* With per-TC rate limiting the min rate lives on the TC leaves. */
	if (node->type == SCHED_NODE_TYPE_RATE_LIMITER)
		return esw_qos_set_vport_tcs_min_rate(vport, min_rate, extack);

	return esw_qos_set_node_min_rate(node, min_rate, extack);
}
1187
/* Set both rates under the QoS lock; min is applied first and a failure
 * there skips the max-rate update.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (err)
		goto unlock;
	err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
unlock:
	esw_qos_unlock(esw);
	return err;
}
1200
mlx5_esw_qos_get_vport_rate(struct mlx5_vport * vport,u32 * max_rate,u32 * min_rate)1201 bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
1202 {
1203 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1204 bool enabled;
1205
1206 esw_qos_lock(esw);
1207 enabled = !!vport->qos.sched_node;
1208 if (enabled) {
1209 *max_rate = vport->qos.sched_node->max_rate;
1210 *min_rate = vport->qos.sched_node->min_rate;
1211 }
1212 esw_qos_unlock(esw);
1213 return enabled;
1214 }
1215
esw_qos_vport_tc_check_type(enum sched_node_type curr_type,enum sched_node_type new_type,struct netlink_ext_ack * extack)1216 static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
1217 enum sched_node_type new_type,
1218 struct netlink_ext_ack *extack)
1219 {
1220 if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR &&
1221 new_type == SCHED_NODE_TYPE_RATE_LIMITER) {
1222 NL_SET_ERR_MSG_MOD(extack,
1223 "Cannot switch from vport-level TC arbitration to node-level TC arbitration");
1224 return -EOPNOTSUPP;
1225 }
1226
1227 if (curr_type == SCHED_NODE_TYPE_RATE_LIMITER &&
1228 new_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1229 NL_SET_ERR_MSG_MOD(extack,
1230 "Cannot switch from node-level TC arbitration to vport-level TC arbitration");
1231 return -EOPNOTSUPP;
1232 }
1233
1234 return 0;
1235 }
1236
/* Move @vport to a new scheduling @type and/or @parent.
 *
 * The vport is disabled and re-enabled under the new configuration; if
 * re-enable fails the previous configuration is restored. TC bandwidth
 * shares are preserved across the move when the TC arbiter type is kept.
 */
static int esw_qos_vport_update(struct mlx5_vport *vport,
				enum sched_node_type type,
				struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *curr_parent = vport_node->parent;
	enum sched_node_type curr_type = vport_node->type;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);
	if (curr_type == type && curr_parent == parent)
		return 0;

	err = esw_qos_vport_tc_check_type(curr_type, type, extack);
	if (err)
		return err;

	/* Snapshot the TC shares so they can be re-applied after the move. */
	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type)
		esw_qos_tc_arbiter_get_bw_shares(vport_node, curr_tc_bw);

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, type, parent, extack);
	if (err) {
		/* Roll back to the old configuration. Clear extack so the
		 * rollback steps below don't overwrite the error message
		 * already reported to the user.
		 */
		esw_qos_vport_enable(vport, curr_type, curr_parent, NULL);
		extack = NULL;
	}

	if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
		esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw,
						 extack);
	}

	return err;
}
1274
esw_qos_vport_update_parent(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)1275 static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
1276 struct netlink_ext_ack *extack)
1277 {
1278 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1279 struct mlx5_esw_sched_node *curr_parent;
1280 enum sched_node_type type;
1281
1282 esw_assert_qos_lock_held(esw);
1283 curr_parent = vport->qos.sched_node->parent;
1284 if (curr_parent == parent)
1285 return 0;
1286
1287 /* Set vport QoS type based on parent node type if different from
1288 * default QoS; otherwise, use the vport's current QoS type.
1289 */
1290 if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1291 type = SCHED_NODE_TYPE_RATE_LIMITER;
1292 else if (curr_parent &&
1293 curr_parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1294 type = SCHED_NODE_TYPE_VPORT;
1295 else
1296 type = vport->qos.sched_node->type;
1297
1298 return esw_qos_vport_update(vport, type, parent, extack);
1299 }
1300
1301 static void
esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node * tc_arbiter_node,struct mlx5_esw_sched_node * node,struct netlink_ext_ack * extack)1302 esw_qos_switch_vport_tcs_to_vport(struct mlx5_esw_sched_node *tc_arbiter_node,
1303 struct mlx5_esw_sched_node *node,
1304 struct netlink_ext_ack *extack)
1305 {
1306 struct mlx5_esw_sched_node *vports_tc_node, *vport_tc_node, *tmp;
1307
1308 vports_tc_node = list_first_entry(&tc_arbiter_node->children,
1309 struct mlx5_esw_sched_node,
1310 entry);
1311
1312 list_for_each_entry_safe(vport_tc_node, tmp, &vports_tc_node->children,
1313 entry)
1314 esw_qos_vport_update_parent(vport_tc_node->vport, node, extack);
1315 }
1316
esw_qos_switch_tc_arbiter_node_to_vports(struct mlx5_esw_sched_node * tc_arbiter_node,struct mlx5_esw_sched_node * node,struct netlink_ext_ack * extack)1317 static int esw_qos_switch_tc_arbiter_node_to_vports(
1318 struct mlx5_esw_sched_node *tc_arbiter_node,
1319 struct mlx5_esw_sched_node *node,
1320 struct netlink_ext_ack *extack)
1321 {
1322 u32 parent_tsar_ix = node->parent ?
1323 node->parent->ix : node->esw->qos.root_tsar_ix;
1324 int err;
1325
1326 err = esw_qos_create_node_sched_elem(node->esw->dev, parent_tsar_ix,
1327 node->max_rate, node->bw_share,
1328 &node->ix);
1329 if (err) {
1330 NL_SET_ERR_MSG_MOD(extack,
1331 "Failed to create scheduling element for vports node when disabling vports TC QoS");
1332 return err;
1333 }
1334
1335 node->type = SCHED_NODE_TYPE_VPORTS_TSAR;
1336
1337 /* Disable TC QoS for vports in the arbiter node. */
1338 esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, extack);
1339
1340 return 0;
1341 }
1342
/* Move every vport from @node under @tc_arbiter_node, then destroy
 * @node's TSAR element. If any step fails, the vports already moved are
 * restored back to @node.
 */
static int esw_qos_switch_vports_node_to_tc_arbiter(
	struct mlx5_esw_sched_node *node,
	struct mlx5_esw_sched_node *tc_arbiter_node,
	struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node, *tmp;
	struct mlx5_vport *vport;
	int err;

	/* Enable TC QoS for each vport in the node. */
	list_for_each_entry_safe(vport_node, tmp, &node->children, entry) {
		vport = vport_node->vport;
		err = esw_qos_vport_update_parent(vport, tc_arbiter_node,
						  extack);
		if (err)
			goto err_out;
	}

	/* Destroy the current vports node TSAR. */
	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err)
		goto err_out;

	return 0;
err_out:
	/* Restore vports back into the node if an error occurs. */
	esw_qos_switch_vport_tcs_to_vport(tc_arbiter_node, node, NULL);

	return err;
}
1375
1376 static struct mlx5_esw_sched_node *
esw_qos_move_node(struct mlx5_esw_sched_node * curr_node)1377 esw_qos_move_node(struct mlx5_esw_sched_node *curr_node)
1378 {
1379 struct mlx5_esw_sched_node *new_node;
1380
1381 new_node = __esw_qos_alloc_node(curr_node->esw, curr_node->ix,
1382 curr_node->type, NULL);
1383 if (!new_node)
1384 return ERR_PTR(-ENOMEM);
1385
1386 esw_qos_nodes_set_parent(&curr_node->children, new_node);
1387 return new_node;
1388 }
1389
/* Revert @node from a TC arbiter back to a plain vports TSAR.
 *
 * The node's current state is moved onto a temporary node first so the
 * vports can be restored should the switch-over fail.
 */
static int esw_qos_node_disable_tc_arbitration(struct mlx5_esw_sched_node *node,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node;
	int err;

	if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Allocate a new rate node to hold the current state, which will allow
	 * for restoring the vports back to this node after disabling TC
	 * arbitration.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up vports node");
		return PTR_ERR(curr_node);
	}

	/* Disable TC QoS for all vports, and assign them back to the node. */
	err = esw_qos_switch_tc_arbiter_node_to_vports(curr_node, node, extack);
	if (err)
		goto err_out;

	/* Clean up the TC arbiter node after disabling TC QoS for vports. */
	esw_qos_tc_arbiter_scheduling_teardown(curr_node, extack);
	goto out;
err_out:
	/* Hand the children back to the original node before freeing the
	 * temporary one.
	 */
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}
1423
/* Convert @node from a plain vports TSAR into a TC arbiter.
 *
 * Only allowed when the added hierarchy level fits the device's maximum
 * scheduling depth and all of the node's children are vport leaves. The
 * node's current state is kept on a temporary node for rollback.
 */
static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_node, *child;
	int err, new_level, max_level;

	if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
		return 0;

	/* Increase the hierarchy level by one to account for the additional
	 * vports TC scheduling node, and verify that the new level does not
	 * exceed the maximum allowed depth.
	 */
	new_level = node->level + 1;
	max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
	if (new_level > max_level) {
		NL_SET_ERR_MSG_FMT_MOD(extack,
				       "TC arbitration on nodes is not supported beyond max depth %d",
				       max_level);
		return -EOPNOTSUPP;
	}

	/* Ensure the node does not contain non-leaf children before assigning
	 * TC bandwidth.
	 */
	if (!list_empty(&node->children)) {
		list_for_each_entry(child, &node->children, entry) {
			if (!child->vport) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Cannot configure TC bandwidth on a node with non-leaf children");
				return -EOPNOTSUPP;
			}
		}
	}

	/* Allocate a new node that will store the information of the current
	 * node. This will be used later to restore the node if necessary.
	 */
	curr_node = esw_qos_move_node(node);
	if (IS_ERR(curr_node)) {
		NL_SET_ERR_MSG_MOD(extack, "Failed setting up node TC QoS");
		return PTR_ERR(curr_node);
	}

	/* Initialize the TC arbiter node for QoS management.
	 * This step prepares the node for handling Traffic Class arbitration.
	 */
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err)
		goto err_setup;

	/* Enable TC QoS for each vport within the current node. */
	err = esw_qos_switch_vports_node_to_tc_arbiter(curr_node, node, extack);
	if (err)
		goto err_switch_vports;
	goto out;

err_switch_vports:
	/* Undo the arbiter setup and restore the node's old identity. */
	esw_qos_tc_arbiter_scheduling_teardown(node, NULL);
	node->ix = curr_node->ix;
	node->type = curr_node->type;
err_setup:
	/* Hand the children back to the original node. */
	esw_qos_nodes_set_parent(&curr_node->children, node);
out:
	__esw_qos_free_node(curr_node);
	return err;
}
1491
mlx5_esw_qos_lag_link_speed_get(struct mlx5_core_dev * mdev,bool take_rtnl)1492 static u32 mlx5_esw_qos_lag_link_speed_get(struct mlx5_core_dev *mdev,
1493 bool take_rtnl)
1494 {
1495 struct ethtool_link_ksettings lksettings;
1496 struct net_device *slave, *master;
1497 u32 speed = SPEED_UNKNOWN;
1498
1499 slave = mlx5_uplink_netdev_get(mdev);
1500 if (!slave)
1501 goto out;
1502
1503 if (take_rtnl)
1504 rtnl_lock();
1505 master = netdev_master_upper_dev_get(slave);
1506 if (master && !__ethtool_get_link_ksettings(master, &lksettings))
1507 speed = lksettings.base.speed;
1508 if (take_rtnl)
1509 rtnl_unlock();
1510
1511 out:
1512 mlx5_uplink_netdev_put(mdev, slave);
1513 return speed;
1514 }
1515
/* Resolve the maximum link speed: in an active LAG the bond's speed is
 * preferred when known; otherwise fall back to the port's maximum.
 */
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool take_rtnl,
					   struct netlink_ext_ack *extack)
{
	int err;

	if (mlx5_lag_is_active(mdev)) {
		*link_speed_max = mlx5_esw_qos_lag_link_speed_get(mdev,
								  take_rtnl);
		if (*link_speed_max != (u32)SPEED_UNKNOWN)
			return 0;
	}

	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}
1537
/* Reject a rate @value (Mbps) that exceeds @link_speed_max. */
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value <= link_speed_max)
		return 0;

	pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
	       name, value, link_speed_max);
	NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
	return -EINVAL;
}
1551
/* Apply a policing max rate (Mbps) to @vport_num. A non-zero rate is
 * first validated against the link's maximum speed; zero clears the limit.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev,
						      &link_speed_max,
						      false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max,
						     rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}
1579
1580 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
1581
/* Converts a bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the pointed-to value with the result. Returns an error
 * if the converted rate exceeds the link speed or is not a whole number
 * of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 mbps;
	int err;

	/* Devlink passes Bps; HW works in whole Mbps. */
	mbps = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true,
					      extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, mbps,
					     extack);
	if (err)
		return err;

	*rate = mbps;
	return 0;
}
1612
esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch * esw,u32 * tc_bw)1613 static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
1614 u32 *tc_bw)
1615 {
1616 int i, num_tcs = esw_qos_num_tcs(esw->dev);
1617
1618 for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
1619 if (tc_bw[i])
1620 return false;
1621 }
1622
1623 return true;
1624 }
1625
esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport * vport,u32 * tc_bw)1626 static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
1627 u32 *tc_bw)
1628 {
1629 struct mlx5_esw_sched_node *node = vport->qos.sched_node;
1630 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1631
1632 esw = (node && node->parent) ? node->parent->esw : esw;
1633
1634 return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
1635 }
1636
esw_qos_tc_bw_disabled(u32 * tc_bw)1637 static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
1638 {
1639 int i;
1640
1641 for (i = 0; i < DEVLINK_RATE_TCS_MAX; i++) {
1642 if (tc_bw[i])
1643 return false;
1644 }
1645
1646 return true;
1647 }
1648
esw_vport_qos_prune_empty(struct mlx5_vport * vport)1649 static void esw_vport_qos_prune_empty(struct mlx5_vport *vport)
1650 {
1651 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
1652
1653 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
1654 if (!vport_node)
1655 return;
1656
1657 if (vport_node->parent || vport_node->max_rate ||
1658 vport_node->min_rate || !esw_qos_tc_bw_disabled(vport_node->tc_bw))
1659 return;
1660
1661 mlx5_esw_qos_vport_disable_locked(vport);
1662 }
1663
mlx5_esw_qos_init(struct mlx5_eswitch * esw)1664 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
1665 {
1666 if (esw->qos.domain)
1667 return 0; /* Nothing to change. */
1668
1669 return esw_qos_domain_init(esw);
1670 }
1671
mlx5_esw_qos_cleanup(struct mlx5_eswitch * esw)1672 void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
1673 {
1674 if (esw->qos.domain)
1675 esw_qos_domain_release(esw);
1676 }
1677
1678 /* Eswitch devlink rate API */
1679
/* devlink rate leaf .tx_share_set callback: apply a min rate to a vport,
 * pruning its QoS node if the result leaves no configuration behind.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share,
					   extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
	if (!err)
		esw_vport_qos_prune_empty(vport);
	esw_qos_unlock(esw);

	return err;
}
1704
/* devlink rate leaf .tx_max_set callback: apply a max rate to a vport,
 * pruning its QoS node if the result leaves no configuration behind.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max,
					   extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
	if (!err)
		esw_vport_qos_prune_empty(vport);
	esw_qos_unlock(esw);

	return err;
}
1729
/* devlink rate leaf .tc_bw_set callback for a vport.
 *
 * An all-zero @tc_bw disables TC arbitration: the vport reverts to a plain
 * vport element and its QoS node is pruned when nothing else is
 * configured. Otherwise the vport is switched to (or kept at) TC arbiter
 * type and the new shares are applied.
 */
int mlx5_esw_devlink_rate_leaf_tc_bw_set(struct devlink_rate *rate_leaf,
					 void *priv,
					 u32 *tc_bw,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node;
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	bool disable;
	int err = 0;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	disable = esw_qos_tc_bw_disabled(tc_bw);
	esw_qos_lock(esw);

	if (!esw_qos_vport_validate_unsupported_tc_bw(vport, tc_bw)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch traffic classes number is not supported");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	vport_node = vport->qos.sched_node;
	/* Disabling TC QoS on a vport that has none configured is a no-op. */
	if (disable && !vport_node)
		goto unlock;

	if (disable) {
		if (vport_node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR)
			err = esw_qos_vport_update(vport, SCHED_NODE_TYPE_VPORT,
						   vport_node->parent, extack);
		esw_vport_qos_prune_empty(vport);
		goto unlock;
	}

	if (!vport_node) {
		/* First QoS configuration on this vport: enable it directly
		 * as a TC arbiter.
		 */
		err = mlx5_esw_qos_vport_enable(vport,
						SCHED_NODE_TYPE_TC_ARBITER_TSAR,
						NULL, 0, 0, extack);
		vport_node = vport->qos.sched_node;
	} else {
		err = esw_qos_vport_update(vport,
					   SCHED_NODE_TYPE_TC_ARBITER_TSAR,
					   vport_node->parent, extack);
	}
	if (!err)
		esw_qos_set_tc_arbiter_bw_shares(vport_node, tc_bw, extack);
unlock:
	esw_qos_unlock(esw);
	return err;
}
1783
/* devlink rate node .tc_bw_set callback: an all-zero share vector turns TC
 * arbitration off for the node; otherwise it is (re)enabled and the new
 * shares are applied.
 */
int mlx5_esw_devlink_rate_node_tc_bw_set(struct devlink_rate *rate_node,
					 void *priv,
					 u32 *tc_bw,
					 struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	if (!esw_qos_validate_unsupported_tc_bw(esw, tc_bw)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "E-Switch traffic classes number is not supported");
		return -EOPNOTSUPP;
	}

	esw_qos_lock(esw);
	if (esw_qos_tc_bw_disabled(tc_bw)) {
		err = esw_qos_node_disable_tc_arbitration(node, extack);
	} else {
		err = esw_qos_node_enable_tc_arbitration(node, extack);
		if (!err)
			esw_qos_set_tc_arbiter_bw_shares(node, tc_bw, extack);
	}
	esw_qos_unlock(esw);

	return err;
}
1814
/* devlink rate node .tx_share_set callback. */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	/* Convert devlink Bps into HW Mbps, validating against link speed. */
	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share,
					   extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_set_node_min_rate(node, tx_share, extack);
	esw_qos_unlock(esw);

	return err;
}
1831
/* devlink rate node .tx_max_set callback. */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	/* Convert devlink Bps into HW Mbps, validating against link speed. */
	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max,
					   extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
	esw_qos_unlock(esw);

	return err;
}
1848
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)1849 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
1850 struct netlink_ext_ack *extack)
1851 {
1852 struct mlx5_esw_sched_node *node;
1853 struct mlx5_eswitch *esw;
1854 int err = 0;
1855
1856 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
1857 if (IS_ERR(esw))
1858 return PTR_ERR(esw);
1859
1860 esw_qos_lock(esw);
1861 if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
1862 NL_SET_ERR_MSG_MOD(extack,
1863 "Rate node creation supported only in switchdev mode");
1864 err = -EOPNOTSUPP;
1865 goto unlock;
1866 }
1867
1868 node = esw_qos_create_vports_sched_node(esw, extack);
1869 if (IS_ERR(node)) {
1870 err = PTR_ERR(node);
1871 goto unlock;
1872 }
1873
1874 *priv = node;
1875 unlock:
1876 esw_qos_unlock(esw);
1877 return err;
1878 }
1879
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)1880 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
1881 struct netlink_ext_ack *extack)
1882 {
1883 struct mlx5_esw_sched_node *node = priv;
1884 struct mlx5_eswitch *esw = node->esw;
1885
1886 esw_qos_lock(esw);
1887 __esw_qos_destroy_node(node, extack);
1888 esw_qos_put(esw);
1889 esw_qos_unlock(esw);
1890 return 0;
1891 }
1892
mlx5_esw_qos_vport_update_parent(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)1893 int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
1894 struct netlink_ext_ack *extack)
1895 {
1896 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1897 int err = 0;
1898
1899 if (parent && parent->esw != esw) {
1900 NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
1901 return -EOPNOTSUPP;
1902 }
1903
1904 esw_qos_lock(esw);
1905 if (!vport->qos.sched_node && parent) {
1906 enum sched_node_type type;
1907
1908 type = parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR ?
1909 SCHED_NODE_TYPE_RATE_LIMITER : SCHED_NODE_TYPE_VPORT;
1910 err = mlx5_esw_qos_vport_enable(vport, type, parent, 0, 0,
1911 extack);
1912 } else if (vport->qos.sched_node) {
1913 err = esw_qos_vport_update_parent(vport, parent, extack);
1914 }
1915 esw_qos_unlock(esw);
1916 return err;
1917 }
1918
mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)1919 int mlx5_esw_devlink_rate_leaf_parent_set(struct devlink_rate *devlink_rate,
1920 struct devlink_rate *parent,
1921 void *priv, void *parent_priv,
1922 struct netlink_ext_ack *extack)
1923 {
1924 struct mlx5_esw_sched_node *node = parent ? parent_priv : NULL;
1925 struct mlx5_vport *vport = priv;
1926 int err;
1927
1928 err = mlx5_esw_qos_vport_update_parent(vport, node, extack);
1929 if (!err) {
1930 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
1931
1932 esw_qos_lock(esw);
1933 esw_vport_qos_prune_empty(vport);
1934 esw_qos_unlock(esw);
1935 }
1936
1937 return err;
1938 }
1939
esw_qos_is_node_empty(struct mlx5_esw_sched_node * node)1940 static bool esw_qos_is_node_empty(struct mlx5_esw_sched_node *node)
1941 {
1942 if (list_empty(&node->children))
1943 return true;
1944
1945 if (node->type != SCHED_NODE_TYPE_TC_ARBITER_TSAR)
1946 return false;
1947
1948 node = list_first_entry(&node->children, struct mlx5_esw_sched_node,
1949 entry);
1950
1951 return esw_qos_is_node_empty(node);
1952 }
1953
1954 static int
mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node * node,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)1955 mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node,
1956 struct mlx5_esw_sched_node *parent,
1957 struct netlink_ext_ack *extack)
1958 {
1959 u8 new_level, max_level;
1960
1961 if (parent && parent->esw != node->esw) {
1962 NL_SET_ERR_MSG_MOD(extack,
1963 "Cannot assign node to another E-Switch");
1964 return -EOPNOTSUPP;
1965 }
1966
1967 if (!esw_qos_is_node_empty(node)) {
1968 NL_SET_ERR_MSG_MOD(extack,
1969 "Cannot reassign a node that contains rate objects");
1970 return -EOPNOTSUPP;
1971 }
1972
1973 if (parent && parent->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1974 NL_SET_ERR_MSG_MOD(extack,
1975 "Cannot attach a node to a parent with TC bandwidth configured");
1976 return -EOPNOTSUPP;
1977 }
1978
1979 new_level = parent ? parent->level + 1 : 2;
1980 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
1981 /* Increase by one to account for the vports TC scheduling
1982 * element.
1983 */
1984 new_level += 1;
1985 }
1986
1987 max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth);
1988 if (new_level > max_level) {
1989 NL_SET_ERR_MSG_FMT_MOD(extack,
1990 "Node hierarchy depth %d exceeds the maximum supported level %d",
1991 new_level, max_level);
1992 return -EOPNOTSUPP;
1993 }
1994
1995 return 0;
1996 }
1997
/* Re-parent a TC arbiter TSAR node.
 *
 * The arbiter's scheduling elements cannot be moved in place: they are torn
 * down and rebuilt under the new parent. The current per-TC bandwidth shares
 * are captured first and reapplied afterwards. On setup failure the node is
 * restored under its original parent (best effort); if even the restore
 * setup fails, the error is returned without touching the bw shares.
 *
 * Returns 0 on success or the setup error code. Must be called with the
 * qos domain lock held.
 */
static int
esw_qos_tc_arbiter_node_update_parent(struct mlx5_esw_sched_node *node,
				      struct mlx5_esw_sched_node *parent,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *curr_parent = node->parent;
	u32 curr_tc_bw[DEVLINK_RATE_TCS_MAX] = {0};
	struct mlx5_eswitch *esw = node->esw;
	int err;

	/* Snapshot the TC shares so they survive the teardown/setup cycle. */
	esw_qos_tc_arbiter_get_bw_shares(node, curr_tc_bw);
	esw_qos_tc_arbiter_scheduling_teardown(node, extack);
	esw_qos_node_set_parent(node, parent);
	err = esw_qos_tc_arbiter_scheduling_setup(node, extack);
	if (err) {
		/* Roll back to the previous parent and rebuild there. */
		esw_qos_node_set_parent(node, curr_parent);
		if (esw_qos_tc_arbiter_scheduling_setup(node, extack)) {
			/* Restore failed too; node is left without HW state. */
			esw_warn(esw->dev, "Node restore QoS failed\n");
			return err;
		}
	}
	/* Reapply the saved shares to whichever hierarchy now holds the node
	 * (the new parent on success, the restored original on rollback).
	 */
	esw_qos_set_tc_arbiter_bw_shares(node, curr_tc_bw, extack);

	return err;
}
2023
esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node * node,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)2024 static int esw_qos_vports_node_update_parent(struct mlx5_esw_sched_node *node,
2025 struct mlx5_esw_sched_node *parent,
2026 struct netlink_ext_ack *extack)
2027 {
2028 struct mlx5_esw_sched_node *curr_parent = node->parent;
2029 struct mlx5_eswitch *esw = node->esw;
2030 u32 parent_ix;
2031 int err;
2032
2033 parent_ix = parent ? parent->ix : node->esw->qos.root_tsar_ix;
2034 mlx5_destroy_scheduling_element_cmd(esw->dev,
2035 SCHEDULING_HIERARCHY_E_SWITCH,
2036 node->ix);
2037 err = esw_qos_create_node_sched_elem(esw->dev, parent_ix,
2038 node->max_rate, 0, &node->ix);
2039 if (err) {
2040 NL_SET_ERR_MSG_MOD(extack,
2041 "Failed to create a node under the new hierarchy.");
2042 if (esw_qos_create_node_sched_elem(esw->dev, curr_parent->ix,
2043 node->max_rate,
2044 node->bw_share,
2045 &node->ix))
2046 esw_warn(esw->dev, "Node restore QoS failed\n");
2047
2048 return err;
2049 }
2050 esw_qos_node_set_parent(node, parent);
2051 node->bw_share = 0;
2052
2053 return 0;
2054 }
2055
mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node * node,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)2056 static int mlx5_esw_qos_node_update_parent(struct mlx5_esw_sched_node *node,
2057 struct mlx5_esw_sched_node *parent,
2058 struct netlink_ext_ack *extack)
2059 {
2060 struct mlx5_esw_sched_node *curr_parent;
2061 struct mlx5_eswitch *esw = node->esw;
2062 int err;
2063
2064 err = mlx5_esw_qos_node_validate_set_parent(node, parent, extack);
2065 if (err)
2066 return err;
2067
2068 esw_qos_lock(esw);
2069 curr_parent = node->parent;
2070 if (node->type == SCHED_NODE_TYPE_TC_ARBITER_TSAR) {
2071 err = esw_qos_tc_arbiter_node_update_parent(node, parent,
2072 extack);
2073 } else {
2074 err = esw_qos_vports_node_update_parent(node, parent, extack);
2075 }
2076
2077 if (err)
2078 goto out;
2079
2080 esw_qos_normalize_min_rate(esw, curr_parent, extack);
2081 esw_qos_normalize_min_rate(esw, parent, extack);
2082
2083 out:
2084 esw_qos_unlock(esw);
2085
2086 return err;
2087 }
2088
mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)2089 int mlx5_esw_devlink_rate_node_parent_set(struct devlink_rate *devlink_rate,
2090 struct devlink_rate *parent,
2091 void *priv, void *parent_priv,
2092 struct netlink_ext_ack *extack)
2093 {
2094 struct mlx5_esw_sched_node *node = priv, *parent_node;
2095
2096 if (!parent)
2097 return mlx5_esw_qos_node_update_parent(node, NULL, extack);
2098
2099 parent_node = parent_priv;
2100 return mlx5_esw_qos_node_update_parent(node, parent_node, extack);
2101 }
2102