1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10
/* Minimum supported BW share value by the HW is 1 Mbit/sec.
 * Used as the floor for any non-zero computed bw_share, and compared against
 * the max_tsar_bw_share capability before a bw_share is programmed.
 */
#define MLX5_MIN_BW_SHARE 1
13
/* Holds rate nodes associated with an E-Switch.
 * Allocated by esw_qos_domain_alloc() and reachable through esw->qos.domain.
 */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of mlx5_esw_sched_nodes created without a parent; nodes that have
	 * a parent are linked on that parent's children list instead.
	 */
	struct list_head nodes;
};
21
esw_qos_lock(struct mlx5_eswitch * esw)22 static void esw_qos_lock(struct mlx5_eswitch *esw)
23 {
24 mutex_lock(&esw->qos.domain->lock);
25 }
26
esw_qos_unlock(struct mlx5_eswitch * esw)27 static void esw_qos_unlock(struct mlx5_eswitch *esw)
28 {
29 mutex_unlock(&esw->qos.domain->lock);
30 }
31
esw_assert_qos_lock_held(struct mlx5_eswitch * esw)32 static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
33 {
34 lockdep_assert_held(&esw->qos.domain->lock);
35 }
36
esw_qos_domain_alloc(void)37 static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
38 {
39 struct mlx5_qos_domain *qos_domain;
40
41 qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
42 if (!qos_domain)
43 return NULL;
44
45 mutex_init(&qos_domain->lock);
46 INIT_LIST_HEAD(&qos_domain->nodes);
47
48 return qos_domain;
49 }
50
esw_qos_domain_init(struct mlx5_eswitch * esw)51 static int esw_qos_domain_init(struct mlx5_eswitch *esw)
52 {
53 esw->qos.domain = esw_qos_domain_alloc();
54
55 return esw->qos.domain ? 0 : -ENOMEM;
56 }
57
esw_qos_domain_release(struct mlx5_eswitch * esw)58 static void esw_qos_domain_release(struct mlx5_eswitch *esw)
59 {
60 kfree(esw->qos.domain);
61 esw->qos.domain = NULL;
62 }
63
/* Kinds of nodes in the E-Switch rate hierarchy.
 * Values index sched_node_type_str[], so keep the two in sync.
 */
enum sched_node_type {
	/* Node backed by a TSAR scheduling element that vports attach to. */
	SCHED_NODE_TYPE_VPORTS_TSAR,
	/* Node backed by a vport scheduling element. */
	SCHED_NODE_TYPE_VPORT,
};
68
/* Human-readable names for enum sched_node_type, used in warning messages. */
static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
};
73
/* A node in the E-Switch rate hierarchy: either a vports TSAR or a vport. */
struct mlx5_esw_sched_node {
	/* Index of the scheduling element backing this node. */
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list, or in the QoS domain's
	 * nodes list when the node has no parent.
	 */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
};
94
95 static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node * node,struct mlx5_esw_sched_node * parent)96 esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
97 {
98 list_del_init(&node->entry);
99 node->parent = parent;
100 list_add_tail(&node->entry, &parent->children);
101 node->esw = parent->esw;
102 }
103
mlx5_esw_qos_vport_qos_free(struct mlx5_vport * vport)104 void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
105 {
106 kfree(vport->qos.sched_node);
107 memset(&vport->qos, 0, sizeof(vport->qos));
108 }
109
mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport * vport)110 u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
111 {
112 if (!vport->qos.sched_node)
113 return 0;
114
115 return vport->qos.sched_node->ix;
116 }
117
118 struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport * vport)119 mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
120 {
121 if (!vport->qos.sched_node)
122 return NULL;
123
124 return vport->qos.sched_node->parent;
125 }
126
esw_qos_sched_elem_warn(struct mlx5_esw_sched_node * node,int err,const char * op)127 static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
128 {
129 if (node->vport) {
130 esw_warn(node->esw->dev,
131 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
132 op, sched_node_type_str[node->type], node->vport->vport, err);
133 return;
134 }
135
136 esw_warn(node->esw->dev,
137 "E-Switch %s %s scheduling element failed (err=%d)\n",
138 op, sched_node_type_str[node->type], err);
139 }
140
esw_qos_node_create_sched_element(struct mlx5_esw_sched_node * node,void * ctx,struct netlink_ext_ack * extack)141 static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
142 struct netlink_ext_ack *extack)
143 {
144 int err;
145
146 err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
147 &node->ix);
148 if (err) {
149 esw_qos_sched_elem_warn(node, err, "create");
150 NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
151 }
152
153 return err;
154 }
155
esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node * node,struct netlink_ext_ack * extack)156 static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
157 struct netlink_ext_ack *extack)
158 {
159 int err;
160
161 err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
162 SCHEDULING_HIERARCHY_E_SWITCH,
163 node->ix);
164 if (err) {
165 esw_qos_sched_elem_warn(node, err, "destroy");
166 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
167 }
168
169 return err;
170 }
171
/* Program @max_rate and @bw_share into @node's scheduling element.
 *
 * Only the fields that differ from the values cached in @node are sent to the
 * device; if neither differs nothing is sent and 0 is returned. The cached
 * values are updated and a tracepoint is emitted only after the modify
 * command succeeded.
 *
 * Returns -EOPNOTSUPP when E-Switch scheduling is not supported, or when a
 * non-zero @bw_share is requested but bw_share is not supported by the device.
 */
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	bool rate_changed, share_changed;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (bw_share) {
		if (!MLX5_CAP_QOS(dev, esw_bw_share))
			return -EOPNOTSUPP;
		if (MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE)
			return -EOPNOTSUPP;
	}

	rate_changed = node->max_rate != max_rate;
	share_changed = node->bw_share != bw_share;
	if (!rate_changed && !share_changed)
		return 0;

	if (rate_changed) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (share_changed) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx, node->ix, bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	node->max_rate = max_rate;
	node->bw_share = bw_share;

	switch (node->type) {
	case SCHED_NODE_TYPE_VPORTS_TSAR:
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
		break;
	case SCHED_NODE_TYPE_VPORT:
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);
		break;
	default:
		break;
	}

	return 0;
}
220
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_sched_node * parent)221 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
222 struct mlx5_esw_sched_node *parent)
223 {
224 struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
225 u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
226 struct mlx5_esw_sched_node *node;
227 u32 max_guarantee = 0;
228
229 /* Find max min_rate across all nodes.
230 * This will correspond to fw_max_bw_share in the final bw_share calculation.
231 */
232 list_for_each_entry(node, nodes, entry) {
233 if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
234 node->min_rate > max_guarantee)
235 max_guarantee = node->min_rate;
236 }
237
238 if (max_guarantee)
239 return max_t(u32, max_guarantee / fw_max_bw_share, 1);
240
241 /* If nodes max min_rate divider is 0 but their parent has bw_share
242 * configured, then set bw_share for nodes to minimal value.
243 */
244
245 if (parent && parent->bw_share)
246 return 1;
247
248 /* If the node nodes has min_rate configured, a divider of 0 sets all
249 * nodes' bw_share to 0, effectively disabling min guarantees.
250 */
251 return 0;
252 }
253
/* Scale @min_rate by @divider (rounding up) and clamp the result to
 * [MLX5_MIN_BW_SHARE, @fw_max], with the upper bound applied last.
 * A @divider of 0 yields a bw_share of 0.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	u32 share;

	if (divider == 0)
		return 0;

	share = DIV_ROUND_UP(min_rate, divider);
	if (share < MLX5_MIN_BW_SHARE)
		share = MLX5_MIN_BW_SHARE;
	if (share > fw_max)
		share = fw_max;

	return share;
}
260
/* Recompute @node's bw_share from its min_rate and @divider and program it,
 * keeping the node's current max_rate. The result of the programming step is
 * not propagated to the caller.
 */
static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 share;

	share = esw_qos_calc_bw_share(node->min_rate, divider,
				      MLX5_CAP_QOS(dev, max_tsar_bw_share));

	esw_qos_sched_elem_config(node, node->max_rate, share, extack);
}
272
esw_qos_normalize_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)273 static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
274 struct mlx5_esw_sched_node *parent,
275 struct netlink_ext_ack *extack)
276 {
277 struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
278 u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
279 struct mlx5_esw_sched_node *node;
280
281 list_for_each_entry(node, nodes, entry) {
282 if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
283 continue;
284
285 esw_qos_update_sched_node_bw_share(node, divider, extack);
286
287 if (list_empty(&node->children))
288 continue;
289
290 esw_qos_normalize_min_rate(node->esw, node, extack);
291 }
292 }
293
/* Record a new @min_rate on @node and renormalize the bw_share of the node's
 * siblings. Does nothing when the value is unchanged. Always returns 0.
 */
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	if (node->min_rate != min_rate) {
		struct mlx5_eswitch *esw = node->esw;

		node->min_rate = min_rate;
		esw_qos_normalize_min_rate(esw, node->parent, extack);
	}

	return 0;
}
307
/* Create a DWRR TSAR scheduling element under @parent_element_id and return
 * its index through @tsar_ix.
 * Returns -EOPNOTSUPP when the device does not support TSAR elements or the
 * DWRR TSAR type in the E-Switch hierarchy.
 */
static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
					  u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *tsar_attr;

	if (!mlx5_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	if (!mlx5_qos_tsar_type_supported(dev, TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, parent_element_id);

	tsar_attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, tsar_attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx, tsar_ix);
}
334
esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node * vport_node,struct netlink_ext_ack * extack)335 static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
336 struct netlink_ext_ack *extack)
337 {
338 u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
339 struct mlx5_core_dev *dev = vport_node->esw->dev;
340 void *attr;
341
342 if (!mlx5_qos_element_type_supported(dev,
343 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
344 SCHEDULING_HIERARCHY_E_SWITCH))
345 return -EOPNOTSUPP;
346
347 MLX5_SET(scheduling_context, sched_ctx, element_type,
348 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
349 attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
350 MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
351 MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix);
352 MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate);
353
354 return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
355 }
356
357 static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch * esw,u32 tsar_ix,enum sched_node_type type,struct mlx5_esw_sched_node * parent)358 __esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
359 struct mlx5_esw_sched_node *parent)
360 {
361 struct list_head *parent_children;
362 struct mlx5_esw_sched_node *node;
363
364 node = kzalloc(sizeof(*node), GFP_KERNEL);
365 if (!node)
366 return NULL;
367
368 node->esw = esw;
369 node->ix = tsar_ix;
370 node->type = type;
371 node->parent = parent;
372 INIT_LIST_HEAD(&node->children);
373 parent_children = parent ? &parent->children : &esw->qos.domain->nodes;
374 list_add_tail(&node->entry, parent_children);
375
376 return node;
377 }
378
__esw_qos_free_node(struct mlx5_esw_sched_node * node)379 static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
380 {
381 list_del(&node->entry);
382 kfree(node);
383 }
384
/* Destroy @node's scheduling element, then unlink and free the node.
 * The node is freed even if destroying the element fails; that failure is
 * only reported through the warning and @extack set by the callee.
 */
static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	/* Return value intentionally not checked, see above. */
	esw_qos_node_destroy_sched_element(node, extack);

	__esw_qos_free_node(node);
}
390
391 static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch * esw,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)392 __esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
393 struct netlink_ext_ack *extack)
394 {
395 struct mlx5_esw_sched_node *node;
396 u32 tsar_ix;
397 int err;
398
399 err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix);
400 if (err) {
401 NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
402 return ERR_PTR(err);
403 }
404
405 node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
406 if (!node) {
407 NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
408 err = -ENOMEM;
409 goto err_alloc_node;
410 }
411
412 esw_qos_normalize_min_rate(esw, NULL, extack);
413 trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);
414
415 return node;
416
417 err_alloc_node:
418 if (mlx5_destroy_scheduling_element_cmd(esw->dev,
419 SCHEDULING_HIERARCHY_E_SWITCH,
420 tsar_ix))
421 NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
422 return ERR_PTR(err);
423 }
424
/* Forward declarations: both helpers are defined further down in this file but
 * are already needed by esw_qos_create_vports_sched_node() below.
 */
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);
427
428 static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)429 esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
430 {
431 struct mlx5_esw_sched_node *node;
432 int err;
433
434 esw_assert_qos_lock_held(esw);
435 if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
436 return ERR_PTR(-EOPNOTSUPP);
437
438 err = esw_qos_get(esw, extack);
439 if (err)
440 return ERR_PTR(err);
441
442 node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
443 if (IS_ERR(node))
444 esw_qos_put(esw);
445
446 return node;
447 }
448
__esw_qos_destroy_node(struct mlx5_esw_sched_node * node,struct netlink_ext_ack * extack)449 static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
450 {
451 struct mlx5_eswitch *esw = node->esw;
452
453 trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
454 esw_qos_destroy_node(node, extack);
455 esw_qos_normalize_min_rate(esw, NULL, extack);
456 }
457
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)458 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
459 {
460 struct mlx5_core_dev *dev = esw->dev;
461 int err;
462
463 if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
464 return -EOPNOTSUPP;
465
466 err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix);
467 if (err) {
468 esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
469 return err;
470 }
471
472 if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
473 esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack);
474 } else {
475 /* The eswitch doesn't support scheduling nodes.
476 * Create a software-only node0 using the root TSAR to attach vport QoS to.
477 */
478 if (!__esw_qos_alloc_node(esw,
479 esw->qos.root_tsar_ix,
480 SCHED_NODE_TYPE_VPORTS_TSAR,
481 NULL))
482 esw->qos.node0 = ERR_PTR(-ENOMEM);
483 }
484 if (IS_ERR(esw->qos.node0)) {
485 err = PTR_ERR(esw->qos.node0);
486 esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err);
487 goto err_node0;
488 }
489 refcount_set(&esw->qos.refcnt, 1);
490
491 return 0;
492
493 err_node0:
494 if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
495 esw->qos.root_tsar_ix))
496 esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
497
498 return err;
499 }
500
esw_qos_destroy(struct mlx5_eswitch * esw)501 static void esw_qos_destroy(struct mlx5_eswitch *esw)
502 {
503 int err;
504
505 if (esw->qos.node0->ix != esw->qos.root_tsar_ix)
506 __esw_qos_destroy_node(esw->qos.node0, NULL);
507 else
508 __esw_qos_free_node(esw->qos.node0);
509 esw->qos.node0 = NULL;
510
511 err = mlx5_destroy_scheduling_element_cmd(esw->dev,
512 SCHEDULING_HIERARCHY_E_SWITCH,
513 esw->qos.root_tsar_ix);
514 if (err)
515 esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
516 }
517
esw_qos_get(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)518 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
519 {
520 int err = 0;
521
522 esw_assert_qos_lock_held(esw);
523 if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
524 /* esw_qos_create() set refcount to 1 only on success.
525 * No need to decrement on failure.
526 */
527 err = esw_qos_create(esw, extack);
528 }
529
530 return err;
531 }
532
esw_qos_put(struct mlx5_eswitch * esw)533 static void esw_qos_put(struct mlx5_eswitch *esw)
534 {
535 esw_assert_qos_lock_held(esw);
536 if (refcount_dec_and_test(&esw->qos.refcnt))
537 esw_qos_destroy(esw);
538 }
539
esw_qos_vport_disable(struct mlx5_vport * vport,struct netlink_ext_ack * extack)540 static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
541 {
542 struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
543 struct mlx5_esw_sched_node *parent = vport_node->parent;
544
545 esw_qos_node_destroy_sched_element(vport_node, extack);
546
547 vport_node->bw_share = 0;
548 list_del_init(&vport_node->entry);
549 esw_qos_normalize_min_rate(parent->esw, parent, extack);
550
551 trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
552 }
553
esw_qos_vport_enable(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)554 static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
555 struct netlink_ext_ack *extack)
556 {
557 int err;
558
559 esw_assert_qos_lock_held(vport->dev->priv.eswitch);
560
561 esw_qos_node_set_parent(vport->qos.sched_node, parent);
562 err = esw_qos_vport_create_sched_element(vport->qos.sched_node, extack);
563 if (err)
564 return err;
565
566 esw_qos_normalize_min_rate(parent->esw, parent, extack);
567
568 return 0;
569 }
570
/* Enable QoS on @vport: take a QoS reference, allocate a scheduling node of
 * @type with the given @max_rate / @min_rate under @parent (node0 when
 * @parent is NULL) and create its scheduling element.
 * The caller must hold the QoS domain lock.
 *
 * On any failure everything done here is undone: the node is unlinked and
 * freed, vport->qos.sched_node is reset to NULL and the QoS reference is
 * dropped, so the vport is left with QoS disabled.
 *
 * Returns 0 on success, -ENOMEM when the node cannot be allocated, or the
 * error from esw_qos_get() / esw_qos_vport_enable().
 */
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent = parent ?: esw->qos.node0;
	sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent);
	if (!sched_node) {
		err = -ENOMEM;
		goto err_put;
	}

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, parent, extack);
	if (err)
		goto err_free_node;

	return 0;

err_free_node:
	/* Free the node before dropping the reference: the put may destroy
	 * node0, whose children list the node is still linked on.
	 */
	vport->qos.sched_node = NULL;
	__esw_qos_free_node(sched_node);
err_put:
	esw_qos_put(esw);

	return err;
}
599
mlx5_esw_qos_vport_disable(struct mlx5_vport * vport)600 void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
601 {
602 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
603 struct mlx5_esw_sched_node *parent;
604
605 lockdep_assert_held(&esw->state_lock);
606 esw_qos_lock(esw);
607 if (!vport->qos.sched_node)
608 goto unlock;
609
610 parent = vport->qos.sched_node->parent;
611 WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node");
612
613 esw_qos_vport_disable(vport, NULL);
614 mlx5_esw_qos_vport_qos_free(vport);
615 esw_qos_put(esw);
616 unlock:
617 esw_qos_unlock(esw);
618 }
619
/* Set @vport's max rate. If QoS is not yet enabled on the vport it is enabled
 * with this max rate and a min rate of 0; otherwise the existing scheduling
 * element is reconfigured, keeping its bw_share.
 * The caller must hold the QoS domain lock.
 */
static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (node)
		return esw_qos_sched_elem_config(node, max_rate, node->bw_share, extack);

	return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, max_rate, 0,
					 extack);
}
634
/* Set @vport's min rate. If QoS is not yet enabled on the vport it is enabled
 * with this min rate and a max rate of 0; otherwise the node's min rate is
 * updated. The caller must hold the QoS domain lock.
 */
static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (node)
		return esw_qos_set_node_min_rate(node, min_rate, extack);

	return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL, 0, min_rate,
					 extack);
}
648
/* Set both rates of @vport under the QoS domain lock: min rate first, then
 * max rate. The max rate is not touched when setting the min rate fails.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);

	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (err)
		goto unlock;

	err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);

unlock:
	esw_qos_unlock(esw);
	return err;
}
661
mlx5_esw_qos_get_vport_rate(struct mlx5_vport * vport,u32 * max_rate,u32 * min_rate)662 bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
663 {
664 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
665 bool enabled;
666
667 esw_qos_lock(esw);
668 enabled = !!vport->qos.sched_node;
669 if (enabled) {
670 *max_rate = vport->qos.sched_node->max_rate;
671 *min_rate = vport->qos.sched_node->min_rate;
672 }
673 esw_qos_unlock(esw);
674 return enabled;
675 }
676
esw_qos_vport_update_parent(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)677 static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
678 struct netlink_ext_ack *extack)
679 {
680 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
681 struct mlx5_esw_sched_node *curr_parent;
682 int err;
683
684 esw_assert_qos_lock_held(esw);
685 curr_parent = vport->qos.sched_node->parent;
686 parent = parent ?: esw->qos.node0;
687 if (curr_parent == parent)
688 return 0;
689
690 esw_qos_vport_disable(vport, extack);
691
692 err = esw_qos_vport_enable(vport, parent, extack);
693 if (err) {
694 if (esw_qos_vport_enable(vport, curr_parent, NULL))
695 esw_warn(parent->esw->dev, "vport restore QoS failed (vport=%d)\n",
696 vport->vport);
697 }
698
699 return err;
700 }
701
mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev * mdev)702 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
703 {
704 struct ethtool_link_ksettings lksettings;
705 struct net_device *slave, *master;
706 u32 speed = SPEED_UNKNOWN;
707
708 /* Lock ensures a stable reference to master and slave netdevice
709 * while port speed of master is queried.
710 */
711 ASSERT_RTNL();
712
713 slave = mlx5_uplink_netdev_get(mdev);
714 if (!slave)
715 goto out;
716
717 master = netdev_master_upper_dev_get(slave);
718 if (master && !__ethtool_get_link_ksettings(master, &lksettings))
719 speed = lksettings.base.speed;
720
721 out:
722 return speed;
723 }
724
/* Store the maximum link speed to validate rates against in @link_speed_max.
 *
 * With LAG active, the speed of the LAG master is used when it is known;
 * RTNL is taken around that query only if @hold_rtnl_lock is set. In all
 * other cases the port's maximum link speed is queried, and @extack is
 * filled if that query fails.
 */
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (mlx5_lag_is_active(mdev)) {
		if (hold_rtnl_lock)
			rtnl_lock();

		*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

		if (hold_rtnl_lock)
			rtnl_unlock();

		if (*link_speed_max != (u32)SPEED_UNKNOWN)
			return 0;
	}

	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}
751
/* Check that @value does not exceed @link_speed_max.
 * Returns 0 when it fits; otherwise logs an error mentioning @name, fills
 * @extack and returns -EINVAL.
 */
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value <= link_speed_max)
		return 0;

	pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
	       name, value, link_speed_max);
	NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");

	return -EINVAL;
}
765
/* Set the max rate of vport @vport_num to @rate_mbps.
 * A non-zero rate is first validated against the maximum link speed (RTNL is
 * not taken here for the LAG query); a rate of 0 skips validation.
 * Returns 0 on success or a negative error code.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
	u32 link_speed_max;
	int err;

	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (!err)
			err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
							     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}
793
794 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
795
796 /* Converts bytes per second value passed in a pointer into megabits per
797 * second, rewriting last. If converted rate exceed link speed or is not a
798 * fraction of Mbps - returns error.
799 */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)800 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
801 u64 *rate, struct netlink_ext_ack *extack)
802 {
803 u32 link_speed_max, remainder;
804 u64 value;
805 int err;
806
807 value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
808 if (remainder) {
809 pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
810 name, *rate);
811 NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
812 return -EINVAL;
813 }
814
815 err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
816 if (err)
817 return err;
818
819 err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
820 if (err)
821 return err;
822
823 *rate = value;
824 return 0;
825 }
826
mlx5_esw_qos_init(struct mlx5_eswitch * esw)827 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
828 {
829 if (esw->qos.domain)
830 return 0; /* Nothing to change. */
831
832 return esw_qos_domain_init(esw);
833 }
834
mlx5_esw_qos_cleanup(struct mlx5_eswitch * esw)835 void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
836 {
837 if (esw->qos.domain)
838 esw_qos_domain_release(esw);
839 }
840
841 /* Eswitch devlink rate API */
842
/* devlink rate leaf callback: set the tx_share (min rate) of the vport passed
 * in @priv. @tx_share is given in bytes per second and converted to Mbps.
 * Returns -EPERM when the eswitch may not be used, otherwise 0 or the error
 * from the conversion / min rate update.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (!err) {
		esw_qos_lock(esw);
		err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
		esw_qos_unlock(esw);
	}

	return err;
}
863
/* devlink rate leaf callback: set the tx_max (max rate) of the vport passed
 * in @priv. @tx_max is given in bytes per second and converted to Mbps.
 * Returns -EPERM when the eswitch may not be used, otherwise 0 or the error
 * from the conversion / max rate update.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (!err) {
		esw_qos_lock(esw);
		err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
		esw_qos_unlock(esw);
	}

	return err;
}
884
/* devlink rate node callback: set the tx_share (min rate) of the scheduling
 * node passed in @priv. @tx_share is given in bytes per second and converted
 * to Mbps before being applied under the QoS domain lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
	if (!err) {
		esw_qos_lock(esw);
		err = esw_qos_set_node_min_rate(node, tx_share, extack);
		esw_qos_unlock(esw);
	}

	return err;
}
901
/* devlink rate node callback: set the tx_max (max rate) of the scheduling
 * node passed in @priv, keeping its current bw_share. @tx_max is given in
 * bytes per second and converted to Mbps before being applied under the QoS
 * domain lock.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node = priv;
	struct mlx5_eswitch *esw = node->esw;
	int err;

	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
	if (!err) {
		esw_qos_lock(esw);
		err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
		esw_qos_unlock(esw);
	}

	return err;
}
918
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)919 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
920 struct netlink_ext_ack *extack)
921 {
922 struct mlx5_esw_sched_node *node;
923 struct mlx5_eswitch *esw;
924 int err = 0;
925
926 esw = mlx5_devlink_eswitch_get(rate_node->devlink);
927 if (IS_ERR(esw))
928 return PTR_ERR(esw);
929
930 esw_qos_lock(esw);
931 if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
932 NL_SET_ERR_MSG_MOD(extack,
933 "Rate node creation supported only in switchdev mode");
934 err = -EOPNOTSUPP;
935 goto unlock;
936 }
937
938 node = esw_qos_create_vports_sched_node(esw, extack);
939 if (IS_ERR(node)) {
940 err = PTR_ERR(node);
941 goto unlock;
942 }
943
944 *priv = node;
945 unlock:
946 esw_qos_unlock(esw);
947 return err;
948 }
949
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)950 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
951 struct netlink_ext_ack *extack)
952 {
953 struct mlx5_esw_sched_node *node = priv;
954 struct mlx5_eswitch *esw = node->esw;
955
956 esw_qos_lock(esw);
957 __esw_qos_destroy_node(node, extack);
958 esw_qos_put(esw);
959 esw_qos_unlock(esw);
960 return 0;
961 }
962
mlx5_esw_qos_vport_update_parent(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)963 int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
964 struct netlink_ext_ack *extack)
965 {
966 struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
967 int err = 0;
968
969 if (parent && parent->esw != esw) {
970 NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
971 return -EOPNOTSUPP;
972 }
973
974 esw_qos_lock(esw);
975 if (!vport->qos.sched_node && parent)
976 err = mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, parent, 0, 0, extack);
977 else if (vport->qos.sched_node)
978 err = esw_qos_vport_update_parent(vport, parent, extack);
979 esw_qos_unlock(esw);
980 return err;
981 }
982
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)983 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
984 struct devlink_rate *parent,
985 void *priv, void *parent_priv,
986 struct netlink_ext_ack *extack)
987 {
988 struct mlx5_esw_sched_node *node;
989 struct mlx5_vport *vport = priv;
990
991 if (!parent)
992 return mlx5_esw_qos_vport_update_parent(vport, NULL, extack);
993
994 node = parent_priv;
995 return mlx5_esw_qos_vport_update_parent(vport, node, extack);
996 }
997