// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */

#include "eswitch.h"
#include "lib/mlx5.h"
#include "esw/qos.h"
#include "en/port.h"
#define CREATE_TRACE_POINTS
#include "diag/qos_tracepoint.h"

/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

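/* Illustration of MLX5_RATE_TO_BW_SHARE() below (values are hypothetical):
 * with a firmware max_tsar_bw_share of 100 and a largest configured min_rate
 * of 10000 Mbit/s, the divider is 10000 / 100 = 100; a vport with a min_rate
 * of 2500 Mbit/s then gets bw_share = min(max(DIV_ROUND_UP(2500, 100), 1),
 * 100) = 25, while a vport with min_rate 0 is clamped up to MLX5_MIN_BW_SHARE.
 */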
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
	min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)

struct mlx5_esw_rate_group {
	u32 tsar_ix;
	u32 max_rate;
	u32 min_rate;
	u32 bw_share;
	struct list_head list;
};

static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	u32 bitmask = 0;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	return mlx5_modify_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx,
						  tsar_ix,
						  bitmask);
}

static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	err = esw_qos_tsar_config(dev, sched_ctx,
				  group->tsar_ix,
				  max_rate, bw_share);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return err;
}

static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int err;

	if (!vport->qos.enabled)
		return -EIO;

	err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (err) {
		esw_warn(esw->dev,
			 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
		return err;
	}

	trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);

	return 0;
}

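/* min_rate normalization helpers: the divider computed below converts
 * absolute min_rate values (Mbit/s) into relative bw_share weights within
 * the firmware's max_tsar_bw_share range.  The entity with the largest
 * min_rate (max_guarantee) gets the largest weight and the others are
 * scaled proportionally; a divider of 0 means no min_rate is configured
 * at that level.
 */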
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_rate_group *group,
					      bool group_level)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_vport *evport;
	u32 max_guarantee = 0;
	unsigned long i;

	if (group_level) {
		struct mlx5_esw_rate_group *group;

		list_for_each_entry(group, &esw->qos.groups, list) {
			if (group->min_rate < max_guarantee)
				continue;
			max_guarantee = group->min_rate;
		}
	} else {
		mlx5_esw_for_each_vport(esw, i, evport) {
			if (!evport->enabled || !evport->qos.enabled ||
			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
				continue;
			max_guarantee = evport->qos.min_rate;
		}
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If the vports' min rate divider is 0 but their group has bw_share
	 * configured, then bw_share for the vports still needs to be set to
	 * the minimal value.
	 */
	if (!group_level && !max_guarantee && group && group->bw_share)
		return 1;
	return 0;
}

static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	if (divider)
		return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);

	return 0;
}

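/* Walk all vports of @group (and, in the groups variant, all rate groups)
 * and push the recomputed bw_share values to firmware whenever they differ
 * from the cached ones.
 */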
static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
					     struct mlx5_esw_rate_group *group,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
	struct mlx5_vport *evport;
	unsigned long i;
	u32 bw_share;
	int err;

	mlx5_esw_for_each_vport(esw, i, evport) {
		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
			continue;
		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);

		if (bw_share == evport->qos.bw_share)
			continue;

		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
		if (err)
			return err;

		evport->qos.bw_share = bw_share;
	}

	return 0;
}

static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *group;
	u32 bw_share;
	int err;

	list_for_each_entry(group, &esw->qos.groups, list) {
		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);

		if (bw_share == group->bw_share)
			continue;

		err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
		if (err)
			return err;

		group->bw_share = bw_share;

		/* All the group's vports need to be set with default bw_share
		 * to enable them with QOS
		 */
		err = esw_qos_normalize_vports_min_rate(esw, group, extack);

		if (err)
			return err;
	}

	return 0;
}

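/* A vport's min_rate is a bandwidth guarantee in Mbit/s.  It is not written
 * to firmware directly: changing it triggers a re-normalization of the
 * bw_share weights of all vports in the same group, and the previous value
 * is restored if that fails.
 */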
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, previous_min_rate;
	bool min_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
				fw_max_bw_share >= MLX5_MIN_BW_SHARE;
	if (min_rate && !min_rate_supported)
		return -EOPNOTSUPP;
	if (min_rate == evport->qos.min_rate)
		return 0;

	previous_min_rate = evport->qos.min_rate;
	evport->qos.min_rate = min_rate;
	err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (err)
		evport->qos.min_rate = previous_min_rate;

	return err;
}

static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	u32 act_max_rate = max_rate;
	bool max_rate_supported;
	int err;

	lockdep_assert_held(&esw->state_lock);
	max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);

	if (max_rate && !max_rate_supported)
		return -EOPNOTSUPP;
	if (max_rate == evport->qos.max_rate)
		return 0;

	/* If the parent group has a rate limit, the vport must be set to the
	 * group value when its new max rate is 0.
	 */
	if (evport->qos.group && !max_rate)
		act_max_rate = evport->qos.group->max_rate;

	err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);

	if (!err)
		evport->qos.max_rate = max_rate;

	return err;
}

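/* Group min_rate works the same way at the group level: the new value is
 * folded into the group bw_share weights via re-normalization, and on
 * failure the previous min_rate is restored and normalization is re-run.
 */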
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_core_dev *dev = esw->dev;
	u32 previous_min_rate, divider;
	int err;

	if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	if (min_rate == group->min_rate)
		return 0;

	previous_min_rate = group->min_rate;
	group->min_rate = min_rate;
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err) {
		group->min_rate = previous_min_rate;
		NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

		/* Attempt restoring previous configuration */
		divider = esw_qos_calculate_min_rate_divider(esw, group, true);
		if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
			NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
	}

	return err;
}

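/* Apply a max rate to the group TSAR and propagate it as an implicit limit
 * to every vport in the group that has no max_rate of its own.
 */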
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (err)
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
	}

	return err;
}

static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
{
	switch (type) {
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_TSAR;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
		return MLX5_CAP_QOS(dev, esw_element_type) &
		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
	}
	return false;
}

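/* Create the vport's scheduling element in firmware.  The element is attached
 * under the vport's rate group TSAR when a group is assigned, or directly
 * under the e-switch root TSAR otherwise, with the requested max_rate and
 * bw_share.
 */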
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group = vport->qos.group;
	struct mlx5_core_dev *dev = esw->dev;
	u32 parent_tsar_ix;
	void *vport_elem;
	int err;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
		return -EOPNOTSUPP;

	parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (err) {
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);
		return err;
	}

	return 0;
}

static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
						   struct mlx5_vport *vport,
						   struct mlx5_esw_rate_group *curr_group,
						   struct mlx5_esw_rate_group *new_group,
						   struct netlink_ext_ack *extack)
{
	u32 max_rate;
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
		return err;
	}

	vport->qos.group = new_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;

	/* If vport is unlimited, we set the group's value.
	 * Therefore, if the group is limited it will apply to
	 * the vport as well and if not, vport will remain unlimited.
	 */
	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
		goto err_sched;
	}

	return 0;

err_sched:
	vport->qos.group = curr_group;
	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
			 vport->vport);

	return err;
}

static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				      struct mlx5_vport *vport,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *new_group, *curr_group;
	int err;

	if (!vport->enabled)
		return -EINVAL;

	curr_group = vport->qos.group;
	new_group = group ?: esw->qos.group0;
	if (curr_group == new_group)
		return 0;

	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
	if (err)
		return err;

	/* Recalculate bw share weights of old and new groups */
	if (vport->qos.bw_share || new_group->bw_share) {
		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
	}

	return 0;
}

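/* Allocate a rate group and create its DWRR TSAR as a child of the e-switch
 * root TSAR, then re-normalize the min rates of the existing groups so the
 * relative weights account for the new member.
 */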
static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group;
	__be32 *attr;
	u32 divider;
	int err;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 esw->qos.root_tsar_ix);
	err = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &group->tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
		goto err_sched_elem;
	}

	list_add_tail(&group->list, &esw->qos.groups);

	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (divider) {
		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
			goto err_min_rate;
		}
	}
	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);

	return group;

err_min_rate:
	list_del(&group->list);
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						group->tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
err_sched_elem:
	kfree(group);
	return ERR_PTR(err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
static void esw_qos_put(struct mlx5_eswitch *esw);

static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	int err;

	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	group = __esw_qos_create_rate_group(esw, extack);
	if (IS_ERR(group))
		esw_qos_put(esw);

	return group;
}

static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
					struct mlx5_esw_rate_group *group,
					struct netlink_ext_ack *extack)
{
	u32 divider;
	int err;

	list_del(&group->list);

	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  group->tsar_ix);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");

	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);

	kfree(group);

	return err;
}

static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int err;

	err = __esw_qos_destroy_rate_group(esw, group, extack);
	esw_qos_put(esw);

	return err;
}

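/* Create the e-switch QoS root: a DWRR TSAR at the top of the scheduling
 * hierarchy and, when the device supports nested scheduling
 * (log_esw_max_sched_depth), a default rate group (group0) that vports are
 * placed in until they are moved to an explicit group.  The whole state is
 * reference counted through esw->qos.refcnt via esw_qos_get()/esw_qos_put().
 */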
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	__be32 *attr;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
	    !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);

	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);

	err = mlx5_create_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 tsar_ctx,
						 &esw->qos.root_tsar_ix);
	if (err) {
		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
		return err;
	}

	INIT_LIST_HEAD(&esw->qos.groups);
	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
		if (IS_ERR(esw->qos.group0)) {
			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
				 PTR_ERR(esw->qos.group0));
			err = PTR_ERR(esw->qos.group0);
			goto err_group0;
		}
	}
	refcount_set(&esw->qos.refcnt, 1);

	return 0;

err_group0:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
						esw->qos.root_tsar_ix))
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");

	return err;
}

static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (esw->qos.group0)
		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}

static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	lockdep_assert_held(&esw->state_lock);

	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() sets the refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}

static void esw_qos_put(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->state_lock);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}

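/* Per-vport enable/disable.  Each enabled vport holds one reference on the
 * e-switch QoS state: esw_qos_vport_enable() takes it (creating the root
 * TSAR and group0 on first use) and attaches the vport to group0, while
 * mlx5_esw_qos_vport_disable() destroys the vport element and drops the
 * reference.
 */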
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (vport->qos.enabled)
		return 0;

	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (err)
		goto err_out;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;

err_out:
	esw_qos_put(esw);

	return err;
}

void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (!vport->qos.enabled)
		return;
	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
	     "Disabling QoS on port before detaching it from group");

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, err);

	memset(&vport->qos, 0, sizeof(vport->qos));
	trace_mlx5_esw_vport_qos_destroy(vport);

	esw_qos_put(esw);
}

int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (err)
		return err;

	err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (!err)
		err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);

	return err;
}

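/* Link speed helpers used to validate requested rates.  When the uplink is
 * part of a LAG, the speed of the bond master is queried under RTNL;
 * otherwise (or if the LAG speed is unknown) the port's maximum link speed
 * is used as the upper bound.
 */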
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}

static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}

static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value > link_speed_max) {
		pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
		       name, value, link_speed_max);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
		return -EINVAL;
	}

	return 0;
}

int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 link_speed_max;
	u32 bitmask;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	mutex_lock(&esw->state_lock);
	if (!vport->qos.enabled) {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	} else {
		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

		bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
		err = mlx5_modify_scheduling_element_cmd(esw->dev,
							 SCHEDULING_HIERARCHY_E_SWITCH,
							 ctx,
							 vport->qos.esw_tsar_ix,
							 bitmask);
	}
	mutex_unlock(&esw->state_lock);

	return err;
}

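/* Devlink passes rates in bytes per second while the firmware expects
 * megabits per second; 1 Mbit/s = 1,000,000 / 8 = 125,000 bytes/s.  For
 * example, a devlink tx_max of 1,250,000 Bps converts to 10 Mbit/s below.
 */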
#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */

/* Converts a bytes-per-second value passed in a pointer into megabits per
 * second, rewriting the value in place.  Returns an error if the converted
 * rate exceeds the link speed or is not a whole number of Mbps.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
	if (err)
		return err;

	*rate = value;
	return 0;
}

/* Eswitch devlink rate API */

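/* The callbacks below back the devlink rate objects: leaf tx_share/tx_max map
 * to a vport's min/max rate, rate nodes map to mlx5_esw_rate_group, and
 * setting a leaf's parent moves the vport between groups.
 */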
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (err)
		goto unlock;

	err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (err)
		goto unlock;

	err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5_esw_rate_group *group = priv;
	int err;

	err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	mutex_lock(&esw->state_lock);
	err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_eswitch *esw;
	int err = 0;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rate node creation supported only in switchdev mode");
		err = -EOPNOTSUPP;
		goto unlock;
	}

	group = esw_qos_create_rate_group(esw, extack);
	if (IS_ERR(group)) {
		err = PTR_ERR(group);
		goto unlock;
	}

	*priv = group;
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
	if (IS_ERR(esw))
		return PTR_ERR(esw);

	mutex_lock(&esw->state_lock);
	err = esw_qos_destroy_rate_group(esw, group, extack);
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport,
				    struct mlx5_esw_rate_group *group,
				    struct netlink_ext_ack *extack)
{
	int err = 0;

	mutex_lock(&esw->state_lock);
	if (!vport->qos.enabled && !group)
		goto unlock;

	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!err)
		err = esw_qos_vport_update_group(esw, vport, group, extack);
unlock:
	mutex_unlock(&esw->state_lock);
	return err;
}

int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
				     struct devlink_rate *parent,
				     void *priv, void *parent_priv,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group;
	struct mlx5_vport *vport = priv;

	if (!parent)
		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
						       vport, NULL, extack);

	group = parent_priv;
	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}