xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c (revision 1e15510b71c99c6e49134d756df91069f7d18141)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10 
11 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
12 #define MLX5_MIN_BW_SHARE 1
13 
/* Holds rate nodes associated with an E-Switch. */
struct mlx5_qos_domain {
	/* Serializes access to all qos changes in the qos domain. */
	struct mutex lock;
	/* List of mlx5_esw_sched_nodes. Root-level (parentless) nodes are
	 * linked here; child nodes live on their parent's children list
	 * (see __esw_qos_alloc_node()).
	 */
	struct list_head nodes;
};
21 
/* Acquire the domain-wide lock serializing all QoS changes for @esw. */
static void esw_qos_lock(struct mlx5_eswitch *esw)
{
	mutex_lock(&esw->qos.domain->lock);
}
26 
/* Release the domain-wide QoS lock taken by esw_qos_lock(). */
static void esw_qos_unlock(struct mlx5_eswitch *esw)
{
	mutex_unlock(&esw->qos.domain->lock);
}
31 
/* Lockdep-only assertion that the caller holds the QoS domain lock. */
static void esw_assert_qos_lock_held(struct mlx5_eswitch *esw)
{
	lockdep_assert_held(&esw->qos.domain->lock);
}
36 
/* Allocate and initialize an empty QoS domain (mutex + empty node list).
 * Returns NULL on allocation failure.
 */
static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
{
	struct mlx5_qos_domain *qos_domain;

	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
	if (!qos_domain)
		return NULL;

	mutex_init(&qos_domain->lock);
	INIT_LIST_HEAD(&qos_domain->nodes);

	return qos_domain;
}
50 
esw_qos_domain_init(struct mlx5_eswitch * esw)51 static int esw_qos_domain_init(struct mlx5_eswitch *esw)
52 {
53 	esw->qos.domain = esw_qos_domain_alloc();
54 
55 	return esw->qos.domain ? 0 : -ENOMEM;
56 }
57 
/* Free the QoS domain and clear the eswitch's reference to it. */
static void esw_qos_domain_release(struct mlx5_eswitch *esw)
{
	kfree(esw->qos.domain);
	esw->qos.domain = NULL;
}
63 
/* Kind of node in the scheduling hierarchy: a TSAR grouping vports, or a
 * single vport leaf.
 */
enum sched_node_type {
	SCHED_NODE_TYPE_VPORTS_TSAR,
	SCHED_NODE_TYPE_VPORT,
};
68 
/* Human-readable names for enum sched_node_type, used in warning messages. */
static const char * const sched_node_type_str[] = {
	[SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR",
	[SCHED_NODE_TYPE_VPORT] = "vport",
};
73 
/* A node in the E-Switch rate hierarchy (either a TSAR or a vport leaf). */
struct mlx5_esw_sched_node {
	/* Index of the corresponding firmware scheduling element. */
	u32 ix;
	/* Bandwidth parameters. */
	u32 max_rate;
	u32 min_rate;
	/* A computed value indicating relative min_rate between node's children. */
	u32 bw_share;
	/* The parent node in the rate hierarchy. */
	struct mlx5_esw_sched_node *parent;
	/* Entry in the parent node's children list. */
	struct list_head entry;
	/* The type of this node in the rate hierarchy. */
	enum sched_node_type type;
	/* The eswitch this node belongs to. */
	struct mlx5_eswitch *esw;
	/* The children nodes of this node, empty list for leaf nodes. */
	struct list_head children;
	/* Valid only if this node is associated with a vport. */
	struct mlx5_vport *vport;
};
94 
/* Re-parent @node under @parent: move it onto the parent's children list and
 * adopt the parent's eswitch. list_del_init() makes this safe whether or not
 * the node was already on a list.
 */
static void
esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent)
{
	list_del_init(&node->entry);
	node->parent = parent;
	list_add_tail(&node->entry, &parent->children);
	node->esw = parent->esw;
}
103 
/* Free the vport's scheduling node and zero all of its cached QoS state. */
void mlx5_esw_qos_vport_qos_free(struct mlx5_vport *vport)
{
	kfree(vport->qos.sched_node);
	memset(&vport->qos, 0, sizeof(vport->qos));
}
109 
mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport * vport)110 u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport)
111 {
112 	if (!vport->qos.sched_node)
113 		return 0;
114 
115 	return vport->qos.sched_node->ix;
116 }
117 
118 struct mlx5_esw_sched_node *
mlx5_esw_qos_vport_get_parent(const struct mlx5_vport * vport)119 mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport)
120 {
121 	if (!vport->qos.sched_node)
122 		return NULL;
123 
124 	return vport->qos.sched_node->parent;
125 }
126 
/* Log a warning for a failed firmware scheduling-element command. @op names
 * the operation ("create"/"modify"/"destroy"); the vport number is included
 * when the node is associated with a vport.
 */
static void esw_qos_sched_elem_warn(struct mlx5_esw_sched_node *node, int err, const char *op)
{
	if (node->vport) {
		esw_warn(node->esw->dev,
			 "E-Switch %s %s scheduling element failed (vport=%d,err=%d)\n",
			 op, sched_node_type_str[node->type], node->vport->vport, err);
		return;
	}

	esw_warn(node->esw->dev,
		 "E-Switch %s %s scheduling element failed (err=%d)\n",
		 op, sched_node_type_str[node->type], err);
}
140 
/* Create the firmware scheduling element described by @ctx and store its
 * index in node->ix. Warns and sets @extack on failure.
 */
static int esw_qos_node_create_sched_element(struct mlx5_esw_sched_node *node, void *ctx,
					     struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_create_scheduling_element_cmd(node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx,
						 &node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "create");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create scheduling element failed");
	}

	return err;
}
155 
/* Destroy the firmware scheduling element referenced by node->ix. Warns and
 * sets @extack on failure; the error is returned but callers may ignore it.
 */
static int esw_qos_node_destroy_sched_element(struct mlx5_esw_sched_node *node,
					      struct netlink_ext_ack *extack)
{
	int err;

	err = mlx5_destroy_scheduling_element_cmd(node->esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  node->ix);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "destroy");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroying scheduling element failed.");
	}

	return err;
}
171 
/* Push new max_rate/bw_share values for @node's scheduling element to
 * firmware via a modify command, setting only the fields that actually
 * changed in the modify bitmask. On success the new values are cached in the
 * node and a tracepoint is emitted.
 */
static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share,
				     struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = node->esw->dev;
	u32 bitmask = 0;
	int err;

	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
		return -EOPNOTSUPP;

	/* A non-zero bw_share needs dedicated firmware support. */
	if (bw_share && (!MLX5_CAP_QOS(dev, esw_bw_share) ||
			 MLX5_CAP_QOS(dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	/* Skip the firmware command when nothing would change. */
	if (node->max_rate == max_rate && node->bw_share == bw_share)
		return 0;

	if (node->max_rate != max_rate) {
		MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
	}
	if (node->bw_share != bw_share) {
		MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
		bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
	}

	err = mlx5_modify_scheduling_element_cmd(dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 node->ix,
						 bitmask);
	if (err) {
		esw_qos_sched_elem_warn(node, err, "modify");
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed");

		return err;
	}

	/* Cache the applied values only after firmware accepted them. */
	node->max_rate = max_rate;
	node->bw_share = bw_share;
	if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR)
		trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate);
	else if (node->type == SCHED_NODE_TYPE_VPORT)
		trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate);

	return 0;
}
220 
/* Compute the divider used to translate min_rate values into bw_share units
 * for the children of @parent (or for the domain's root-level nodes when
 * @parent is NULL). The largest min_rate maps to fw_max_bw_share.
 */
static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
					      struct mlx5_esw_sched_node *parent)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_sched_node *node;
	u32 max_guarantee = 0;

	/* Find max min_rate across all nodes.
	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
	 * Nodes belonging to another eswitch in the domain and the root TSAR
	 * itself are skipped.
	 */
	list_for_each_entry(node, nodes, entry) {
		if (node->esw == esw && node->ix != esw->qos.root_tsar_ix &&
		    node->min_rate > max_guarantee)
			max_guarantee = node->min_rate;
	}

	if (max_guarantee)
		return max_t(u32, max_guarantee / fw_max_bw_share, 1);

	/* If nodes max min_rate divider is 0 but their parent has bw_share
	 * configured, then set bw_share for nodes to minimal value.
	 */

	if (parent && parent->bw_share)
		return 1;

	/* If no node has min_rate configured, a divider of 0 sets all
	 * nodes' bw_share to 0, effectively disabling min guarantees.
	 */
	return 0;
}
253 
/* Translate @min_rate into a bw_share value using @divider, clamped to the
 * [MLX5_MIN_BW_SHARE, fw_max] range. A zero divider disables min guarantees
 * and yields 0.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	u32 share;

	if (!divider)
		return 0;

	share = DIV_ROUND_UP(min_rate, divider);
	return clamp_t(u32, share, MLX5_MIN_BW_SHARE, fw_max);
}
260 
/* Recompute @node's bw_share from its min_rate using @divider and push the
 * result (together with the unchanged max_rate) to firmware.
 */
static void esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node,
					       u32 divider,
					       struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share);
	u32 bw_share;

	bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share);

	esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack);
}
272 
/* Recompute and apply bw_share for every node under @parent (or every
 * root-level node of the domain when @parent is NULL), recursing into
 * non-leaf children so the whole subtree stays normalized. Nodes from other
 * eswitches in the domain and the root TSAR are skipped.
 */
static void esw_qos_normalize_min_rate(struct mlx5_eswitch *esw,
				       struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes;
	u32 divider = esw_qos_calculate_min_rate_divider(esw, parent);
	struct mlx5_esw_sched_node *node;

	list_for_each_entry(node, nodes, entry) {
		if (node->esw != esw || node->ix == esw->qos.root_tsar_ix)
			continue;

		esw_qos_update_sched_node_bw_share(node, divider, extack);

		if (list_empty(&node->children))
			continue;

		esw_qos_normalize_min_rate(node->esw, node, extack);
	}
}
293 
/* Update @node's minimum rate and re-normalize bw_share across its siblings.
 * A no-op when the value is unchanged. Always returns 0.
 */
static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	if (node->min_rate == min_rate)
		return 0;

	node->min_rate = min_rate;
	esw_qos_normalize_min_rate(node->esw, node->parent, extack);

	return 0;
}
307 
/* Create a DWRR TSAR scheduling element under @parent_element_id and return
 * its firmware index in @tsar_ix. Returns -EOPNOTSUPP when the device does
 * not support DWRR TSAR elements on the E-Switch hierarchy.
 */
static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id,
					  u32 *tsar_ix)
{
	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR,
					     SCHEDULING_HIERARCHY_E_SWITCH) ||
	    !mlx5_qos_tsar_type_supported(dev,
					  TSAR_ELEMENT_TSAR_TYPE_DWRR,
					  SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, tsar_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
		 parent_element_id);
	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
	MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR);

	return mlx5_create_scheduling_element_cmd(dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  tsar_ctx,
						  tsar_ix);
}
334 
/* Create the firmware vport scheduling element for @vport_node under its
 * parent TSAR, applying the node's cached max_rate. Requires the parent to
 * be set already.
 */
static int esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
					      struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = vport_node->esw->dev;
	void *attr;

	if (!mlx5_qos_element_type_supported(dev,
					     SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT,
					     SCHEDULING_HIERARCHY_E_SWITCH))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, vport_node->parent->ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, vport_node->max_rate);

	return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}
356 
/* Allocate a software-only node with the given firmware index and type, and
 * link it onto @parent's children list (or onto the domain's root-level list
 * when @parent is NULL). No firmware element is created here. Returns NULL
 * on allocation failure.
 */
static struct mlx5_esw_sched_node *
__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type,
		     struct mlx5_esw_sched_node *parent)
{
	struct list_head *parent_children;
	struct mlx5_esw_sched_node *node;

	node = kzalloc(sizeof(*node), GFP_KERNEL);
	if (!node)
		return NULL;

	node->esw = esw;
	node->ix = tsar_ix;
	node->type = type;
	node->parent = parent;
	INIT_LIST_HEAD(&node->children);
	parent_children = parent ? &parent->children : &esw->qos.domain->nodes;
	list_add_tail(&node->entry, parent_children);

	return node;
}
378 
/* Unlink the software node from its list and free it (no firmware access). */
static void __esw_qos_free_node(struct mlx5_esw_sched_node *node)
{
	list_del(&node->entry);
	kfree(node);
}
384 
/* Destroy the node's firmware element, then free the software node. */
static void esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	esw_qos_node_destroy_sched_element(node, extack);
	__esw_qos_free_node(node);
}
390 
/* Create a vports TSAR node under the root TSAR: the firmware TSAR first,
 * then the software node; min rates are re-normalized on success. If the
 * software allocation fails, the just-created firmware TSAR is destroyed
 * again.
 */
static struct mlx5_esw_sched_node *
__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent,
				   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	u32 tsar_ix;
	int err;

	err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed");
		return ERR_PTR(err);
	}

	node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent);
	if (!node) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed");
		err = -ENOMEM;
		goto err_alloc_node;
	}

	esw_qos_normalize_min_rate(esw, NULL, extack);
	trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix);

	return node;

err_alloc_node:
	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
						SCHEDULING_HIERARCHY_E_SWITCH,
						tsar_ix))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed");
	return ERR_PTR(err);
}
424 
425 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
426 static void esw_qos_put(struct mlx5_eswitch *esw);
427 
/* Create a new vports scheduling node for devlink, taking a QoS reference
 * that is dropped again on failure. Requires the device to support
 * scheduling depth (nested TSARs).
 */
static struct mlx5_esw_sched_node *
esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *node;
	int err;

	esw_assert_qos_lock_held(esw);
	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
		return ERR_PTR(-EOPNOTSUPP);

	err = esw_qos_get(esw, extack);
	if (err)
		return ERR_PTR(err);

	node = __esw_qos_create_vports_sched_node(esw, NULL, extack);
	if (IS_ERR(node))
		esw_qos_put(esw);

	return node;
}
448 
/* Destroy @node (tracepoint + firmware element + software node) and
 * re-normalize the domain's remaining root-level nodes.
 */
static void __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = node->esw;

	trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix);
	esw_qos_destroy_node(node, extack);
	esw_qos_normalize_min_rate(esw, NULL, extack);
}
457 
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)458 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
459 {
460 	struct mlx5_core_dev *dev = esw->dev;
461 	int err;
462 
463 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
464 		return -EOPNOTSUPP;
465 
466 	err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix);
467 	if (err) {
468 		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
469 		return err;
470 	}
471 
472 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
473 		esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack);
474 	} else {
475 		/* The eswitch doesn't support scheduling nodes.
476 		 * Create a software-only node0 using the root TSAR to attach vport QoS to.
477 		 */
478 		if (!__esw_qos_alloc_node(esw,
479 					  esw->qos.root_tsar_ix,
480 					  SCHED_NODE_TYPE_VPORTS_TSAR,
481 					  NULL))
482 			esw->qos.node0 = ERR_PTR(-ENOMEM);
483 	}
484 	if (IS_ERR(esw->qos.node0)) {
485 		err = PTR_ERR(esw->qos.node0);
486 		esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err);
487 		goto err_node0;
488 	}
489 	refcount_set(&esw->qos.refcnt, 1);
490 
491 	return 0;
492 
493 err_node0:
494 	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
495 						esw->qos.root_tsar_ix))
496 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
497 
498 	return err;
499 }
500 
/* Tear down the QoS tree: destroy node0 (or just free the software node when
 * it aliases the root TSAR, since the TSAR is destroyed below anyway), then
 * destroy the root TSAR itself.
 */
static void esw_qos_destroy(struct mlx5_eswitch *esw)
{
	int err;

	if (esw->qos.node0->ix != esw->qos.root_tsar_ix)
		__esw_qos_destroy_node(esw->qos.node0, NULL);
	else
		__esw_qos_free_node(esw->qos.node0);
	esw->qos.node0 = NULL;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  esw->qos.root_tsar_ix);
	if (err)
		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
}
517 
/* Take a QoS reference, lazily creating the QoS tree on the first call.
 * Must be called with the domain lock held.
 */
static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
	int err = 0;

	esw_assert_qos_lock_held(esw);
	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
		/* esw_qos_create() set refcount to 1 only on success.
		 * No need to decrement on failure.
		 */
		err = esw_qos_create(esw, extack);
	}

	return err;
}
532 
/* Drop a QoS reference; the last put tears down the whole QoS tree. */
static void esw_qos_put(struct mlx5_eswitch *esw)
{
	esw_assert_qos_lock_held(esw);
	if (refcount_dec_and_test(&esw->qos.refcnt))
		esw_qos_destroy(esw);
}
539 
/* Detach @vport from the scheduling tree: destroy its firmware element,
 * unlink it from its parent's children list and re-normalize the remaining
 * siblings' bw_share. The software node itself is kept.
 */
static void esw_qos_vport_disable(struct mlx5_vport *vport, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;
	struct mlx5_esw_sched_node *parent = vport_node->parent;

	esw_qos_node_destroy_sched_element(vport_node, extack);

	vport_node->bw_share = 0;
	list_del_init(&vport_node->entry);
	esw_qos_normalize_min_rate(parent->esw, parent, extack);

	trace_mlx5_esw_vport_qos_destroy(vport_node->esw->dev, vport);
}
553 
/* Attach the vport's existing software node under @parent and create the
 * matching firmware element, then re-normalize the parent's children.
 */
static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				struct netlink_ext_ack *extack)
{
	int err;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	esw_qos_node_set_parent(vport->qos.sched_node, parent);
	err = esw_qos_vport_create_sched_element(vport->qos.sched_node, extack);
	if (err)
		return err;

	esw_qos_normalize_min_rate(parent->esw, parent, extack);
	trace_mlx5_esw_vport_qos_create(vport->dev, vport,
					vport->qos.sched_node->max_rate,
					vport->qos.sched_node->bw_share);

	return 0;
}
573 
/* Enable QoS for @vport: allocate its scheduling node under @parent (node0
 * when NULL), seed it with the given rates and create the firmware element.
 * Takes a QoS reference that is dropped again on failure.
 */
static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_type type,
				     struct mlx5_esw_sched_node *parent, u32 max_rate,
				     u32 min_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *sched_node;
	int err;

	esw_assert_qos_lock_held(esw);
	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	parent = parent ?: esw->qos.node0;
	sched_node = __esw_qos_alloc_node(parent->esw, 0, type, parent);
	if (!sched_node)
		return -ENOMEM;

	sched_node->max_rate = max_rate;
	sched_node->min_rate = min_rate;
	sched_node->vport = vport;
	vport->qos.sched_node = sched_node;
	err = esw_qos_vport_enable(vport, parent, extack);
	if (err) {
		/* Unwind: free the node, drop the QoS reference and clear the
		 * vport's pointer to the freed node.
		 */
		__esw_qos_free_node(sched_node);
		esw_qos_put(esw);
		vport->qos.sched_node = NULL;
	}

	return err;
}
605 
/* Disable QoS on @vport and release its node and QoS reference. A no-op when
 * QoS is not enabled. Warns if the vport still hangs off a node other than
 * node0 (it should have been detached first).
 */
void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *parent;

	lockdep_assert_held(&esw->state_lock);
	esw_qos_lock(esw);
	if (!vport->qos.sched_node)
		goto unlock;

	parent = vport->qos.sched_node->parent;
	WARN(parent != esw->qos.node0, "Disabling QoS on port before detaching it from node");

	esw_qos_vport_disable(vport, NULL);
	mlx5_esw_qos_vport_qos_free(vport);
	esw_qos_put(esw);
unlock:
	esw_qos_unlock(esw);
}
625 
/* Set the vport's maximum (limit) rate, implicitly enabling QoS for the
 * vport if it has no scheduling node yet.
 */
static int mlx5_esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (vport_node)
		return esw_qos_sched_elem_config(vport_node, max_rate,
						 vport_node->bw_share, extack);

	return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL,
					 max_rate, 0, extack);
}
640 
/* Set the vport's minimum (guaranteed) rate, implicitly enabling QoS for the
 * vport if it has no scheduling node yet.
 */
static int mlx5_esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate,
					   struct netlink_ext_ack *extack)
{
	struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node;

	esw_assert_qos_lock_held(vport->dev->priv.eswitch);

	if (vport_node)
		return esw_qos_set_node_min_rate(vport_node, min_rate, extack);

	return mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, NULL,
					 0, min_rate, extack);
}
654 
/* Apply both rates to @vport under the domain lock: min rate first, then max
 * rate; stops at the first error.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *vport, u32 max_rate, u32 min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, min_rate, NULL);
	if (!err)
		err = mlx5_esw_qos_set_vport_max_rate(vport, max_rate, NULL);
	esw_qos_unlock(esw);
	return err;
}
667 
/* Read back the vport's configured rates. Returns true and fills @max_rate
 * and @min_rate when QoS is enabled on the vport, false otherwise (outputs
 * untouched).
 */
bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	bool enabled;

	esw_qos_lock(esw);
	enabled = !!vport->qos.sched_node;
	if (enabled) {
		*max_rate = vport->qos.sched_node->max_rate;
		*min_rate = vport->qos.sched_node->min_rate;
	}
	esw_qos_unlock(esw);
	return enabled;
}
682 
/* Move @vport under @parent (node0 when NULL) by disabling and re-enabling
 * its scheduling element. On enable failure, an attempt is made to restore
 * the previous parent; a warning is logged if that also fails.
 */
static int esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
				       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	struct mlx5_esw_sched_node *curr_parent;
	int err;

	esw_assert_qos_lock_held(esw);
	curr_parent = vport->qos.sched_node->parent;
	parent = parent ?: esw->qos.node0;
	if (curr_parent == parent)
		return 0;

	esw_qos_vport_disable(vport, extack);

	err = esw_qos_vport_enable(vport, parent, extack);
	if (err) {
		if (esw_qos_vport_enable(vport, curr_parent, NULL))
			esw_warn(parent->esw->dev, "vport restore QoS failed (vport=%d)\n",
				 vport->vport);
	}

	return err;
}
707 
/* Query the link speed (Mbps) of the LAG master netdevice above the uplink,
 * or SPEED_UNKNOWN when it cannot be determined. Caller must hold RTNL.
 */
static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
{
	struct ethtool_link_ksettings lksettings;
	struct net_device *slave, *master;
	u32 speed = SPEED_UNKNOWN;

	/* Lock ensures a stable reference to master and slave netdevice
	 * while port speed of master is queried.
	 */
	ASSERT_RTNL();

	slave = mlx5_uplink_netdev_get(mdev);
	if (!slave)
		goto out;

	master = netdev_master_upper_dev_get(slave);
	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
		speed = lksettings.base.speed;

out:
	return speed;
}
730 
/* Get the maximum link speed in Mbps into @link_speed_max. When LAG is
 * active, prefer the bond master's speed (taking RTNL if @hold_rtnl_lock);
 * otherwise fall back to the device's maximum port speed.
 */
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int err;

	if (!mlx5_lag_is_active(mdev))
		goto skip_lag;

	if (hold_rtnl_lock)
		rtnl_lock();

	*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

	if (hold_rtnl_lock)
		rtnl_unlock();

	if (*link_speed_max != (u32)SPEED_UNKNOWN)
		return 0;

skip_lag:
	err = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return err;
}
757 
/* Validate that @value (Mbps) does not exceed @link_speed_max; returns
 * -EINVAL with an extack message otherwise.
 */
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value <= link_speed_max)
		return 0;

	pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
	       name, value, link_speed_max);
	NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
	return -EINVAL;
}
771 
/* Apply a policing rate (Mbps) as the max rate of @vport_num, validating it
 * against the link's maximum speed first. A rate of 0 skips the link-speed
 * validation.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int err;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (err)
			return err;

		err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (err)
			return err;
	}

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, rate_mbps, NULL);
	esw_qos_unlock(esw);

	return err;
}
799 
800 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
801 
802 /* Converts bytes per second value passed in a pointer into megabits per
803  * second, rewriting last. If converted rate exceed link speed or is not a
804  * fraction of Mbps - returns error.
805  */
/* Convert *rate from Bps to Mbps in place. Fails with -EINVAL if the value
 * is not an exact multiple of 1 Mbps or exceeds the link's maximum speed.
 */
static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
					u64 *rate, struct netlink_ext_ack *extack)
{
	u32 link_speed_max, remainder;
	u64 value;
	int err;

	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
	if (remainder) {
		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
		       name, *rate);
		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
		return -EINVAL;
	}

	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
	if (err)
		return err;

	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
	if (err)
		return err;

	*rate = value;
	return 0;
}
832 
mlx5_esw_qos_init(struct mlx5_eswitch * esw)833 int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
834 {
835 	if (esw->qos.domain)
836 		return 0;  /* Nothing to change. */
837 
838 	return esw_qos_domain_init(esw);
839 }
840 
/* Release the QoS domain if one was initialized. */
void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
{
	if (esw->qos.domain)
		esw_qos_domain_release(esw);
}
846 
847 /* Eswitch devlink rate API */
848 
/* devlink rate-leaf .tx_share handler: convert @tx_share from Bps to Mbps
 * and apply it as the vport's min rate.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_min_rate(vport, tx_share, extack);
	esw_qos_unlock(esw);
	return err;
}
869 
/* devlink rate-leaf .tx_max handler: convert @tx_max from Bps to Mbps and
 * apply it as the vport's max rate.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw;
	int err;

	esw = vport->dev->priv.eswitch;
	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (err)
		return err;

	esw_qos_lock(esw);
	err = mlx5_esw_qos_set_vport_max_rate(vport, tx_max, extack);
	esw_qos_unlock(esw);
	return err;
}
890 
mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate * rate_node,void * priv,u64 tx_share,struct netlink_ext_ack * extack)891 int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
892 					    u64 tx_share, struct netlink_ext_ack *extack)
893 {
894 	struct mlx5_esw_sched_node *node = priv;
895 	struct mlx5_eswitch *esw = node->esw;
896 	int err;
897 
898 	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack);
899 	if (err)
900 		return err;
901 
902 	esw_qos_lock(esw);
903 	err = esw_qos_set_node_min_rate(node, tx_share, extack);
904 	esw_qos_unlock(esw);
905 	return err;
906 }
907 
mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate * rate_node,void * priv,u64 tx_max,struct netlink_ext_ack * extack)908 int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
909 					  u64 tx_max, struct netlink_ext_ack *extack)
910 {
911 	struct mlx5_esw_sched_node *node = priv;
912 	struct mlx5_eswitch *esw = node->esw;
913 	int err;
914 
915 	err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack);
916 	if (err)
917 		return err;
918 
919 	esw_qos_lock(esw);
920 	err = esw_qos_sched_elem_config(node, tx_max, node->bw_share, extack);
921 	esw_qos_unlock(esw);
922 	return err;
923 }
924 
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)925 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
926 				   struct netlink_ext_ack *extack)
927 {
928 	struct mlx5_esw_sched_node *node;
929 	struct mlx5_eswitch *esw;
930 	int err = 0;
931 
932 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
933 	if (IS_ERR(esw))
934 		return PTR_ERR(esw);
935 
936 	esw_qos_lock(esw);
937 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
938 		NL_SET_ERR_MSG_MOD(extack,
939 				   "Rate node creation supported only in switchdev mode");
940 		err = -EOPNOTSUPP;
941 		goto unlock;
942 	}
943 
944 	node = esw_qos_create_vports_sched_node(esw, extack);
945 	if (IS_ERR(node)) {
946 		err = PTR_ERR(node);
947 		goto unlock;
948 	}
949 
950 	*priv = node;
951 unlock:
952 	esw_qos_unlock(esw);
953 	return err;
954 }
955 
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)956 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
957 				   struct netlink_ext_ack *extack)
958 {
959 	struct mlx5_esw_sched_node *node = priv;
960 	struct mlx5_eswitch *esw = node->esw;
961 
962 	esw_qos_lock(esw);
963 	__esw_qos_destroy_node(node, extack);
964 	esw_qos_put(esw);
965 	esw_qos_unlock(esw);
966 	return 0;
967 }
968 
mlx5_esw_qos_vport_update_parent(struct mlx5_vport * vport,struct mlx5_esw_sched_node * parent,struct netlink_ext_ack * extack)969 int mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent,
970 				     struct netlink_ext_ack *extack)
971 {
972 	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
973 	int err = 0;
974 
975 	if (parent && parent->esw != esw) {
976 		NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
977 		return -EOPNOTSUPP;
978 	}
979 
980 	esw_qos_lock(esw);
981 	if (!vport->qos.sched_node && parent)
982 		err = mlx5_esw_qos_vport_enable(vport, SCHED_NODE_TYPE_VPORT, parent, 0, 0, extack);
983 	else if (vport->qos.sched_node)
984 		err = esw_qos_vport_update_parent(vport, parent, extack);
985 	esw_qos_unlock(esw);
986 	return err;
987 }
988 
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)989 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
990 				     struct devlink_rate *parent,
991 				     void *priv, void *parent_priv,
992 				     struct netlink_ext_ack *extack)
993 {
994 	struct mlx5_esw_sched_node *node;
995 	struct mlx5_vport *vport = priv;
996 
997 	if (!parent)
998 		return mlx5_esw_qos_vport_update_parent(vport, NULL, extack);
999 
1000 	node = parent_priv;
1001 	return mlx5_esw_qos_vport_update_parent(vport, node, extack);
1002 }
1003