xref: /linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c (revision 95298d63c67673c654c08952672d016212b26054)
1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved */
3 
4 #include <linux/kernel.h>
5 #include <linux/errno.h>
6 #include <linux/netdevice.h>
7 #include <net/pkt_cls.h>
8 #include <net/red.h>
9 
10 #include "spectrum.h"
11 #include "reg.h"
12 
/* Map a zero-based PRIO/ETS band index to a traffic class number. Band 0 maps
 * to traffic class IEEE_8021QAZ_MAX_TCS - 1, and each following band maps to
 * the next lower traffic class. The argument is parenthesized so that compound
 * expressions (e.g. "a + b") expand correctly.
 */
#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) \
	(IEEE_8021QAZ_MAX_TCS - (band) - 1)
/* Same mapping for a one-based child index (child 1 is band 0). */
#define MLXSW_SP_PRIO_CHILD_TO_TCLASS(child) \
	MLXSW_SP_PRIO_BAND_TO_TCLASS((child) - 1)
16 
/* Kinds of qdisc tracked by this file. The value is stored in
 * mlxsw_sp_qdisc_ops.type and is compared by mlxsw_sp_qdisc_compare() and
 * mlxsw_sp_qdisc_replace() to tell whether a slot holds a qdisc of the
 * expected kind.
 */
enum mlxsw_sp_qdisc_type {
	MLXSW_SP_QDISC_NO_QDISC,
	MLXSW_SP_QDISC_RED,
	MLXSW_SP_QDISC_PRIO,
	MLXSW_SP_QDISC_ETS,
	MLXSW_SP_QDISC_TBF,
	MLXSW_SP_QDISC_FIFO,
};
25 
26 struct mlxsw_sp_qdisc;
27 
/* Per-qdisc-type operations table. One static instance exists for each
 * offloaded qdisc kind; a slot's current table is kept in mlxsw_sp_qdisc.ops.
 */
struct mlxsw_sp_qdisc_ops {
	/* Kind of qdisc these operations implement. */
	enum mlxsw_sp_qdisc_type type;
	/* Validate @params. Called by mlxsw_sp_qdisc_replace() before
	 * replace(); a non-zero return aborts the replace.
	 */
	int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port,
			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			    void *params);
	/* Apply the configuration described by @params. */
	int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
	/* Optional. Invoked from mlxsw_sp_qdisc_destroy() before the slot is
	 * cleared.
	 */
	int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port,
		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
	/* Optional. When unset, mlxsw_sp_qdisc_get_stats() returns
	 * -EOPNOTSUPP.
	 */
	int (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
			 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			 struct tc_qopt_offload_stats *stats_ptr);
	/* Optional. When unset, mlxsw_sp_qdisc_get_xstats() returns
	 * -EOPNOTSUPP.
	 */
	int (*get_xstats)(struct mlxsw_sp_port *mlxsw_sp_port,
			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			  void *xstats_ptr);
	/* Optional. Re-baseline the slot's statistics; called when a qdisc
	 * with a new handle is installed in the slot.
	 */
	void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port,
			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc);
	/* unoffload - to be used for a qdisc that stops being offloaded without
	 * being destroyed.
	 */
	void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port,
			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params);
};
51 
/* One qdisc slot: either the port's root qdisc or the qdisc attached to one
 * traffic class.
 */
struct mlxsw_sp_qdisc {
	/* Handle of the qdisc held in the slot; TC_H_UNSPEC after destroy. */
	u32 handle;
	/* Traffic class whose counters and HW configuration the slot uses. */
	u8 tclass_num;
	/* Bit i set means priority i is counted for this slot. */
	u8 prio_bitmap;
	/* Baseline of type-specific extended statistics already reported. */
	union {
		struct red_stats red;
	} xstats_base;
	/* Baseline of generic statistics already reported; deltas against the
	 * current HW counters are what gets added to the caller's stats.
	 */
	struct mlxsw_sp_qdisc_stats {
		u64 tx_bytes;
		u64 tx_packets;
		u64 drops;
		u64 overlimits;
		u64 backlog;
	} stats_base;

	/* Operations of the installed qdisc; NULL when the slot is empty. */
	struct mlxsw_sp_qdisc_ops *ops;
};
69 
/* Per-port qdisc offload state: the root slot, one slot per traffic class,
 * and a cache of FIFO notifications seen before their parent was known.
 */
struct mlxsw_sp_qdisc_state {
	struct mlxsw_sp_qdisc root_qdisc;
	struct mlxsw_sp_qdisc tclass_qdiscs[IEEE_8021QAZ_MAX_TCS];

	/* When a PRIO or ETS is added, the invisible FIFOs in its bands are
	 * created first. When notifications for these FIFOs arrive, it is not
	 * known what qdisc their parent handle refers to. It could be a
	 * newly-created PRIO that will replace the currently-offloaded one, or
	 * it could be e.g. a RED that will be attached below it.
	 *
	 * As the notifications start to arrive, use them to note what the
	 * future parent handle is, and keep track of which child FIFOs were
	 * seen. Then when the parent is known, retroactively offload those
	 * FIFOs.
	 */
	u32 future_handle;
	/* Indexed by traffic class; true when a FIFO was seen for it. */
	bool future_fifos[IEEE_8021QAZ_MAX_TCS];
};
88 
89 static bool
90 mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle,
91 		       enum mlxsw_sp_qdisc_type type)
92 {
93 	return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
94 	       mlxsw_sp_qdisc->ops->type == type &&
95 	       mlxsw_sp_qdisc->handle == handle;
96 }
97 
98 static struct mlxsw_sp_qdisc *
99 mlxsw_sp_qdisc_find(struct mlxsw_sp_port *mlxsw_sp_port, u32 parent,
100 		    bool root_only)
101 {
102 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
103 	int tclass, child_index;
104 
105 	if (parent == TC_H_ROOT)
106 		return &qdisc_state->root_qdisc;
107 
108 	if (root_only || !qdisc_state ||
109 	    !qdisc_state->root_qdisc.ops ||
110 	    TC_H_MAJ(parent) != qdisc_state->root_qdisc.handle ||
111 	    TC_H_MIN(parent) > IEEE_8021QAZ_MAX_TCS)
112 		return NULL;
113 
114 	child_index = TC_H_MIN(parent);
115 	tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
116 	return &qdisc_state->tclass_qdiscs[tclass];
117 }
118 
119 static struct mlxsw_sp_qdisc *
120 mlxsw_sp_qdisc_find_by_handle(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle)
121 {
122 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
123 	int i;
124 
125 	if (qdisc_state->root_qdisc.handle == handle)
126 		return &qdisc_state->root_qdisc;
127 
128 	if (qdisc_state->root_qdisc.handle == TC_H_UNSPEC)
129 		return NULL;
130 
131 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
132 		if (qdisc_state->tclass_qdiscs[i].handle == handle)
133 			return &qdisc_state->tclass_qdiscs[i];
134 
135 	return NULL;
136 }
137 
138 static int
139 mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
140 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
141 {
142 	int err = 0;
143 
144 	if (!mlxsw_sp_qdisc)
145 		return 0;
146 
147 	if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy)
148 		err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port,
149 						   mlxsw_sp_qdisc);
150 
151 	mlxsw_sp_qdisc->handle = TC_H_UNSPEC;
152 	mlxsw_sp_qdisc->ops = NULL;
153 	return err;
154 }
155 
156 static int
157 mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
158 		       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
159 		       struct mlxsw_sp_qdisc_ops *ops, void *params)
160 {
161 	int err;
162 
163 	if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type)
164 		/* In case this location contained a different qdisc of the
165 		 * same type we can override the old qdisc configuration.
166 		 * Otherwise, we need to remove the old qdisc before setting the
167 		 * new one.
168 		 */
169 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
170 	err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params);
171 	if (err)
172 		goto err_bad_param;
173 
174 	err = ops->replace(mlxsw_sp_port, handle, mlxsw_sp_qdisc, params);
175 	if (err)
176 		goto err_config;
177 
178 	/* Check if the Qdisc changed. That includes a situation where an
179 	 * invisible Qdisc replaces another one, or is being added for the
180 	 * first time.
181 	 */
182 	if (mlxsw_sp_qdisc->handle != handle || handle == TC_H_UNSPEC) {
183 		mlxsw_sp_qdisc->ops = ops;
184 		if (ops->clean_stats)
185 			ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc);
186 	}
187 
188 	mlxsw_sp_qdisc->handle = handle;
189 	return 0;
190 
191 err_bad_param:
192 err_config:
193 	if (mlxsw_sp_qdisc->handle == handle && ops->unoffload)
194 		ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params);
195 
196 	mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
197 	return err;
198 }
199 
200 static int
201 mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port,
202 			 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
203 			 struct tc_qopt_offload_stats *stats_ptr)
204 {
205 	if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
206 	    mlxsw_sp_qdisc->ops->get_stats)
207 		return mlxsw_sp_qdisc->ops->get_stats(mlxsw_sp_port,
208 						      mlxsw_sp_qdisc,
209 						      stats_ptr);
210 
211 	return -EOPNOTSUPP;
212 }
213 
214 static int
215 mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
216 			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
217 			  void *xstats_ptr)
218 {
219 	if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops &&
220 	    mlxsw_sp_qdisc->ops->get_xstats)
221 		return mlxsw_sp_qdisc->ops->get_xstats(mlxsw_sp_port,
222 						      mlxsw_sp_qdisc,
223 						      xstats_ptr);
224 
225 	return -EOPNOTSUPP;
226 }
227 
228 static u64
229 mlxsw_sp_xstats_backlog(struct mlxsw_sp_port_xstats *xstats, int tclass_num)
230 {
231 	return xstats->backlog[tclass_num] +
232 	       xstats->backlog[tclass_num + 8];
233 }
234 
235 static u64
236 mlxsw_sp_xstats_tail_drop(struct mlxsw_sp_port_xstats *xstats, int tclass_num)
237 {
238 	return xstats->tail_drop[tclass_num] +
239 	       xstats->tail_drop[tclass_num + 8];
240 }
241 
242 static void
243 mlxsw_sp_qdisc_bstats_per_priority_get(struct mlxsw_sp_port_xstats *xstats,
244 				       u8 prio_bitmap, u64 *tx_packets,
245 				       u64 *tx_bytes)
246 {
247 	int i;
248 
249 	*tx_packets = 0;
250 	*tx_bytes = 0;
251 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
252 		if (prio_bitmap & BIT(i)) {
253 			*tx_packets += xstats->tx_packets[i];
254 			*tx_bytes += xstats->tx_bytes[i];
255 		}
256 	}
257 }
258 
259 static void
260 mlxsw_sp_qdisc_collect_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
261 				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
262 				u64 *p_tx_bytes, u64 *p_tx_packets,
263 				u64 *p_drops, u64 *p_backlog)
264 {
265 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
266 	struct mlxsw_sp_port_xstats *xstats;
267 	u64 tx_bytes, tx_packets;
268 
269 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
270 	mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
271 					       mlxsw_sp_qdisc->prio_bitmap,
272 					       &tx_packets, &tx_bytes);
273 
274 	*p_tx_packets += tx_packets;
275 	*p_tx_bytes += tx_bytes;
276 	*p_drops += xstats->wred_drop[tclass_num] +
277 		    mlxsw_sp_xstats_tail_drop(xstats, tclass_num);
278 	*p_backlog += mlxsw_sp_xstats_backlog(xstats, tclass_num);
279 }
280 
281 static void
282 mlxsw_sp_qdisc_update_stats(struct mlxsw_sp *mlxsw_sp,
283 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
284 			    u64 tx_bytes, u64 tx_packets,
285 			    u64 drops, u64 backlog,
286 			    struct tc_qopt_offload_stats *stats_ptr)
287 {
288 	struct mlxsw_sp_qdisc_stats *stats_base = &mlxsw_sp_qdisc->stats_base;
289 
290 	tx_bytes -= stats_base->tx_bytes;
291 	tx_packets -= stats_base->tx_packets;
292 	drops -= stats_base->drops;
293 	backlog -= stats_base->backlog;
294 
295 	_bstats_update(stats_ptr->bstats, tx_bytes, tx_packets);
296 	stats_ptr->qstats->drops += drops;
297 	stats_ptr->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp, backlog);
298 
299 	stats_base->backlog += backlog;
300 	stats_base->drops += drops;
301 	stats_base->tx_bytes += tx_bytes;
302 	stats_base->tx_packets += tx_packets;
303 }
304 
305 static void
306 mlxsw_sp_qdisc_get_tc_stats(struct mlxsw_sp_port *mlxsw_sp_port,
307 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
308 			    struct tc_qopt_offload_stats *stats_ptr)
309 {
310 	u64 tx_packets = 0;
311 	u64 tx_bytes = 0;
312 	u64 backlog = 0;
313 	u64 drops = 0;
314 
315 	mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
316 					&tx_bytes, &tx_packets,
317 					&drops, &backlog);
318 	mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc,
319 				    tx_bytes, tx_packets, drops, backlog,
320 				    stats_ptr);
321 }
322 
323 static int
324 mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port,
325 				  int tclass_num, u32 min, u32 max,
326 				  u32 probability, bool is_wred, bool is_ecn)
327 {
328 	char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
329 	char cwtp_cmd[MLXSW_REG_CWTP_LEN];
330 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
331 	int err;
332 
333 	mlxsw_reg_cwtp_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num);
334 	mlxsw_reg_cwtp_profile_pack(cwtp_cmd, MLXSW_REG_CWTP_DEFAULT_PROFILE,
335 				    roundup(min, MLXSW_REG_CWTP_MIN_VALUE),
336 				    roundup(max, MLXSW_REG_CWTP_MIN_VALUE),
337 				    probability);
338 
339 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtp), cwtp_cmd);
340 	if (err)
341 		return err;
342 
343 	mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
344 			     MLXSW_REG_CWTP_DEFAULT_PROFILE, is_wred, is_ecn);
345 
346 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
347 }
348 
349 static int
350 mlxsw_sp_tclass_congestion_disable(struct mlxsw_sp_port *mlxsw_sp_port,
351 				   int tclass_num)
352 {
353 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
354 	char cwtpm_cmd[MLXSW_REG_CWTPM_LEN];
355 
356 	mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num,
357 			     MLXSW_REG_CWTPM_RESET_PROFILE, false, false);
358 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd);
359 }
360 
361 static void
362 mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
363 					struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
364 {
365 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
366 	struct mlxsw_sp_qdisc_stats *stats_base;
367 	struct mlxsw_sp_port_xstats *xstats;
368 	struct red_stats *red_base;
369 
370 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
371 	stats_base = &mlxsw_sp_qdisc->stats_base;
372 	red_base = &mlxsw_sp_qdisc->xstats_base.red;
373 
374 	mlxsw_sp_qdisc_bstats_per_priority_get(xstats,
375 					       mlxsw_sp_qdisc->prio_bitmap,
376 					       &stats_base->tx_packets,
377 					       &stats_base->tx_bytes);
378 	red_base->prob_drop = xstats->wred_drop[tclass_num];
379 	red_base->pdrop = mlxsw_sp_xstats_tail_drop(xstats, tclass_num);
380 
381 	stats_base->overlimits = red_base->prob_drop + red_base->prob_mark;
382 	stats_base->drops = red_base->prob_drop + red_base->pdrop;
383 
384 	stats_base->backlog = 0;
385 }
386 
387 static int
388 mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
389 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
390 {
391 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
392 	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
393 
394 	if (root_qdisc != mlxsw_sp_qdisc)
395 		root_qdisc->stats_base.backlog -=
396 					mlxsw_sp_qdisc->stats_base.backlog;
397 
398 	return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port,
399 						  mlxsw_sp_qdisc->tclass_num);
400 }
401 
402 static int
403 mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
404 				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
405 				void *params)
406 {
407 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
408 	struct tc_red_qopt_offload_params *p = params;
409 
410 	if (p->min > p->max) {
411 		dev_err(mlxsw_sp->bus_info->dev,
412 			"spectrum: RED: min %u is bigger then max %u\n", p->min,
413 			p->max);
414 		return -EINVAL;
415 	}
416 	if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core,
417 					GUARANTEED_SHARED_BUFFER)) {
418 		dev_err(mlxsw_sp->bus_info->dev,
419 			"spectrum: RED: max value %u is too big\n", p->max);
420 		return -EINVAL;
421 	}
422 	if (p->min == 0 || p->max == 0) {
423 		dev_err(mlxsw_sp->bus_info->dev,
424 			"spectrum: RED: 0 value is illegal for min and max\n");
425 		return -EINVAL;
426 	}
427 	return 0;
428 }
429 
430 static int
431 mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
432 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
433 			   void *params)
434 {
435 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
436 	struct tc_red_qopt_offload_params *p = params;
437 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
438 	u32 min, max;
439 	u64 prob;
440 
441 	/* calculate probability in percentage */
442 	prob = p->probability;
443 	prob *= 100;
444 	prob = DIV_ROUND_UP(prob, 1 << 16);
445 	prob = DIV_ROUND_UP(prob, 1 << 16);
446 	min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min);
447 	max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max);
448 	return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num,
449 						 min, max, prob,
450 						 !p->is_nodrop, p->is_ecn);
451 }
452 
453 static void
454 mlxsw_sp_qdisc_leaf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
455 			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
456 			      struct gnet_stats_queue *qstats)
457 {
458 	u64 backlog;
459 
460 	backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
461 				       mlxsw_sp_qdisc->stats_base.backlog);
462 	qstats->backlog -= backlog;
463 	mlxsw_sp_qdisc->stats_base.backlog = 0;
464 }
465 
466 static void
467 mlxsw_sp_qdisc_red_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
468 			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
469 			     void *params)
470 {
471 	struct tc_red_qopt_offload_params *p = params;
472 
473 	mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats);
474 }
475 
476 static int
477 mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port,
478 			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
479 			      void *xstats_ptr)
480 {
481 	struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red;
482 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
483 	struct mlxsw_sp_port_xstats *xstats;
484 	struct red_stats *res = xstats_ptr;
485 	int early_drops, pdrops;
486 
487 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
488 
489 	early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop;
490 	pdrops = mlxsw_sp_xstats_tail_drop(xstats, tclass_num) -
491 		 xstats_base->pdrop;
492 
493 	res->pdrop += pdrops;
494 	res->prob_drop += early_drops;
495 
496 	xstats_base->pdrop += pdrops;
497 	xstats_base->prob_drop += early_drops;
498 	return 0;
499 }
500 
501 static int
502 mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port,
503 			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
504 			     struct tc_qopt_offload_stats *stats_ptr)
505 {
506 	u8 tclass_num = mlxsw_sp_qdisc->tclass_num;
507 	struct mlxsw_sp_qdisc_stats *stats_base;
508 	struct mlxsw_sp_port_xstats *xstats;
509 	u64 overlimits;
510 
511 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
512 	stats_base = &mlxsw_sp_qdisc->stats_base;
513 
514 	mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr);
515 	overlimits = xstats->wred_drop[tclass_num] - stats_base->overlimits;
516 
517 	stats_ptr->qstats->overlimits += overlimits;
518 	stats_base->overlimits += overlimits;
519 
520 	return 0;
521 }
522 
/* Traffic class that every priority is mapped back to when a PRIO/ETS qdisc
 * is torn down (see __mlxsw_sp_qdisc_ets_destroy()).
 */
#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
524 
525 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = {
526 	.type = MLXSW_SP_QDISC_RED,
527 	.check_params = mlxsw_sp_qdisc_red_check_params,
528 	.replace = mlxsw_sp_qdisc_red_replace,
529 	.unoffload = mlxsw_sp_qdisc_red_unoffload,
530 	.destroy = mlxsw_sp_qdisc_red_destroy,
531 	.get_stats = mlxsw_sp_qdisc_get_red_stats,
532 	.get_xstats = mlxsw_sp_qdisc_get_red_xstats,
533 	.clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats,
534 };
535 
536 int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
537 			  struct tc_red_qopt_offload *p)
538 {
539 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
540 
541 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
542 	if (!mlxsw_sp_qdisc)
543 		return -EOPNOTSUPP;
544 
545 	if (p->command == TC_RED_REPLACE)
546 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
547 					      mlxsw_sp_qdisc,
548 					      &mlxsw_sp_qdisc_ops_red,
549 					      &p->set);
550 
551 	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
552 				    MLXSW_SP_QDISC_RED))
553 		return -EOPNOTSUPP;
554 
555 	switch (p->command) {
556 	case TC_RED_DESTROY:
557 		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
558 	case TC_RED_XSTATS:
559 		return mlxsw_sp_qdisc_get_xstats(mlxsw_sp_port, mlxsw_sp_qdisc,
560 						 p->xstats);
561 	case TC_RED_STATS:
562 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
563 						&p->stats);
564 	default:
565 		return -EOPNOTSUPP;
566 	}
567 }
568 
569 static void
570 mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
571 					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
572 {
573 	u64 backlog_cells = 0;
574 	u64 tx_packets = 0;
575 	u64 tx_bytes = 0;
576 	u64 drops = 0;
577 
578 	mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
579 					&tx_bytes, &tx_packets,
580 					&drops, &backlog_cells);
581 
582 	mlxsw_sp_qdisc->stats_base.tx_packets = tx_packets;
583 	mlxsw_sp_qdisc->stats_base.tx_bytes = tx_bytes;
584 	mlxsw_sp_qdisc->stats_base.drops = drops;
585 	mlxsw_sp_qdisc->stats_base.backlog = 0;
586 }
587 
588 static int
589 mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
590 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
591 {
592 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
593 	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
594 
595 	if (root_qdisc != mlxsw_sp_qdisc)
596 		root_qdisc->stats_base.backlog -=
597 					mlxsw_sp_qdisc->stats_base.backlog;
598 
599 	return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
600 					     MLXSW_REG_QEEC_HR_SUBGROUP,
601 					     mlxsw_sp_qdisc->tclass_num, 0,
602 					     MLXSW_REG_QEEC_MAS_DIS, 0);
603 }
604 
605 static int
606 mlxsw_sp_qdisc_tbf_bs(struct mlxsw_sp_port *mlxsw_sp_port,
607 		      u32 max_size, u8 *p_burst_size)
608 {
609 	/* TBF burst size is configured in bytes. The ASIC burst size value is
610 	 * ((2 ^ bs) * 512 bits. Convert the TBF bytes to 512-bit units.
611 	 */
612 	u32 bs512 = max_size / 64;
613 	u8 bs = fls(bs512);
614 
615 	if (!bs)
616 		return -EINVAL;
617 	--bs;
618 
619 	/* Demand a power of two. */
620 	if ((1 << bs) != bs512)
621 		return -EINVAL;
622 
623 	if (bs < mlxsw_sp_port->mlxsw_sp->lowest_shaper_bs ||
624 	    bs > MLXSW_REG_QEEC_HIGHEST_SHAPER_BS)
625 		return -EINVAL;
626 
627 	*p_burst_size = bs;
628 	return 0;
629 }
630 
631 static u32
632 mlxsw_sp_qdisc_tbf_max_size(u8 bs)
633 {
634 	return (1U << bs) * 64;
635 }
636 
637 static u64
638 mlxsw_sp_qdisc_tbf_rate_kbps(struct tc_tbf_qopt_offload_replace_params *p)
639 {
640 	/* TBF interface is in bytes/s, whereas Spectrum ASIC is configured in
641 	 * Kbits/s.
642 	 */
643 	return div_u64(p->rate.rate_bytes_ps, 1000) * 8;
644 }
645 
646 static int
647 mlxsw_sp_qdisc_tbf_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
648 				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
649 				void *params)
650 {
651 	struct tc_tbf_qopt_offload_replace_params *p = params;
652 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
653 	u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
654 	u8 burst_size;
655 	int err;
656 
657 	if (rate_kbps >= MLXSW_REG_QEEC_MAS_DIS) {
658 		dev_err(mlxsw_sp_port->mlxsw_sp->bus_info->dev,
659 			"spectrum: TBF: rate of %lluKbps must be below %u\n",
660 			rate_kbps, MLXSW_REG_QEEC_MAS_DIS);
661 		return -EINVAL;
662 	}
663 
664 	err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size);
665 	if (err) {
666 		u8 highest_shaper_bs = MLXSW_REG_QEEC_HIGHEST_SHAPER_BS;
667 
668 		dev_err(mlxsw_sp->bus_info->dev,
669 			"spectrum: TBF: invalid burst size of %u, must be a power of two between %u and %u",
670 			p->max_size,
671 			mlxsw_sp_qdisc_tbf_max_size(mlxsw_sp->lowest_shaper_bs),
672 			mlxsw_sp_qdisc_tbf_max_size(highest_shaper_bs));
673 		return -EINVAL;
674 	}
675 
676 	return 0;
677 }
678 
679 static int
680 mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
681 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
682 			   void *params)
683 {
684 	struct tc_tbf_qopt_offload_replace_params *p = params;
685 	u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
686 	u8 burst_size;
687 	int err;
688 
689 	err = mlxsw_sp_qdisc_tbf_bs(mlxsw_sp_port, p->max_size, &burst_size);
690 	if (WARN_ON_ONCE(err))
691 		/* check_params above was supposed to reject this value. */
692 		return -EINVAL;
693 
694 	/* Configure subgroup shaper, so that both UC and MC traffic is subject
695 	 * to shaping. That is unlike RED, however UC queue lengths are going to
696 	 * be different than MC ones due to different pool and quota
697 	 * configurations, so the configuration is not applicable. For shaper on
698 	 * the other hand, subjecting the overall stream to the configured
699 	 * shaper makes sense. Also note that that is what we do for
700 	 * ieee_setmaxrate().
701 	 */
702 	return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
703 					     MLXSW_REG_QEEC_HR_SUBGROUP,
704 					     mlxsw_sp_qdisc->tclass_num, 0,
705 					     rate_kbps, burst_size);
706 }
707 
708 static void
709 mlxsw_sp_qdisc_tbf_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
710 			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
711 			     void *params)
712 {
713 	struct tc_tbf_qopt_offload_replace_params *p = params;
714 
715 	mlxsw_sp_qdisc_leaf_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, p->qstats);
716 }
717 
/* get_stats callback for TBF: report the generic traffic class statistics.
 *
 * Return: always 0.
 */
static int
mlxsw_sp_qdisc_get_tbf_stats(struct mlxsw_sp_port *mlxsw_sp_port,
			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			     struct tc_qopt_offload_stats *stats_ptr)
{
	mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr);

	return 0;
}
727 
728 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_tbf = {
729 	.type = MLXSW_SP_QDISC_TBF,
730 	.check_params = mlxsw_sp_qdisc_tbf_check_params,
731 	.replace = mlxsw_sp_qdisc_tbf_replace,
732 	.unoffload = mlxsw_sp_qdisc_tbf_unoffload,
733 	.destroy = mlxsw_sp_qdisc_tbf_destroy,
734 	.get_stats = mlxsw_sp_qdisc_get_tbf_stats,
735 	.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
736 };
737 
738 int mlxsw_sp_setup_tc_tbf(struct mlxsw_sp_port *mlxsw_sp_port,
739 			  struct tc_tbf_qopt_offload *p)
740 {
741 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
742 
743 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
744 	if (!mlxsw_sp_qdisc)
745 		return -EOPNOTSUPP;
746 
747 	if (p->command == TC_TBF_REPLACE)
748 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
749 					      mlxsw_sp_qdisc,
750 					      &mlxsw_sp_qdisc_ops_tbf,
751 					      &p->replace_params);
752 
753 	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
754 				    MLXSW_SP_QDISC_TBF))
755 		return -EOPNOTSUPP;
756 
757 	switch (p->command) {
758 	case TC_TBF_DESTROY:
759 		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
760 	case TC_TBF_STATS:
761 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
762 						&p->stats);
763 	default:
764 		return -EOPNOTSUPP;
765 	}
766 }
767 
768 static int
769 mlxsw_sp_qdisc_fifo_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
770 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
771 {
772 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
773 	struct mlxsw_sp_qdisc *root_qdisc = &qdisc_state->root_qdisc;
774 
775 	if (root_qdisc != mlxsw_sp_qdisc)
776 		root_qdisc->stats_base.backlog -=
777 					mlxsw_sp_qdisc->stats_base.backlog;
778 	return 0;
779 }
780 
/* check_params callback for FIFO. There is nothing to validate, so every
 * request is accepted.
 *
 * Return: always 0.
 */
static int
mlxsw_sp_qdisc_fifo_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
				 void *params)
{
	/* No parameters are inspected. */
	return 0;
}
788 
789 static int
790 mlxsw_sp_qdisc_fifo_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
791 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
792 			    void *params)
793 {
794 	return 0;
795 }
796 
/* get_stats callback for FIFO: report the generic traffic class statistics.
 *
 * Return: always 0.
 */
static int
mlxsw_sp_qdisc_get_fifo_stats(struct mlxsw_sp_port *mlxsw_sp_port,
			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
			      struct tc_qopt_offload_stats *stats_ptr)
{
	mlxsw_sp_qdisc_get_tc_stats(mlxsw_sp_port, mlxsw_sp_qdisc, stats_ptr);

	return 0;
}
806 
807 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_fifo = {
808 	.type = MLXSW_SP_QDISC_FIFO,
809 	.check_params = mlxsw_sp_qdisc_fifo_check_params,
810 	.replace = mlxsw_sp_qdisc_fifo_replace,
811 	.destroy = mlxsw_sp_qdisc_fifo_destroy,
812 	.get_stats = mlxsw_sp_qdisc_get_fifo_stats,
813 	.clean_stats = mlxsw_sp_setup_tc_qdisc_leaf_clean_stats,
814 };
815 
816 int mlxsw_sp_setup_tc_fifo(struct mlxsw_sp_port *mlxsw_sp_port,
817 			   struct tc_fifo_qopt_offload *p)
818 {
819 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
820 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
821 	int tclass, child_index;
822 	u32 parent_handle;
823 
824 	/* Invisible FIFOs are tracked in future_handle and future_fifos. Make
825 	 * sure that not more than one qdisc is created for a port at a time.
826 	 * RTNL is a simple proxy for that.
827 	 */
828 	ASSERT_RTNL();
829 
830 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, false);
831 	if (!mlxsw_sp_qdisc && p->handle == TC_H_UNSPEC) {
832 		parent_handle = TC_H_MAJ(p->parent);
833 		if (parent_handle != qdisc_state->future_handle) {
834 			/* This notifications is for a different Qdisc than
835 			 * previously. Wipe the future cache.
836 			 */
837 			memset(qdisc_state->future_fifos, 0,
838 			       sizeof(qdisc_state->future_fifos));
839 			qdisc_state->future_handle = parent_handle;
840 		}
841 
842 		child_index = TC_H_MIN(p->parent);
843 		tclass = MLXSW_SP_PRIO_CHILD_TO_TCLASS(child_index);
844 		if (tclass < IEEE_8021QAZ_MAX_TCS) {
845 			if (p->command == TC_FIFO_REPLACE)
846 				qdisc_state->future_fifos[tclass] = true;
847 			else if (p->command == TC_FIFO_DESTROY)
848 				qdisc_state->future_fifos[tclass] = false;
849 		}
850 	}
851 	if (!mlxsw_sp_qdisc)
852 		return -EOPNOTSUPP;
853 
854 	if (p->command == TC_FIFO_REPLACE) {
855 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
856 					      mlxsw_sp_qdisc,
857 					      &mlxsw_sp_qdisc_ops_fifo, NULL);
858 	}
859 
860 	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
861 				    MLXSW_SP_QDISC_FIFO))
862 		return -EOPNOTSUPP;
863 
864 	switch (p->command) {
865 	case TC_FIFO_DESTROY:
866 		if (p->handle == mlxsw_sp_qdisc->handle)
867 			return mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
868 						      mlxsw_sp_qdisc);
869 		return 0;
870 	case TC_FIFO_STATS:
871 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
872 						&p->stats);
873 	case TC_FIFO_REPLACE: /* Handled above. */
874 		break;
875 	}
876 
877 	return -EOPNOTSUPP;
878 }
879 
880 static int
881 __mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
882 {
883 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
884 	int i;
885 
886 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
887 		mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
888 					  MLXSW_SP_PORT_DEFAULT_TCLASS);
889 		mlxsw_sp_port_ets_set(mlxsw_sp_port,
890 				      MLXSW_REG_QEEC_HR_SUBGROUP,
891 				      i, 0, false, 0);
892 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
893 				       &qdisc_state->tclass_qdiscs[i]);
894 		qdisc_state->tclass_qdiscs[i].prio_bitmap = 0;
895 	}
896 
897 	return 0;
898 }
899 
/* Destroy callback for PRIO; delegates to the common PRIO/ETS teardown. The
 * slot argument is not used.
 */
static int
mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	int err = __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);

	return err;
}
906 
907 static int
908 __mlxsw_sp_qdisc_ets_check_params(unsigned int nbands)
909 {
910 	if (nbands > IEEE_8021QAZ_MAX_TCS)
911 		return -EOPNOTSUPP;
912 
913 	return 0;
914 }
915 
916 static int
917 mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
918 				 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
919 				 void *params)
920 {
921 	struct tc_prio_qopt_offload_params *p = params;
922 
923 	return __mlxsw_sp_qdisc_ets_check_params(p->bands);
924 }
925 
926 static int
927 __mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
928 			     unsigned int nbands,
929 			     const unsigned int *quanta,
930 			     const unsigned int *weights,
931 			     const u8 *priomap)
932 {
933 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
934 	struct mlxsw_sp_qdisc *child_qdisc;
935 	int tclass, i, band, backlog;
936 	u8 old_priomap;
937 	int err;
938 
939 	for (band = 0; band < nbands; band++) {
940 		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
941 		child_qdisc = &qdisc_state->tclass_qdiscs[tclass];
942 		old_priomap = child_qdisc->prio_bitmap;
943 		child_qdisc->prio_bitmap = 0;
944 
945 		err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
946 					    MLXSW_REG_QEEC_HR_SUBGROUP,
947 					    tclass, 0, !!quanta[band],
948 					    weights[band]);
949 		if (err)
950 			return err;
951 
952 		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
953 			if (priomap[i] == band) {
954 				child_qdisc->prio_bitmap |= BIT(i);
955 				if (BIT(i) & old_priomap)
956 					continue;
957 				err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port,
958 								i, tclass);
959 				if (err)
960 					return err;
961 			}
962 		}
963 		if (old_priomap != child_qdisc->prio_bitmap &&
964 		    child_qdisc->ops && child_qdisc->ops->clean_stats) {
965 			backlog = child_qdisc->stats_base.backlog;
966 			child_qdisc->ops->clean_stats(mlxsw_sp_port,
967 						      child_qdisc);
968 			child_qdisc->stats_base.backlog = backlog;
969 		}
970 
971 		if (handle == qdisc_state->future_handle &&
972 		    qdisc_state->future_fifos[tclass]) {
973 			err = mlxsw_sp_qdisc_replace(mlxsw_sp_port, TC_H_UNSPEC,
974 						     child_qdisc,
975 						     &mlxsw_sp_qdisc_ops_fifo,
976 						     NULL);
977 			if (err)
978 				return err;
979 		}
980 	}
981 	for (; band < IEEE_8021QAZ_MAX_TCS; band++) {
982 		tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
983 		child_qdisc = &qdisc_state->tclass_qdiscs[tclass];
984 		child_qdisc->prio_bitmap = 0;
985 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
986 		mlxsw_sp_port_ets_set(mlxsw_sp_port,
987 				      MLXSW_REG_QEEC_HR_SUBGROUP,
988 				      tclass, 0, false, 0);
989 	}
990 
991 	qdisc_state->future_handle = TC_H_UNSPEC;
992 	memset(qdisc_state->future_fifos, 0, sizeof(qdisc_state->future_fifos));
993 	return 0;
994 }
995 
996 static int
997 mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
998 			    struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
999 			    void *params)
1000 {
1001 	struct tc_prio_qopt_offload_params *p = params;
1002 	unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0};
1003 
1004 	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands,
1005 					    zeroes, zeroes, p->priomap);
1006 }
1007 
1008 static void
1009 __mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
1010 			       struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1011 			       struct gnet_stats_queue *qstats)
1012 {
1013 	u64 backlog;
1014 
1015 	backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
1016 				       mlxsw_sp_qdisc->stats_base.backlog);
1017 	qstats->backlog -= backlog;
1018 }
1019 
1020 static void
1021 mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
1022 			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1023 			      void *params)
1024 {
1025 	struct tc_prio_qopt_offload_params *p = params;
1026 
1027 	__mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc,
1028 				       p->qstats);
1029 }
1030 
1031 static int
1032 mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port,
1033 			      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1034 			      struct tc_qopt_offload_stats *stats_ptr)
1035 {
1036 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
1037 	struct mlxsw_sp_qdisc *tc_qdisc;
1038 	u64 tx_packets = 0;
1039 	u64 tx_bytes = 0;
1040 	u64 backlog = 0;
1041 	u64 drops = 0;
1042 	int i;
1043 
1044 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
1045 		tc_qdisc = &qdisc_state->tclass_qdiscs[i];
1046 		mlxsw_sp_qdisc_collect_tc_stats(mlxsw_sp_port, tc_qdisc,
1047 						&tx_bytes, &tx_packets,
1048 						&drops, &backlog);
1049 	}
1050 
1051 	mlxsw_sp_qdisc_update_stats(mlxsw_sp_port->mlxsw_sp, mlxsw_sp_qdisc,
1052 				    tx_bytes, tx_packets, drops, backlog,
1053 				    stats_ptr);
1054 	return 0;
1055 }
1056 
1057 static void
1058 mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
1059 					 struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
1060 {
1061 	struct mlxsw_sp_qdisc_stats *stats_base;
1062 	struct mlxsw_sp_port_xstats *xstats;
1063 	struct rtnl_link_stats64 *stats;
1064 	int i;
1065 
1066 	xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
1067 	stats = &mlxsw_sp_port->periodic_hw_stats.stats;
1068 	stats_base = &mlxsw_sp_qdisc->stats_base;
1069 
1070 	stats_base->tx_packets = stats->tx_packets;
1071 	stats_base->tx_bytes = stats->tx_bytes;
1072 
1073 	stats_base->drops = 0;
1074 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
1075 		stats_base->drops += mlxsw_sp_xstats_tail_drop(xstats, i);
1076 		stats_base->drops += xstats->wred_drop[i];
1077 	}
1078 
1079 	mlxsw_sp_qdisc->stats_base.backlog = 0;
1080 }
1081 
1082 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
1083 	.type = MLXSW_SP_QDISC_PRIO,
1084 	.check_params = mlxsw_sp_qdisc_prio_check_params,
1085 	.replace = mlxsw_sp_qdisc_prio_replace,
1086 	.unoffload = mlxsw_sp_qdisc_prio_unoffload,
1087 	.destroy = mlxsw_sp_qdisc_prio_destroy,
1088 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
1089 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
1090 };
1091 
1092 static int
1093 mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
1094 				struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1095 				void *params)
1096 {
1097 	struct tc_ets_qopt_offload_replace_params *p = params;
1098 
1099 	return __mlxsw_sp_qdisc_ets_check_params(p->bands);
1100 }
1101 
1102 static int
1103 mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
1104 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1105 			   void *params)
1106 {
1107 	struct tc_ets_qopt_offload_replace_params *p = params;
1108 
1109 	return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, handle, p->bands,
1110 					    p->quanta, p->weights, p->priomap);
1111 }
1112 
1113 static void
1114 mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
1115 			     struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1116 			     void *params)
1117 {
1118 	struct tc_ets_qopt_offload_replace_params *p = params;
1119 
1120 	__mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc,
1121 				       p->qstats);
1122 }
1123 
/* destroy callback of the ETS ops. Delegates to
 * __mlxsw_sp_qdisc_ets_destroy(), which takes only the port; @mlxsw_sp_qdisc
 * is not used.
 */
static int
mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	int err = __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);

	return err;
}
1130 
1131 static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
1132 	.type = MLXSW_SP_QDISC_ETS,
1133 	.check_params = mlxsw_sp_qdisc_ets_check_params,
1134 	.replace = mlxsw_sp_qdisc_ets_replace,
1135 	.unoffload = mlxsw_sp_qdisc_ets_unoffload,
1136 	.destroy = mlxsw_sp_qdisc_ets_destroy,
1137 	.get_stats = mlxsw_sp_qdisc_get_prio_stats,
1138 	.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
1139 };
1140 
1141 /* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting
1142  * graph is free of cycles). These operations do not change the parent handle
1143  * though, which means it can be incomplete (if there is more than one class
1144  * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was
1145  * linked to a different class and then removed from the original class).
1146  *
1147  * E.g. consider this sequence of operations:
1148  *
1149  *  # tc qdisc add dev swp1 root handle 1: prio
1150  *  # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000
1151  *  RED: set bandwidth to 10Mbit
1152  *  # tc qdisc link dev swp1 handle 13: parent 1:2
1153  *
1154  * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their
1155  * child. But RED will still only claim that 1:3 is its parent. If it's removed
1156  * from that band, its only parent will be 1:2, but it will continue to claim
1157  * that it is in fact 1:3.
1158  *
1159  * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before
1160  * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace
1161  * notification to offload the child Qdisc, based on its parent handle, and use
1162  * the graft operation to validate that the class where the child is actually
1163  * grafted corresponds to the parent handle. If the two don't match, we
1164  * unoffload the child.
1165  */
1166 static int
1167 __mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
1168 			   struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1169 			   u8 band, u32 child_handle)
1170 {
1171 	struct mlxsw_sp_qdisc_state *qdisc_state = mlxsw_sp_port->qdisc;
1172 	int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
1173 	struct mlxsw_sp_qdisc *old_qdisc;
1174 
1175 	if (band < IEEE_8021QAZ_MAX_TCS &&
1176 	    qdisc_state->tclass_qdiscs[tclass_num].handle == child_handle)
1177 		return 0;
1178 
1179 	if (!child_handle) {
1180 		/* This is an invisible FIFO replacing the original Qdisc.
1181 		 * Ignore it--the original Qdisc's destroy will follow.
1182 		 */
1183 		return 0;
1184 	}
1185 
1186 	/* See if the grafted qdisc is already offloaded on any tclass. If so,
1187 	 * unoffload it.
1188 	 */
1189 	old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
1190 						  child_handle);
1191 	if (old_qdisc)
1192 		mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
1193 
1194 	mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
1195 			       &qdisc_state->tclass_qdiscs[tclass_num]);
1196 	return -EOPNOTSUPP;
1197 }
1198 
1199 static int
1200 mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
1201 			  struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
1202 			  struct tc_prio_qopt_offload_graft_params *p)
1203 {
1204 	return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
1205 					  p->band, p->child_handle);
1206 }
1207 
1208 int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
1209 			   struct tc_prio_qopt_offload *p)
1210 {
1211 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
1212 
1213 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
1214 	if (!mlxsw_sp_qdisc)
1215 		return -EOPNOTSUPP;
1216 
1217 	if (p->command == TC_PRIO_REPLACE)
1218 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
1219 					      mlxsw_sp_qdisc,
1220 					      &mlxsw_sp_qdisc_ops_prio,
1221 					      &p->replace_params);
1222 
1223 	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
1224 				    MLXSW_SP_QDISC_PRIO))
1225 		return -EOPNOTSUPP;
1226 
1227 	switch (p->command) {
1228 	case TC_PRIO_DESTROY:
1229 		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
1230 	case TC_PRIO_STATS:
1231 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
1232 						&p->stats);
1233 	case TC_PRIO_GRAFT:
1234 		return mlxsw_sp_qdisc_prio_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
1235 						 &p->graft_params);
1236 	default:
1237 		return -EOPNOTSUPP;
1238 	}
1239 }
1240 
1241 int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
1242 			  struct tc_ets_qopt_offload *p)
1243 {
1244 	struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
1245 
1246 	mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
1247 	if (!mlxsw_sp_qdisc)
1248 		return -EOPNOTSUPP;
1249 
1250 	if (p->command == TC_ETS_REPLACE)
1251 		return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
1252 					      mlxsw_sp_qdisc,
1253 					      &mlxsw_sp_qdisc_ops_ets,
1254 					      &p->replace_params);
1255 
1256 	if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
1257 				    MLXSW_SP_QDISC_ETS))
1258 		return -EOPNOTSUPP;
1259 
1260 	switch (p->command) {
1261 	case TC_ETS_DESTROY:
1262 		return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
1263 	case TC_ETS_STATS:
1264 		return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
1265 						&p->stats);
1266 	case TC_ETS_GRAFT:
1267 		return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
1268 						  p->graft_params.band,
1269 						  p->graft_params.child_handle);
1270 	default:
1271 		return -EOPNOTSUPP;
1272 	}
1273 }
1274 
1275 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
1276 {
1277 	struct mlxsw_sp_qdisc_state *qdisc_state;
1278 	int i;
1279 
1280 	qdisc_state = kzalloc(sizeof(*qdisc_state), GFP_KERNEL);
1281 	if (!qdisc_state)
1282 		return -ENOMEM;
1283 
1284 	qdisc_state->root_qdisc.prio_bitmap = 0xff;
1285 	qdisc_state->root_qdisc.tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS;
1286 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
1287 		qdisc_state->tclass_qdiscs[i].tclass_num = i;
1288 
1289 	mlxsw_sp_port->qdisc = qdisc_state;
1290 	return 0;
1291 }
1292 
1293 void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port)
1294 {
1295 	kfree(mlxsw_sp_port->qdisc);
1296 }
1297