/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

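/*
 * Range check helper.  A negative val is treated as "unspecified" and passes
 * the check; callers that require the parameter test for val < 0 themselves
 * before calling this.
 */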
static int
in_range(int val, int lo, int hi)
{

	return (val < 0 || (val <= hi && val >= lo));
}

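/*
 * Program the firmware packet scheduler's global configuration (the minmax
 * knob).
 */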
static int
set_sched_class_config(struct adapter *sc, int minmax)
{
	int rc;

	if (minmax < 0)
		return (EINVAL);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
	if (rc)
		return (rc);
	if (hw_off_limits(sc))
		rc = ENXIO;
	else
		rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
	end_synchronized_op(sc, 0);

	return (rc);
}

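/*
 * Validate the requested scheduling class parameters and pass them to the
 * firmware.  For class-level rate limiting (CL_RL) the driver's software
 * state is updated first and rolled back if the hardware update fails.
 */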
static int
set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
    int sleep_ok)
{
	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
	struct port_info *pi;
	struct tx_cl_rl_params *tc, old;
	bool check_pktsize = false;

	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
	else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
	else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
	else
		return (EINVAL);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		if (p->mode == SCHED_CLASS_MODE_CLASS)
			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
		else if (p->mode == SCHED_CLASS_MODE_FLOW) {
			check_pktsize = true;
			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
		} else
			return (EINVAL);
	} else
		fw_mode = 0;

	/* Valid channel must always be provided. */
	if (p->channel < 0)
		return (EINVAL);
	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
		return (ERANGE);

	pi = sc->port[sc->chan_map[p->channel]];
	if (pi == NULL)
		return (ENXIO);
	MPASS(pi->tx_chan == p->channel);
	top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CH_RL) {
		/*
		 * Valid rate (mode, unit and values) must be provided.
		 */

		if (p->minrate < 0)
			p->minrate = 0;
		if (p->maxrate < 0)
			return (EINVAL);

		if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
			/* ratemode could be relative (%) or absolute. */
			if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
				/* maxrate is % of port bandwidth. */
				if (!in_range(p->minrate, 0, 100) ||
				    !in_range(p->maxrate, 0, 100)) {
					return (ERANGE);
				}
			} else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
				/* maxrate is absolute value in kbps. */
				if (!in_range(p->minrate, 0, top_speed) ||
				    !in_range(p->maxrate, 0, top_speed)) {
					return (ERANGE);
				}
			} else
				return (EINVAL);
		} else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
			/* maxrate is the absolute value in pps. */
			check_pktsize = true;
			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
			/*
			 * pps is an absolute rate; set fw_ratemode explicitly
			 * so it isn't used uninitialized later on this path.
			 */
			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
		} else
			return (EINVAL);
	} else {
		MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);

		/*
		 * Valid weight must be provided.
		 */
		if (p->weight < 0)
			return (EINVAL);
		if (!in_range(p->weight, 1, 99))
			return (ERANGE);

		fw_rateunit = 0;
		fw_ratemode = 0;
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CL_WRR) {
		/*
		 * Valid scheduling class must be provided.
		 */
		if (p->cl < 0)
			return (EINVAL);
		if (!in_range(p->cl, 0, sc->params.nsched_cls - 1))
			return (ERANGE);
	}

	if (check_pktsize) {
		if (p->pktsize < 0)
			return (EINVAL);
		if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
			return (ERANGE);
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		tc = &pi->sched_params->cl_rl[p->cl];
		mtx_lock(&sc->tc_lock);
		if (tc->refcount > 0 || tc->state == CS_HW_UPDATE_IN_PROGRESS)
			rc = EBUSY;
		else {
			old = *tc;

			tc->flags |= CF_USER;
			tc->state = CS_HW_UPDATE_IN_PROGRESS;
			tc->ratemode = fw_ratemode;
			tc->rateunit = fw_rateunit;
			tc->mode = fw_mode;
			tc->maxrate = p->maxrate;
			tc->pktsize = p->pktsize;
			rc = 0;
		}
		mtx_unlock(&sc->tc_lock);
		if (rc != 0)
			return (rc);
	}

	rc = begin_synchronized_op(sc, NULL,
	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
	if (rc != 0) {
		if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
			mtx_lock(&sc->tc_lock);
			MPASS(tc->refcount == 0);
			MPASS(tc->flags & CF_USER);
			MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);
			*tc = old;
			mtx_unlock(&sc->tc_lock);
		}
		return (rc);
	}
	if (!hw_off_limits(sc)) {
		rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
		    fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
		    p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
	}
	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		mtx_lock(&sc->tc_lock);
		MPASS(tc->refcount == 0);
		MPASS(tc->flags & CF_USER);
		MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);

		if (rc == 0)
			tc->state = CS_HW_CONFIGURED;
		else {
			/* parameters failed so we don't park at params_set */
			tc->state = CS_UNINITIALIZED;
			tc->flags &= ~CF_USER;
			CH_ERR(pi, "failed to configure traffic class %d: %d.  "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", p->cl, rc, fw_mode, fw_rateunit,
			    fw_ratemode, p->channel, p->minrate, p->maxrate,
			    p->pktsize, 0);
		}
		mtx_unlock(&sc->tc_lock);
	}

	return (rc);
}

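/*
 * Task handler that writes out any traffic class whose parameters have been
 * set but not yet applied to the hardware (CS_HW_UPDATE_REQUESTED).  The
 * tc_lock is dropped around each firmware call.
 */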
static void
update_tx_sched(void *context, int pending)
{
	int i, j, rc;
	struct port_info *pi;
	struct tx_cl_rl_params *tc;
	struct adapter *sc = context;
	const int n = sc->params.nsched_cls;

	mtx_lock(&sc->tc_lock);
	for_each_port(sc, i) {
		pi = sc->port[i];
		tc = &pi->sched_params->cl_rl[0];
		for (j = 0; j < n; j++, tc++) {
			MPASS(mtx_owned(&sc->tc_lock));
			if (tc->state != CS_HW_UPDATE_REQUESTED)
				continue;
			mtx_unlock(&sc->tc_lock);

			if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t4utxs") != 0) {
				mtx_lock(&sc->tc_lock);
				continue;
			}
			rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
			    FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
			    tc->pktsize, tc->burstsize, 1);
			end_synchronized_op(sc, 0);

			mtx_lock(&sc->tc_lock);
			MPASS(tc->state == CS_HW_UPDATE_REQUESTED);
			if (rc == 0) {
				tc->state = CS_HW_CONFIGURED;
				continue;
			}
			/* parameters failed so we try to avoid params_set */
			if (tc->refcount > 0)
				tc->state = CS_PARAMS_SET;
			else
				tc->state = CS_UNINITIALIZED;
			CH_ERR(pi, "failed to configure traffic class %d: %d.  "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", j, rc, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, 0, tc->maxrate,
			    tc->pktsize, tc->burstsize);
		}
	}
	mtx_unlock(&sc->tc_lock);
}

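/*
 * Dispatch a packet scheduler command: global config or per-class
 * parameters.
 */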
int
t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
{

	if (p->type != SCHED_CLASS_TYPE_PACKET)
		return (EINVAL);

	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
		return (set_sched_class_config(sc, p->u.config.minmax));

	if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
		return (set_sched_class_params(sc, &p->u.params, 1));

	return (EINVAL);
}

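/*
 * Bind txq to the traffic class at index idx, or unbind it when idx is -1.
 * txq->tc_idx is parked at -2 while the firmware operation is in flight and
 * the class refcounts are adjusted to match the outcome.
 */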
static int
bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
{
	struct tx_cl_rl_params *tc0, *tc;
	int rc, old_idx;
	uint32_t fw_mnem, fw_class;

	if (!(txq->eq.flags & EQ_HW_ALLOCATED))
		return (ENXIO);

	mtx_lock(&sc->tc_lock);
	if (txq->tc_idx == -2) {
		rc = EBUSY;	/* Another bind/unbind in progress already. */
		goto done;
	}
	if (idx == txq->tc_idx) {
		rc = 0;		/* No change, nothing to do. */
		goto done;
	}

	tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
	if (idx != -1) {
		/*
		 * Bind to a different class at index idx.
		 */
		tc = &tc0[idx];
		if (tc->state != CS_HW_CONFIGURED) {
			rc = ENXIO;
			goto done;
		} else {
			/*
			 * Ok to proceed.  Place a reference on the new class
			 * while still holding on to the reference on the
			 * previous class, if any.
			 */
			tc->refcount++;
		}
	}
	/* Mark as busy before letting go of the lock. */
	old_idx = txq->tc_idx;
	txq->tc_idx = -2;
	mtx_unlock(&sc->tc_lock);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
	if (rc == 0) {
		fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
		    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
		fw_class = idx < 0 ? 0xffffffff : idx;
		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem,
		    &fw_class);
		end_synchronized_op(sc, 0);
	}

	mtx_lock(&sc->tc_lock);
	MPASS(txq->tc_idx == -2);
	if (rc == 0) {
		/*
		 * Unbind, bind, or bind to a different class succeeded.  Remove
		 * the reference on the old traffic class, if any.
		 */
		if (old_idx != -1) {
			tc = &tc0[old_idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = idx;
	} else {
		/*
		 * Unbind, bind, or bind to a different class failed.  Remove
		 * the anticipatory reference on the new traffic class, if any.
		 */
		if (idx != -1) {
			tc = &tc0[idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = old_idx;
	}
done:
	MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->params.nsched_cls);
	mtx_unlock(&sc->tc_lock);
	return (rc);
}

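/*
 * Bind a single tx queue, or all tx queues of the port's main VI when
 * p->queue is negative, to the requested scheduling class.
 */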
int
t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
	struct port_info *pi = NULL;
	struct vi_info *vi;
	struct sge_txq *txq;
	int i, rc;

	if (p->port >= sc->params.nports)
		return (EINVAL);

	/*
	 * XXX: cxgbetool allows the user to specify the physical port only.  So
	 * we always operate on the main VI.
	 */
	pi = sc->port[p->port];
	vi = &pi->vi[0];

	/* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
	if (!(vi->flags & VI_INIT_DONE))
		return (EAGAIN);
	MPASS(vi->ntxq > 0);

	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
	    !in_range(p->cl, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	if (p->queue < 0) {
		/*
		 * Change the scheduling on all the TX queues for the
		 * interface.
		 */
		for_each_txq(vi, i, txq) {
			rc = bind_txq_to_traffic_class(sc, txq, p->cl);
			if (rc != 0)
				break;
		}
	} else {
		/*
		 * If op.queue is non-negative, then we're only changing the
		 * scheduling on a single specified TX queue.
		 */
		txq = &sc->sge.txq[vi->first_txq + p->queue];
		rc = bind_txq_to_traffic_class(sc, txq, p->cl);
	}

	return (rc);
}

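/*
 * Allocate the per-port traffic class tables and set up the lock and task
 * that protect and service them.
 */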
int
t4_init_tx_sched(struct adapter *sc)
{
	int i;
	const int n = sc->params.nsched_cls;
	struct port_info *pi;

	mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
	TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
	for_each_port(sc, i) {
		pi = sc->port[i];
		pi->sched_params = malloc(sizeof(*pi->sched_params) +
		    n * sizeof(struct tx_cl_rl_params), M_CXGBE,
		    M_ZERO | M_WAITOK);
	}

	return (0);
}

int
t4_free_tx_sched(struct adapter *sc)
{
	int i;

	taskqueue_drain(taskqueue_thread, &sc->tc_task);

	for_each_port(sc, i) {
		if (sc->port[i] != NULL)
			free(sc->port[i]->sched_params, M_CXGBE);
	}

	if (mtx_initialized(&sc->tc_lock))
		mtx_destroy(&sc->tc_lock);

	return (0);
}

void
t4_update_tx_sched(struct adapter *sc)
{

	taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
}

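/*
 * Look for a flow-mode bit-rate class that matches maxrate (in kbps) and
 * take a reference on it, setting one up in a free slot if necessary.  An
 * exact match is preferred, then a never-used class, then an idle class not
 * claimed via the user ioctls (CF_USER).  Returns ENOSPC when no class is
 * available; the hardware update itself is deferred to the tc_task.
 */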
int
t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
    int *tc_idx)
{
	int rc = 0, fa, fa2, i, pktsize, burstsize;
	bool update;
	struct tx_cl_rl_params *tc;
	struct port_info *pi;

	MPASS(port_id >= 0 && port_id < sc->params.nports);

	pi = sc->port[port_id];
	if (pi->sched_params->pktsize > 0)
		pktsize = pi->sched_params->pktsize;
	else
		pktsize = pi->vi[0].ifp->if_mtu;
	if (pi->sched_params->burstsize > 0)
		burstsize = pi->sched_params->burstsize;
	else
		burstsize = pktsize * 4;
	tc = &pi->sched_params->cl_rl[0];

	update = false;
	fa = fa2 = -1;
	mtx_lock(&sc->tc_lock);
	for (i = 0; i < sc->params.nsched_cls; i++, tc++) {
		if (tc->state >= CS_PARAMS_SET &&
		    tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
		    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
		    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
		    tc->maxrate == maxrate && tc->pktsize == pktsize &&
		    tc->burstsize == burstsize) {
			tc->refcount++;
			*tc_idx = i;
			if (tc->state == CS_PARAMS_SET) {
				tc->state = CS_HW_UPDATE_REQUESTED;
				update = true;
			}
			goto done;
		}

		if (fa < 0 && tc->state == CS_UNINITIALIZED) {
			MPASS(tc->refcount == 0);
			fa = i;		/* first available, never used. */
		}
		if (fa2 < 0 && tc->refcount == 0 && !(tc->flags & CF_USER)) {
			fa2 = i;	/* first available, used previously. */
		}
	}
	/* Not found */
	MPASS(i == sc->params.nsched_cls);
	if (fa == -1)
		fa = fa2;
	if (fa == -1) {
		*tc_idx = -1;
		rc = ENOSPC;
	} else {
		MPASS(fa >= 0 && fa < sc->params.nsched_cls);
		tc = &pi->sched_params->cl_rl[fa];
		MPASS(!(tc->flags & CF_USER));
		MPASS(tc->refcount == 0);

		tc->refcount = 1;
		tc->state = CS_HW_UPDATE_REQUESTED;
		tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
		tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
		tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
		tc->maxrate = maxrate;
		tc->pktsize = pktsize;
		tc->burstsize = burstsize;
		*tc_idx = fa;
		update = true;
	}
done:
	mtx_unlock(&sc->tc_lock);
	if (update)
		t4_update_tx_sched(sc);
	return (rc);
}

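/*
 * Drop a reference obtained with t4_reserve_cl_rl_kbps.  The class retains
 * its configuration so that a later reservation with the same parameters
 * can reuse it.
 */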
void
t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
{
	struct tx_cl_rl_params *tc;

	MPASS(port_id >= 0 && port_id < sc->params.nports);
	MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
	MPASS(tc->refcount > 0);
	tc->refcount--;
	mtx_unlock(&sc->tc_lock);
}

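/*
 * Sysctl handler for a txq's traffic class index.  Reads return the current
 * binding; writing a class index rebinds the queue (not allowed on a VF).
 */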
int
sysctl_tc(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->adapter;
	struct sge_txq *txq;
	int qidx = arg2, rc, tc_idx;

	MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);

	txq = &sc->sge.txq[qidx];
	tc_idx = txq->tc_idx;
	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (sc->flags & IS_VF)
		return (EPERM);
	if (!in_range(tc_idx, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	return (bind_txq_to_traffic_class(sc, txq, tc_idx));
}

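/*
 * Sysctl handler that displays a traffic class's parameters in
 * human-readable form.  arg2 carries the port id in the upper 16 bits and
 * the class index in the lower 16.
 */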
int
sysctl_tc_params(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct tx_cl_rl_params tc;
	struct sbuf *sb;
	int i, rc, port_id, mbps, gbps;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	port_id = arg2 >> 16;
	MPASS(port_id < sc->params.nports);
	MPASS(sc->port[port_id] != NULL);
	i = arg2 & 0xffff;
	MPASS(i < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = sc->port[port_id]->sched_params->cl_rl[i];
	mtx_unlock(&sc->tc_lock);

	if (tc.state < CS_PARAMS_SET) {
		sbuf_printf(sb, "uninitialized");
		goto done;
	}

	switch (tc.rateunit) {
	case SCHED_CLASS_RATEUNIT_BITS:
		switch (tc.ratemode) {
		case SCHED_CLASS_RATEMODE_REL:
			/* XXX: top speed or actual link speed? */
			gbps = port_top_speed(sc->port[port_id]);
			sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
			break;
		case SCHED_CLASS_RATEMODE_ABS:
			mbps = tc.maxrate / 1000;
			gbps = tc.maxrate / 1000000;
			if (tc.maxrate == gbps * 1000000)
				sbuf_printf(sb, "%uGbps", gbps);
			else if (tc.maxrate == mbps * 1000)
				sbuf_printf(sb, "%uMbps", mbps);
			else
				sbuf_printf(sb, "%uKbps", tc.maxrate);
			break;
		default:
			rc = ENXIO;
			goto done;
		}
		break;
	case SCHED_CLASS_RATEUNIT_PKTS:
		sbuf_printf(sb, "%upps", tc.maxrate);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

	switch (tc.mode) {
	case SCHED_CLASS_MODE_CLASS:
		/* Note that pktsize and burstsize are not used in this mode. */
		sbuf_printf(sb, " aggregate");
		break;
	case SCHED_CLASS_MODE_FLOW:
		sbuf_printf(sb, " per-flow");
		if (tc.pktsize > 0)
			sbuf_printf(sb, " pkt-size %u", tc.pktsize);
		if (tc.burstsize > 0)
			sbuf_printf(sb, " burst-size %u", tc.burstsize);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

done:
	if (rc == 0)
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

#ifdef RATELIMIT
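/*
 * Allocate the table of Ethernet offload TIDs (etids) used by rate tags.
 * Free entries are chained into a freelist threaded through the table
 * itself.
 */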
void
t4_init_etid_table(struct adapter *sc)
{
	int i;
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
	t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
	    M_ZERO | M_WAITOK);
	t->efree = t->etid_tab;
	t->etids_in_use = 0;
	for (i = 1; i < t->netids; i++)
		t->etid_tab[i - 1].next = &t->etid_tab[i];
	t->etid_tab[t->netids - 1].next = NULL;
}

void
t4_free_etid_table(struct adapter *sc)
{
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	free(t->etid_tab, M_CXGBE);
	t->etid_tab = NULL;

	if (mtx_initialized(&t->etid_lock))
		mtx_destroy(&t->etid_lock);
}

/* etid services */
static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
static void free_etid(struct adapter *, int);

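/*
 * Pop an etid off the freelist and associate it with the given rate tag.
 * Returns -1 if the table is full.
 */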
static int
alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
{
	struct tid_info *t = &sc->tids;
	int etid = -1;

	mtx_lock(&t->etid_lock);
	if (t->efree) {
		union etid_entry *p = t->efree;

		etid = p - t->etid_tab + t->etid_base;
		t->efree = p->next;
		p->cst = cst;
		t->etids_in_use++;
	}
	mtx_unlock(&t->etid_lock);
	return (etid);
}

struct cxgbe_rate_tag *
lookup_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;

	return (t->etid_tab[etid - t->etid_base].cst);
}

static void
free_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;
	union etid_entry *p = &t->etid_tab[etid - t->etid_base];

	mtx_lock(&t->etid_lock);
	p->next = t->efree;
	t->efree = p;
	t->etids_in_use--;
	mtx_unlock(&t->etid_lock);
}

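/*
 * snd_tag alloc handler for rate limiting.  Reserves a scheduling class for
 * the requested rate (max_rate is in bytes/s and is converted to kbps here)
 * and an etid, then sets up the tag itself.
 */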
int
cxgbe_rate_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	int rc, schedcl;
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct cxgbe_rate_tag *cst;

	MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);

	rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0)
		return (rc);
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);

	cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
	if (cst == NULL) {
failed:
		t4_release_cl_rl(sc, pi->port_id, schedcl);
		return (ENOMEM);
	}

	cst->etid = alloc_etid(sc, cst);
	if (cst->etid < 0) {
		free(cst, M_CXGBE);
		goto failed;
	}

	mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
	mbufq_init(&cst->pending_tx, INT_MAX);
	mbufq_init(&cst->pending_fwack, INT_MAX);
	m_snd_tag_init(&cst->com, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
	cst->adapter = sc;
	cst->port_id = pi->port_id;
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	cst->tx_credits = sc->params.eo_wr_cred;
	cst->tx_total = cst->tx_credits;
	cst->plen = 0;
	cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
	    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));

	/*
	 * Queues will be selected later when the connection flowid is
	 * available.
	 */

	*pt = &cst->com;
	return (0);
}

/*
 * Change in parameters, no change in ifp.
 */
int
cxgbe_rate_tag_modify(struct m_snd_tag *mst,
    union if_snd_tag_modify_params *params)
{
	int rc, schedcl;
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);
	struct adapter *sc = cst->adapter;

	/* XXX: is schedcl -1 ok here? */
	MPASS(cst->schedcl >= 0 && cst->schedcl < sc->params.nsched_cls);

	mtx_lock(&cst->lock);
	MPASS(cst->flags & EO_SND_TAG_REF);
	rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0) {
		/* Do not leak the lock on failure. */
		mtx_unlock(&cst->lock);
		return (rc);
	}
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);
	t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	mtx_unlock(&cst->lock);

	return (0);
}

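/*
 * Report the tag's current rate and queue level.  The queue level is an
 * estimate derived from the work request credits still outstanding.
 */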
int
cxgbe_rate_tag_query(struct m_snd_tag *mst,
    union if_snd_tag_query_params *params)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	params->rate_limit.max_rate = cst->max_rate;

#define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
	params->rate_limit.queue_level =
		(cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;

	return (0);
}

/*
 * Unlocks cst and frees it.
 */
void
cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
{
	struct adapter *sc = cst->adapter;

	mtx_assert(&cst->lock, MA_OWNED);
	MPASS((cst->flags & EO_SND_TAG_REF) == 0);
	MPASS(cst->tx_credits == cst->tx_total);
	MPASS(cst->plen == 0);
	MPASS(mbufq_first(&cst->pending_tx) == NULL);
	MPASS(mbufq_first(&cst->pending_fwack) == NULL);

	if (cst->etid >= 0)
		free_etid(sc, cst->etid);
	if (cst->schedcl != -1)
		t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	mtx_unlock(&cst->lock);
	mtx_destroy(&cst->lock);
	free(cst, M_CXGBE);
}

void
cxgbe_rate_tag_free(struct m_snd_tag *mst)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	mtx_lock(&cst->lock);

	/* The kernel is done with the snd_tag.  Remove its reference. */
	MPASS(cst->flags & EO_SND_TAG_REF);
	cst->flags &= ~EO_SND_TAG_REF;

	if (cst->ncompl == 0) {
		/*
		 * No fw4_ack in flight.  Free the tag right away if there are
		 * no outstanding credits.  Request the firmware to return all
		 * credits for the etid otherwise.
		 */
		if (cst->tx_credits == cst->tx_total) {
			cxgbe_rate_tag_free_locked(cst);
			return;	/* cst is gone. */
		}
		send_etid_flush_wr(cst);
	}
	mtx_unlock(&cst->lock);
}

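/*
 * Describe the hardware pacing capabilities (number of rates, max flows,
 * burst granularity) to the kernel's rate limiting code.
 */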
void
cxgbe_ratelimit_query(struct ifnet *ifp, struct if_ratelimit_query_results *q)
{
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;

	q->rate_table = NULL;
	q->flags = RT_IS_SELECTABLE;
	/*
	 * Absolute max limits from the firmware configuration.  Practical
	 * limits depend on the burstsize, pktsize (ifp->if_mtu ultimately) and
	 * the card's cclk.
	 */
	q->max_flows = sc->tids.netids;
	q->number_of_rates = sc->params.nsched_cls;
	q->min_segment_burst = 4; /* matches PKTSCHED_BURST in the firmware. */

#if 1
	if (chip_id(sc) < CHELSIO_T6) {
		/* Based on testing by rrs@ with a T580 at burstsize = 4. */
		MPASS(q->min_segment_burst == 4);
		q->max_flows = min(4000, q->max_flows);
	} else {
		/* XXX: TBD, carried forward from T5 for now. */
		q->max_flows = min(4000, q->max_flows);
	}

	/*
	 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
	 * even knows whether hw pacing will be used or not.  This prevents
	 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
	 * the private ioctls from using any of the traffic classes.
	 *
	 * Underreport the number of rates to tcp_ratelimit so that it doesn't
	 * hog all of them.  This can be removed if/when tcp_ratelimit switches
	 * to making its allocations on first-use rather than link-up.  There is
	 * nothing wrong with one particular consumer reserving all the classes
	 * but it should do so only if it'll actually use hw rate limiting.
	 */
	q->number_of_rates /= 4;
#endif
}
#endif