/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

/* A negative value means "not specified" and passes the range check. */
static int
in_range(int val, int lo, int hi)
{

        return (val < 0 || (val <= hi && val >= lo));
}

static int
set_sched_class_config(struct adapter *sc, int minmax)
{
        int rc;

        if (minmax < 0)
                return (EINVAL);

        rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
        if (rc)
                return (rc);
        if (hw_off_limits(sc))
                rc = ENXIO;
        else
                rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
        end_synchronized_op(sc, 0);

        return (rc);
}
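
/*
 * Validate the requested scheduling parameters and, if they check out,
 * program the traffic class in the firmware via t4_sched_params.  Negative
 * values in *p generally mean "not specified"; which fields are mandatory
 * depends on the scheduler level being configured.
 */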
static int
set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
    int sleep_ok)
{
        int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
        struct port_info *pi;
        struct tx_cl_rl_params *tc, old;
        bool check_pktsize = false;

        if (p->level == SCHED_CLASS_LEVEL_CL_RL)
                fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
        else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
                fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
        else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
                fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
        else
                return (EINVAL);

        if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
                if (p->mode == SCHED_CLASS_MODE_CLASS)
                        fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
                else if (p->mode == SCHED_CLASS_MODE_FLOW) {
                        check_pktsize = true;
                        fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
                } else
                        return (EINVAL);
        } else
                fw_mode = 0;

        /* Valid channel must always be provided. */
        if (p->channel < 0)
                return (EINVAL);
        if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
                return (ERANGE);

        pi = sc->port[sc->chan_map[p->channel]];
        if (pi == NULL)
                return (ENXIO);
        MPASS(pi->tx_chan == p->channel);
        top_speed = port_top_speed(pi) * 1000000;       /* Gbps -> Kbps */

        if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
            p->level == SCHED_CLASS_LEVEL_CH_RL) {
                /*
                 * Valid rate (mode, unit and values) must be provided.
                 */
                if (p->minrate < 0)
                        p->minrate = 0;
                if (p->maxrate < 0)
                        return (EINVAL);

                if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
                        fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
                        /* ratemode could be relative (%) or absolute. */
                        if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
                                fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
                                /* maxrate is % of port bandwidth. */
                                if (!in_range(p->minrate, 0, 100) ||
                                    !in_range(p->maxrate, 0, 100)) {
                                        return (ERANGE);
                                }
                        } else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
                                fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
                                /* maxrate is absolute value in kbps. */
                                if (!in_range(p->minrate, 0, top_speed) ||
                                    !in_range(p->maxrate, 0, top_speed)) {
                                        return (ERANGE);
                                }
                        } else
                                return (EINVAL);
                } else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
                        /* maxrate is the absolute value in pps. */
                        check_pktsize = true;
                        fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
                        fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
                } else
                        return (EINVAL);
        } else {
                MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);

                /*
                 * Valid weight must be provided.
                 */
                if (p->weight < 0)
                        return (EINVAL);
                if (!in_range(p->weight, 1, 99))
                        return (ERANGE);

                fw_rateunit = 0;
                fw_ratemode = 0;
        }

        if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
            p->level == SCHED_CLASS_LEVEL_CL_WRR) {
                /*
                 * Valid scheduling class must be provided.
                 */
                if (p->cl < 0)
                        return (EINVAL);
                if (!in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
                        return (ERANGE);
        }

        if (check_pktsize) {
                if (p->pktsize < 0)
                        return (EINVAL);
                if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
                        return (ERANGE);
        }

        if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
                tc = &pi->sched_params->cl_rl[p->cl];
                mtx_lock(&sc->tc_lock);
                if (tc->refcount > 0 || tc->flags & (CLRL_SYNC | CLRL_ASYNC))
                        rc = EBUSY;
                else {
                        /* Snapshot the class before modifying it. */
                        old = *tc;

                        tc->flags |= CLRL_SYNC | CLRL_USER;
                        tc->ratemode = fw_ratemode;
                        tc->rateunit = fw_rateunit;
                        tc->mode = fw_mode;
                        tc->maxrate = p->maxrate;
                        tc->pktsize = p->pktsize;
                        rc = 0;
                }
                mtx_unlock(&sc->tc_lock);
                if (rc != 0)
                        return (rc);
        }

        rc = begin_synchronized_op(sc, NULL,
            sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
        if (rc != 0) {
                if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
                        /* Restore the class to its pre-modification state. */
                        mtx_lock(&sc->tc_lock);
                        *tc = old;
                        mtx_unlock(&sc->tc_lock);
                }
                return (rc);
        }
        if (!hw_off_limits(sc)) {
                rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
                    fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
                    p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
        }
        end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);

        if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
                mtx_lock(&sc->tc_lock);
                MPASS(tc->flags & CLRL_SYNC);
                MPASS(tc->flags & CLRL_USER);
                MPASS(tc->refcount == 0);

                tc->flags &= ~CLRL_SYNC;
                if (rc == 0)
                        tc->flags &= ~CLRL_ERR;
                else
                        tc->flags |= CLRL_ERR;
                mtx_unlock(&sc->tc_lock);
        }

        return (rc);
}
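
/*
 * Task handler for deferred traffic-class updates.  Pushes every class
 * marked CLRL_ASYNC on every port to the firmware, dropping tc_lock around
 * each firmware call.
 */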
static void
update_tx_sched(void *context, int pending)
{
        int i, j, rc;
        struct port_info *pi;
        struct tx_cl_rl_params *tc;
        struct adapter *sc = context;
        const int n = sc->chip_params->nsched_cls;

        mtx_lock(&sc->tc_lock);
        for_each_port(sc, i) {
                pi = sc->port[i];
                tc = &pi->sched_params->cl_rl[0];
                for (j = 0; j < n; j++, tc++) {
                        MPASS(mtx_owned(&sc->tc_lock));
                        if ((tc->flags & CLRL_ASYNC) == 0)
                                continue;
                        mtx_unlock(&sc->tc_lock);

                        if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
                            "t4utxs") != 0) {
                                mtx_lock(&sc->tc_lock);
                                continue;
                        }
                        rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
                            FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
                            tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
                            tc->pktsize, tc->burstsize, 1);
                        end_synchronized_op(sc, 0);

                        mtx_lock(&sc->tc_lock);
                        MPASS(tc->flags & CLRL_ASYNC);
                        tc->flags &= ~CLRL_ASYNC;
                        if (rc == 0)
                                tc->flags &= ~CLRL_ERR;
                        else
                                tc->flags |= CLRL_ERR;
                }
        }
        mtx_unlock(&sc->tc_lock);
}

int
t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
{

        if (p->type != SCHED_CLASS_TYPE_PACKET)
                return (EINVAL);

        if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
                return (set_sched_class_config(sc, p->u.config.minmax));

        if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
                return (set_sched_class_params(sc, &p->u.params, 1));

        return (EINVAL);
}

static int
bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
{
        struct tx_cl_rl_params *tc0, *tc;
        int rc, old_idx;
        uint32_t fw_mnem, fw_class;

        if (!(txq->eq.flags & EQ_HW_ALLOCATED))
                return (ENXIO);

        mtx_lock(&sc->tc_lock);
        if (txq->tc_idx == -2) {
                rc = EBUSY;     /* Another bind/unbind in progress already. */
                goto done;
        }
        if (idx == txq->tc_idx) {
                rc = 0;         /* No change, nothing to do. */
                goto done;
        }

        tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
        if (idx != -1) {
                /*
                 * Bind to a different class at index idx.
                 */
                tc = &tc0[idx];
                if (tc->flags & CLRL_ERR) {
                        rc = ENXIO;
                        goto done;
                } else {
                        /*
                         * Ok to proceed.  Place a reference on the new class
                         * while still holding on to the reference on the
                         * previous class, if any.
                         */
                        tc->refcount++;
                }
        }
        /* Mark as busy before letting go of the lock. */
        old_idx = txq->tc_idx;
        txq->tc_idx = -2;
        mtx_unlock(&sc->tc_lock);

        /*
         * Do not return early on failure here; the cleanup below must run to
         * drop the anticipatory reference and restore the queue's tc_idx.
         */
        rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
        if (rc == 0) {
                fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
                    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
                    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
                fw_class = idx < 0 ? 0xffffffff : idx;
                rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem,
                    &fw_class);
                end_synchronized_op(sc, 0);
        }

        mtx_lock(&sc->tc_lock);
        MPASS(txq->tc_idx == -2);
        if (rc == 0) {
                /*
                 * Unbind, bind, or bind to a different class succeeded.
                 * Remove the reference on the old traffic class, if any.
                 */
                if (old_idx != -1) {
                        tc = &tc0[old_idx];
                        MPASS(tc->refcount > 0);
                        tc->refcount--;
                }
                txq->tc_idx = idx;
        } else {
                /*
                 * Unbind, bind, or bind to a different class failed.  Remove
                 * the anticipatory reference on the new traffic class, if any.
                 */
                if (idx != -1) {
                        tc = &tc0[idx];
                        MPASS(tc->refcount > 0);
                        tc->refcount--;
                }
                txq->tc_idx = old_idx;
        }
done:
        MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->chip_params->nsched_cls);
        mtx_unlock(&sc->tc_lock);
        return (rc);
}
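
/*
 * Bind one TX queue (p->queue >= 0) or all TX queues (p->queue < 0) of the
 * port's main VI to traffic class p->cl.
 */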
int
t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
        struct port_info *pi = NULL;
        struct vi_info *vi;
        struct sge_txq *txq;
        int i, rc;

        if (p->port >= sc->params.nports)
                return (EINVAL);

        /*
         * XXX: cxgbetool allows the user to specify the physical port only.
         * So we always operate on the main VI.
         */
        pi = sc->port[p->port];
        vi = &pi->vi[0];

        /* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
        if (!(vi->flags & VI_INIT_DONE))
                return (EAGAIN);
        MPASS(vi->ntxq > 0);

        if (!in_range(p->queue, 0, vi->ntxq - 1) ||
            !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
                return (EINVAL);

        if (p->queue < 0) {
                /*
                 * Change the scheduling on all the TX queues for the
                 * interface.
                 */
                for_each_txq(vi, i, txq) {
                        rc = bind_txq_to_traffic_class(sc, txq, p->cl);
                        if (rc != 0)
                                break;
                }
        } else {
                /*
                 * If p->queue is non-negative, then we're only changing the
                 * scheduling on a single specified TX queue.
                 */
                txq = &sc->sge.txq[vi->first_txq + p->queue];
                rc = bind_txq_to_traffic_class(sc, txq, p->cl);
        }

        return (rc);
}

int
t4_init_tx_sched(struct adapter *sc)
{
        int i, j;
        const int n = sc->chip_params->nsched_cls;
        struct port_info *pi;
        struct tx_cl_rl_params *tc;

        mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
        TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
        for_each_port(sc, i) {
                pi = sc->port[i];
                pi->sched_params = malloc(sizeof(*pi->sched_params) +
                    n * sizeof(*tc), M_CXGBE, M_ZERO | M_WAITOK);
                tc = &pi->sched_params->cl_rl[0];
                for (j = 0; j < n; j++, tc++) {
                        tc->refcount = 0;
                        tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
                        tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
                        tc->mode = FW_SCHED_PARAMS_MODE_CLASS;
                        tc->maxrate = 1000 * 1000;      /* 1 Gbps.  Arbitrary */

                        if (t4_sched_params_cl_rl_kbps(sc, pi->tx_chan, j,
                            tc->mode, tc->maxrate, tc->pktsize, 1) != 0)
                                tc->flags = CLRL_ERR;
                }
        }

        return (0);
}

int
t4_free_tx_sched(struct adapter *sc)
{
        int i;

        taskqueue_drain(taskqueue_thread, &sc->tc_task);

        for_each_port(sc, i) {
                if (sc->port[i] != NULL)
                        free(sc->port[i]->sched_params, M_CXGBE);
        }

        if (mtx_initialized(&sc->tc_lock))
                mtx_destroy(&sc->tc_lock);

        return (0);
}

void
t4_update_tx_sched(struct adapter *sc)
{

        taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
}
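
/*
 * Find a class-rate-limit traffic class that matches the requested rate
 * exactly and take a reference on it, or claim the first available class
 * and configure it.  The firmware update, if one is needed, is performed
 * asynchronously by the tc_task.  Returns ENOSPC if no class is available.
 */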
int
t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
    int *tc_idx)
{
        int rc = 0, fa = -1, i, pktsize, burstsize;
        bool update;
        struct tx_cl_rl_params *tc;
        struct port_info *pi;

        MPASS(port_id >= 0 && port_id < sc->params.nports);

        pi = sc->port[port_id];
        if (pi->sched_params->pktsize > 0)
                pktsize = pi->sched_params->pktsize;
        else
                pktsize = pi->vi[0].ifp->if_mtu;
        if (pi->sched_params->burstsize > 0)
                burstsize = pi->sched_params->burstsize;
        else
                burstsize = pktsize * 4;
        tc = &pi->sched_params->cl_rl[0];

        update = false;
        mtx_lock(&sc->tc_lock);
        for (i = 0; i < sc->chip_params->nsched_cls; i++, tc++) {
                if (fa < 0 && tc->refcount == 0 && !(tc->flags & CLRL_USER))
                        fa = i;         /* first available */

                if (tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
                    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
                    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
                    tc->maxrate == maxrate && tc->pktsize == pktsize &&
                    tc->burstsize == burstsize) {
                        tc->refcount++;
                        *tc_idx = i;
                        if ((tc->flags & (CLRL_ERR | CLRL_ASYNC | CLRL_SYNC)) ==
                            CLRL_ERR) {
                                /* Retry a previously failed update. */
                                update = true;
                        }
                        goto done;
                }
        }
        /* Not found */
        MPASS(i == sc->chip_params->nsched_cls);
        if (fa != -1) {
                tc = &pi->sched_params->cl_rl[fa];
                tc->refcount = 1;
                tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
                tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
                tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
                tc->maxrate = maxrate;
                tc->pktsize = pktsize;
                tc->burstsize = burstsize;
                *tc_idx = fa;
                update = true;
        } else {
                *tc_idx = -1;
                rc = ENOSPC;
        }
done:
        if (update)
                tc->flags |= CLRL_ASYNC;        /* Set while tc_lock is held. */
        mtx_unlock(&sc->tc_lock);
        if (update)
                t4_update_tx_sched(sc);
        return (rc);
}

void
t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
{
        struct tx_cl_rl_params *tc;

        MPASS(port_id >= 0 && port_id < sc->params.nports);
        MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls);

        mtx_lock(&sc->tc_lock);
        tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
        MPASS(tc->refcount > 0);
        tc->refcount--;
        mtx_unlock(&sc->tc_lock);
}

int
sysctl_tc(SYSCTL_HANDLER_ARGS)
{
        struct vi_info *vi = arg1;
        struct adapter *sc = vi->adapter;
        struct sge_txq *txq;
        int qidx = arg2, rc, tc_idx;

        MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);

        txq = &sc->sge.txq[qidx];
        tc_idx = txq->tc_idx;
        rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
        if (rc != 0 || req->newptr == NULL)
                return (rc);

        if (sc->flags & IS_VF)
                return (EPERM);
        if (!in_range(tc_idx, 0, sc->chip_params->nsched_cls - 1))
                return (EINVAL);

        return (bind_txq_to_traffic_class(sc, txq, tc_idx));
}
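
/*
 * Display a traffic class's parameters (rate, mode, pktsize, burstsize) as
 * text.  arg2 carries the port id in its upper 16 bits and the class index
 * in the lower 16.
 */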
int
sysctl_tc_params(SYSCTL_HANDLER_ARGS)
{
        struct adapter *sc = arg1;
        struct tx_cl_rl_params tc;
        struct sbuf *sb;
        int i, rc, port_id, mbps, gbps;

        rc = sysctl_wire_old_buffer(req, 0);
        if (rc != 0)
                return (rc);

        sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
        if (sb == NULL)
                return (ENOMEM);

        port_id = arg2 >> 16;
        MPASS(port_id < sc->params.nports);
        MPASS(sc->port[port_id] != NULL);
        i = arg2 & 0xffff;
        MPASS(i < sc->chip_params->nsched_cls);

        mtx_lock(&sc->tc_lock);
        tc = sc->port[port_id]->sched_params->cl_rl[i];
        mtx_unlock(&sc->tc_lock);

        switch (tc.rateunit) {
        case SCHED_CLASS_RATEUNIT_BITS:
                switch (tc.ratemode) {
                case SCHED_CLASS_RATEMODE_REL:
                        /* XXX: top speed or actual link speed? */
                        gbps = port_top_speed(sc->port[port_id]);
                        sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
                        break;
                case SCHED_CLASS_RATEMODE_ABS:
                        mbps = tc.maxrate / 1000;
                        gbps = tc.maxrate / 1000000;
                        if (tc.maxrate == gbps * 1000000)
                                sbuf_printf(sb, "%uGbps", gbps);
                        else if (tc.maxrate == mbps * 1000)
                                sbuf_printf(sb, "%uMbps", mbps);
                        else
                                sbuf_printf(sb, "%uKbps", tc.maxrate);
                        break;
                default:
                        rc = ENXIO;
                        goto done;
                }
                break;
        case SCHED_CLASS_RATEUNIT_PKTS:
                sbuf_printf(sb, "%upps", tc.maxrate);
                break;
        default:
                rc = ENXIO;
                goto done;
        }

        switch (tc.mode) {
        case SCHED_CLASS_MODE_CLASS:
                sbuf_printf(sb, " aggregate");
                break;
        case SCHED_CLASS_MODE_FLOW:
                sbuf_printf(sb, " per-flow");
                if (tc.pktsize > 0)
                        sbuf_printf(sb, " pkt-size %u", tc.pktsize);
                if (tc.burstsize > 0)
                        sbuf_printf(sb, " burst-size %u", tc.burstsize);
                break;
        default:
                rc = ENXIO;
                goto done;
        }

done:
        if (rc == 0)
                rc = sbuf_finish(sb);
        sbuf_delete(sb);

        return (rc);
}

#ifdef RATELIMIT
void
t4_init_etid_table(struct adapter *sc)
{
        int i;
        struct tid_info *t;

        if (!is_ethoffload(sc))
                return;

        t = &sc->tids;
        MPASS(t->netids > 0);

        mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
        t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
            M_ZERO | M_WAITOK);
        t->efree = t->etid_tab;
        t->etids_in_use = 0;
        for (i = 1; i < t->netids; i++)
                t->etid_tab[i - 1].next = &t->etid_tab[i];
        t->etid_tab[t->netids - 1].next = NULL;
}

void
t4_free_etid_table(struct adapter *sc)
{
        struct tid_info *t;

        if (!is_ethoffload(sc))
                return;

        t = &sc->tids;
        MPASS(t->netids > 0);

        free(t->etid_tab, M_CXGBE);
        t->etid_tab = NULL;

        if (mtx_initialized(&t->etid_lock))
                mtx_destroy(&t->etid_lock);
}

/* etid services */
static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
static void free_etid(struct adapter *, int);
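
/*
 * etids are handed out from a singly-linked free list threaded through
 * etid_tab: alloc_etid pops the head of the list and free_etid pushes the
 * entry back on.
 */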
static int
alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
{
        struct tid_info *t = &sc->tids;
        int etid = -1;

        mtx_lock(&t->etid_lock);
        if (t->efree) {
                union etid_entry *p = t->efree;

                etid = p - t->etid_tab + t->etid_base;
                t->efree = p->next;
                p->cst = cst;
                t->etids_in_use++;
        }
        mtx_unlock(&t->etid_lock);
        return (etid);
}

struct cxgbe_rate_tag *
lookup_etid(struct adapter *sc, int etid)
{
        struct tid_info *t = &sc->tids;

        return (t->etid_tab[etid - t->etid_base].cst);
}

static void
free_etid(struct adapter *sc, int etid)
{
        struct tid_info *t = &sc->tids;
        union etid_entry *p = &t->etid_tab[etid - t->etid_base];

        mtx_lock(&t->etid_lock);
        p->next = t->efree;
        t->efree = p;
        t->etids_in_use--;
        mtx_unlock(&t->etid_lock);
}

/*
 * Allocate a hardware-pacing send tag: reserve a matching rate-limit class
 * and an etid, then initialize the tag.
 */
int
cxgbe_rate_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
        int rc, schedcl;
        struct vi_info *vi = ifp->if_softc;
        struct port_info *pi = vi->pi;
        struct adapter *sc = pi->adapter;
        struct cxgbe_rate_tag *cst;

        MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);

        /* max_rate is in bytes/s; the scheduler is programmed in kbps. */
        rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
            (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
        if (rc != 0)
                return (rc);
        MPASS(schedcl >= 0 && schedcl < sc->chip_params->nsched_cls);

        cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
        if (cst == NULL) {
failed:
                t4_release_cl_rl(sc, pi->port_id, schedcl);
                return (ENOMEM);
        }

        cst->etid = alloc_etid(sc, cst);
        if (cst->etid < 0) {
                free(cst, M_CXGBE);
                goto failed;
        }

        mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
        mbufq_init(&cst->pending_tx, INT_MAX);
        mbufq_init(&cst->pending_fwack, INT_MAX);
        m_snd_tag_init(&cst->com, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
        cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
        cst->adapter = sc;
        cst->port_id = pi->port_id;
        cst->schedcl = schedcl;
        cst->max_rate = params->rate_limit.max_rate;
        cst->tx_credits = sc->params.eo_wr_cred;
        cst->tx_total = cst->tx_credits;
        cst->plen = 0;
        cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
            V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
            V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));

        /*
         * Queues will be selected later when the connection flowid is
         * available.
         */

        *pt = &cst->com;
        return (0);
}

/*
 * Change in parameters, no change in ifp.
 */
int
cxgbe_rate_tag_modify(struct m_snd_tag *mst,
    union if_snd_tag_modify_params *params)
{
        int rc, schedcl;
        struct cxgbe_rate_tag *cst = mst_to_crt(mst);
        struct adapter *sc = cst->adapter;

        /* XXX: is schedcl -1 ok here? */
        MPASS(cst->schedcl >= 0 && cst->schedcl < sc->chip_params->nsched_cls);

        mtx_lock(&cst->lock);
        MPASS(cst->flags & EO_SND_TAG_REF);
        rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
            (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
        if (rc != 0) {
                mtx_unlock(&cst->lock); /* Don't leak the lock on failure. */
                return (rc);
        }
        MPASS(schedcl >= 0 && schedcl < sc->chip_params->nsched_cls);
        t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
        cst->schedcl = schedcl;
        cst->max_rate = params->rate_limit.max_rate;
        mtx_unlock(&cst->lock);

        return (0);
}

/*
 * Report the tag's configured max rate and an estimate of the queue level
 * derived from the number of outstanding work-request credits.
 */
int
cxgbe_rate_tag_query(struct m_snd_tag *mst,
    union if_snd_tag_query_params *params)
{
        struct cxgbe_rate_tag *cst = mst_to_crt(mst);

        params->rate_limit.max_rate = cst->max_rate;

#define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
        params->rate_limit.queue_level =
            (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;

        return (0);
}

/*
 * Unlocks cst and frees it.
 */
void
cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
{
        struct adapter *sc = cst->adapter;

        mtx_assert(&cst->lock, MA_OWNED);
        MPASS((cst->flags & EO_SND_TAG_REF) == 0);
        MPASS(cst->tx_credits == cst->tx_total);
        MPASS(cst->plen == 0);
        MPASS(mbufq_first(&cst->pending_tx) == NULL);
        MPASS(mbufq_first(&cst->pending_fwack) == NULL);

        if (cst->etid >= 0)
                free_etid(sc, cst->etid);
        if (cst->schedcl != -1)
                t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
        mtx_unlock(&cst->lock);
        mtx_destroy(&cst->lock);
        free(cst, M_CXGBE);
}

void
cxgbe_rate_tag_free(struct m_snd_tag *mst)
{
        struct cxgbe_rate_tag *cst = mst_to_crt(mst);

        mtx_lock(&cst->lock);

        /* The kernel is done with the snd_tag.  Remove its reference. */
        MPASS(cst->flags & EO_SND_TAG_REF);
        cst->flags &= ~EO_SND_TAG_REF;

        if (cst->ncompl == 0) {
                /*
                 * No fw4_ack in flight.  Free the tag right away if there are
                 * no outstanding credits.  Request the firmware to return all
                 * credits for the etid otherwise.
                 */
                if (cst->tx_credits == cst->tx_total) {
                        cxgbe_rate_tag_free_locked(cst);
                        return; /* cst is gone. */
                }
                send_etid_flush_wr(cst);
        }
        mtx_unlock(&cst->lock);
}

void
cxgbe_ratelimit_query(struct ifnet *ifp, struct if_ratelimit_query_results *q)
{
        struct vi_info *vi = ifp->if_softc;
        struct adapter *sc = vi->adapter;

        q->rate_table = NULL;
        q->flags = RT_IS_SELECTABLE;
        /*
         * Absolute max limits from the firmware configuration.  Practical
         * limits depend on the burstsize, pktsize (ifp->if_mtu ultimately)
         * and the card's cclk.
         */
        q->max_flows = sc->tids.netids;
        q->number_of_rates = sc->chip_params->nsched_cls;
        q->min_segment_burst = 4;       /* Matches PKTSCHED_BURST in the firmware. */

#if 1
        if (chip_id(sc) < CHELSIO_T6) {
                /* Based on testing by rrs@ with a T580 at burstsize = 4. */
                MPASS(q->min_segment_burst == 4);
                q->max_flows = min(4000, q->max_flows);
        } else {
                /* XXX: TBD, carried forward from T5 for now. */
                q->max_flows = min(4000, q->max_flows);
        }

        /*
         * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
         * even knows whether hw pacing will be used or not.  This prevents
         * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
         * the private ioctls from using any of the traffic classes.
         *
         * Underreport the number of rates to tcp_ratelimit so that it doesn't
         * hog all of them.  This can be removed if/when tcp_ratelimit switches
         * to making its allocations on first-use rather than link-up.  There
         * is nothing wrong with one particular consumer reserving all the
         * classes but it should do so only if it'll actually use hw rate
         * limiting.
         */
        q->number_of_rates /= 4;
#endif
}
#endif