/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

/*
 * Returns true if val lies within [lo, hi].  A negative val means "parameter
 * not specified" and is accepted here; callers that cannot tolerate an
 * unspecified value check for val < 0 separately.
 */
static int
in_range(int val, int lo, int hi)
{

	return (val < 0 || (val <= hi && val >= lo));
}

static int
set_sched_class_config(struct adapter *sc, int minmax)
{
	int rc;

	if (minmax < 0)
		return (EINVAL);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
	if (rc)
		return (rc);
	rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
	end_synchronized_op(sc, 0);

	return (rc);
}
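
/*
 * Illustrative sketch only: a consumer reaches set_sched_class_config()
 * above through t4_set_sched_class() further down, with a request shaped
 * like this.  The minmax value shown is an assumed example.
 *
 *	struct t4_sched_params p = {0};
 *
 *	p.type = SCHED_CLASS_TYPE_PACKET;
 *	p.subcmd = SCHED_CLASS_SUBCMD_CONFIG;
 *	p.u.config.minmax = 1;
 *	rc = t4_set_sched_class(sc, &p);
 */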

static int
set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
    int sleep_ok)
{
	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
	struct port_info *pi;
	struct tx_cl_rl_params *tc, old;
	bool check_pktsize = false;

	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
	else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
	else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
	else
		return (EINVAL);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		if (p->mode == SCHED_CLASS_MODE_CLASS)
			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
		else if (p->mode == SCHED_CLASS_MODE_FLOW) {
			check_pktsize = true;
			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
		} else
			return (EINVAL);
	} else
		fw_mode = 0;

	/* Valid channel must always be provided. */
	if (p->channel < 0)
		return (EINVAL);
	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
		return (ERANGE);

	pi = sc->port[sc->chan_map[p->channel]];
	if (pi == NULL)
		return (ENXIO);
	MPASS(pi->tx_chan == p->channel);
	top_speed = port_top_speed(pi) * 1000000;	/* Gbps -> Kbps */

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CH_RL) {
		/*
		 * Valid rate (mode, unit and values) must be provided.
		 */

		if (p->minrate < 0)
			p->minrate = 0;
		if (p->maxrate < 0)
			return (EINVAL);

		if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
			/* ratemode could be relative (%) or absolute. */
			if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
				/* maxrate is % of port bandwidth. */
				if (!in_range(p->minrate, 0, 100) ||
				    !in_range(p->maxrate, 0, 100)) {
					return (ERANGE);
				}
			} else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
				/* maxrate is absolute value in kbps. */
				if (!in_range(p->minrate, 0, top_speed) ||
				    !in_range(p->maxrate, 0, top_speed)) {
					return (ERANGE);
				}
			} else
				return (EINVAL);
		} else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
			/*
			 * maxrate is the absolute value in pps, which is
			 * always an absolute rate.  This also keeps
			 * fw_ratemode initialized on this path.
			 */
			check_pktsize = true;
			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
		} else
			return (EINVAL);
	} else {
		MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);

		/*
		 * Valid weight must be provided.
		 */
		if (p->weight < 0)
			return (EINVAL);
		if (!in_range(p->weight, 1, 99))
			return (ERANGE);

		fw_rateunit = 0;
		fw_ratemode = 0;
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CL_WRR) {
		/*
		 * Valid scheduling class must be provided.
		 */
		if (p->cl < 0)
			return (EINVAL);
		if (!in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
			return (ERANGE);
	}

	if (check_pktsize) {
		if (p->pktsize < 0)
			return (EINVAL);
		if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
			return (ERANGE);
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		tc = &pi->sched_params->cl_rl[p->cl];
		mtx_lock(&sc->tc_lock);
		if (tc->refcount > 0 || tc->flags & (CLRL_SYNC | CLRL_ASYNC))
			rc = EBUSY;
		else {
			/* Snapshot the class before modifying it so that a
			 * failure below can be rolled back. */
			old = *tc;

			tc->flags |= CLRL_SYNC | CLRL_USER;
			tc->ratemode = fw_ratemode;
			tc->rateunit = fw_rateunit;
			tc->mode = fw_mode;
			tc->maxrate = p->maxrate;
			tc->pktsize = p->pktsize;
			rc = 0;
		}
		mtx_unlock(&sc->tc_lock);
		if (rc != 0)
			return (rc);
	}

	rc = begin_synchronized_op(sc, NULL,
	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
	if (rc != 0) {
		if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
			mtx_lock(&sc->tc_lock);
			*tc = old;
			mtx_unlock(&sc->tc_lock);
		}
		return (rc);
	}
	rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, fw_mode,
	    fw_rateunit, fw_ratemode, p->channel, p->cl, p->minrate, p->maxrate,
	    p->weight, p->pktsize, 0, sleep_ok);
	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		mtx_lock(&sc->tc_lock);
		MPASS(tc->flags & CLRL_SYNC);
		MPASS(tc->flags & CLRL_USER);
		MPASS(tc->refcount == 0);

		tc->flags &= ~CLRL_SYNC;
		if (rc == 0)
			tc->flags &= ~CLRL_ERR;
		else
			tc->flags |= CLRL_ERR;
		mtx_unlock(&sc->tc_lock);
	}

	return (rc);
}
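
/*
 * Illustrative sketch only: a t4_sched_class_params request that passes the
 * validation above, programming class 0 on channel 0 as a per-flow absolute
 * rate limiter.  The rate and pktsize are assumed example values (for flow
 * mode, pktsize must lie within [64, MTU]).
 *
 *	struct t4_sched_class_params p = {0};
 *
 *	p.level = SCHED_CLASS_LEVEL_CL_RL;
 *	p.mode = SCHED_CLASS_MODE_FLOW;
 *	p.rateunit = SCHED_CLASS_RATEUNIT_BITS;
 *	p.ratemode = SCHED_CLASS_RATEMODE_ABS;
 *	p.channel = 0;
 *	p.cl = 0;
 *	p.minrate = 0;
 *	p.maxrate = 100 * 1000;		(100 Mbps, expressed in kbps)
 *	p.pktsize = 1500;
 *	rc = set_sched_class_params(sc, &p, 1);
 */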

static void
update_tx_sched(void *context, int pending)
{
	int i, j, rc;
	struct port_info *pi;
	struct tx_cl_rl_params *tc;
	struct adapter *sc = context;
	const int n = sc->chip_params->nsched_cls;

	mtx_lock(&sc->tc_lock);
	for_each_port(sc, i) {
		pi = sc->port[i];
		tc = &pi->sched_params->cl_rl[0];
		for (j = 0; j < n; j++, tc++) {
			MPASS(mtx_owned(&sc->tc_lock));
			if ((tc->flags & CLRL_ASYNC) == 0)
				continue;
			mtx_unlock(&sc->tc_lock);

			if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t4utxs") != 0) {
				mtx_lock(&sc->tc_lock);
				continue;
			}
			rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
			    FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
			    tc->pktsize, tc->burstsize, 1);
			end_synchronized_op(sc, 0);

			mtx_lock(&sc->tc_lock);
			MPASS(tc->flags & CLRL_ASYNC);
			tc->flags &= ~CLRL_ASYNC;
			if (rc == 0)
				tc->flags &= ~CLRL_ERR;
			else
				tc->flags |= CLRL_ERR;
		}
	}
	mtx_unlock(&sc->tc_lock);
}
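
/*
 * Summary of the class-flag protocol used throughout this file: CLRL_SYNC
 * marks a synchronous, user-initiated update in progress
 * (set_sched_class_params), CLRL_ASYNC marks a deferred update that
 * update_tx_sched above will push to the firmware, CLRL_ERR records that the
 * last attempt to program the class failed, and CLRL_USER marks a class
 * configured explicitly by the user so that t4_reserve_cl_rl_kbps will not
 * hand it out.
 */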

int
t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
{

	if (p->type != SCHED_CLASS_TYPE_PACKET)
		return (EINVAL);

	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
		return (set_sched_class_config(sc, p->u.config.minmax));

	if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
		return (set_sched_class_params(sc, &p->u.params, 1));

	return (EINVAL);
}

static int
bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
{
	struct tx_cl_rl_params *tc0, *tc;
	int rc, old_idx;
	uint32_t fw_mnem, fw_class;

	if (!(txq->eq.flags & EQ_ALLOCATED))
		return (EAGAIN);

	mtx_lock(&sc->tc_lock);
	if (txq->tc_idx == -2) {
		rc = EBUSY;	/* Another bind/unbind in progress already. */
		goto done;
	}
	if (idx == txq->tc_idx) {
		rc = 0;		/* No change, nothing to do. */
		goto done;
	}

	tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
	if (idx != -1) {
		/*
		 * Bind to a different class at index idx.
		 */
		tc = &tc0[idx];
		if (tc->flags & CLRL_ERR) {
			rc = ENXIO;
			goto done;
		} else {
			/*
			 * Ok to proceed.  Place a reference on the new class
			 * while still holding on to the reference on the
			 * previous class, if any.
			 */
			tc->refcount++;
		}
	}
	/* Mark as busy before letting go of the lock. */
	old_idx = txq->tc_idx;
	txq->tc_idx = -2;
	mtx_unlock(&sc->tc_lock);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
	if (rc != 0) {
		/*
		 * Undo the anticipatory reference and the busy marker or the
		 * txq would be stuck in the in-transition state forever.
		 */
		mtx_lock(&sc->tc_lock);
		MPASS(txq->tc_idx == -2);
		if (idx != -1) {
			tc = &tc0[idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = old_idx;
		goto done;
	}
	fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
	    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
	fw_class = idx < 0 ? 0xffffffff : idx;
	rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem, &fw_class);
	end_synchronized_op(sc, 0);

	mtx_lock(&sc->tc_lock);
	MPASS(txq->tc_idx == -2);
	if (rc == 0) {
		/*
		 * Unbind, bind, or bind to a different class succeeded.  Remove
		 * the reference on the old traffic class, if any.
		 */
		if (old_idx != -1) {
			tc = &tc0[old_idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = idx;
	} else {
		/*
		 * Unbind, bind, or bind to a different class failed.  Remove
		 * the anticipatory reference on the new traffic class, if any.
		 */
		if (idx != -1) {
			tc = &tc0[idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = old_idx;
	}
done:
	MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->chip_params->nsched_cls);
	mtx_unlock(&sc->tc_lock);
	return (rc);
}

int
t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
	struct port_info *pi = NULL;
	struct vi_info *vi;
	struct sge_txq *txq;
	int i, rc;

	if (p->port >= sc->params.nports)
		return (EINVAL);

	/*
	 * XXX: cxgbetool allows the user to specify the physical port only.
	 * So we always operate on the main VI.
	 */
	pi = sc->port[p->port];
	vi = &pi->vi[0];

	/* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
	if (!(vi->flags & VI_INIT_DONE))
		return (EAGAIN);
	MPASS(vi->ntxq > 0);

	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
	    !in_range(p->cl, 0, sc->chip_params->nsched_cls - 1))
		return (EINVAL);

	if (p->queue < 0) {
		/*
		 * Change the scheduling on all the TX queues for the
		 * interface.
		 */
		for_each_txq(vi, i, txq) {
			rc = bind_txq_to_traffic_class(sc, txq, p->cl);
			if (rc != 0)
				break;
		}
	} else {
		/*
		 * If op.queue is non-negative, then we're only changing the
		 * scheduling on a single specified TX queue.
		 */
		txq = &sc->sge.txq[vi->first_txq + p->queue];
		rc = bind_txq_to_traffic_class(sc, txq, p->cl);
	}

	return (rc);
}
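
/*
 * Illustrative sketch only: binding every tx queue of port 0 to scheduling
 * class 2 with the structure consumed above.  The values are assumed
 * examples; queue = -1 selects all queues and cl = -1 would unbind.
 *
 *	struct t4_sched_queue q = {0};
 *
 *	q.port = 0;
 *	q.queue = -1;
 *	q.cl = 2;
 *	rc = t4_set_sched_queue(sc, &q);
 */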

int
t4_init_tx_sched(struct adapter *sc)
{
	int i, j;
	const int n = sc->chip_params->nsched_cls;
	struct port_info *pi;
	struct tx_cl_rl_params *tc;

	mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
	TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
	for_each_port(sc, i) {
		pi = sc->port[i];
		pi->sched_params = malloc(sizeof(*pi->sched_params) +
		    n * sizeof(*tc), M_CXGBE, M_ZERO | M_WAITOK);
		tc = &pi->sched_params->cl_rl[0];
		for (j = 0; j < n; j++, tc++) {
			tc->refcount = 0;
			tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
			tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
			tc->mode = FW_SCHED_PARAMS_MODE_CLASS;
			tc->maxrate = 1000 * 1000;	/* 1 Gbps.  Arbitrary */

			if (t4_sched_params_cl_rl_kbps(sc, pi->tx_chan, j,
			    tc->mode, tc->maxrate, tc->pktsize, 1) != 0)
				tc->flags = CLRL_ERR;
		}
	}

	return (0);
}

int
t4_free_tx_sched(struct adapter *sc)
{
	int i;

	taskqueue_drain(taskqueue_thread, &sc->tc_task);

	for_each_port(sc, i) {
		if (sc->port[i] != NULL)
			free(sc->port[i]->sched_params, M_CXGBE);
	}

	if (mtx_initialized(&sc->tc_lock))
		mtx_destroy(&sc->tc_lock);

	return (0);
}

void
t4_update_tx_sched(struct adapter *sc)
{

	taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
}

int
t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
    int *tc_idx)
{
	int rc = 0, fa = -1, i, pktsize, burstsize;
	bool update;
	struct tx_cl_rl_params *tc;
	struct port_info *pi;

	MPASS(port_id >= 0 && port_id < sc->params.nports);

	pi = sc->port[port_id];
	if (pi->sched_params->pktsize > 0)
		pktsize = pi->sched_params->pktsize;
	else
		pktsize = pi->vi[0].ifp->if_mtu;
	if (pi->sched_params->burstsize > 0)
		burstsize = pi->sched_params->burstsize;
	else
		burstsize = pktsize * 4;
	tc = &pi->sched_params->cl_rl[0];

	update = false;
	mtx_lock(&sc->tc_lock);
	for (i = 0; i < sc->chip_params->nsched_cls; i++, tc++) {
		if (fa < 0 && tc->refcount == 0 && !(tc->flags & CLRL_USER))
			fa = i;		/* first available */

		if (tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
		    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
		    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
		    tc->maxrate == maxrate && tc->pktsize == pktsize &&
		    tc->burstsize == burstsize) {
			tc->refcount++;
			*tc_idx = i;
			if ((tc->flags & (CLRL_ERR | CLRL_ASYNC | CLRL_SYNC)) ==
			    CLRL_ERR) {
				/*
				 * Retry a previously failed class.  Set
				 * CLRL_ASYNC while still holding tc_lock;
				 * update_tx_sched examines it there.
				 */
				tc->flags |= CLRL_ASYNC;
				update = true;
			}
			goto done;
		}
	}
	/* Not found */
	MPASS(i == sc->chip_params->nsched_cls);
	if (fa != -1) {
		tc = &pi->sched_params->cl_rl[fa];
		tc->flags |= CLRL_ASYNC;
		tc->refcount = 1;
		tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
		tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
		tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
		tc->maxrate = maxrate;
		tc->pktsize = pktsize;
		tc->burstsize = burstsize;
		*tc_idx = fa;
		update = true;
	} else {
		*tc_idx = -1;
		rc = ENOSPC;
	}
done:
	mtx_unlock(&sc->tc_lock);
	if (update)
		t4_update_tx_sched(sc);
	return (rc);
}
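
/*
 * Illustrative sketch only: the reserve/release pairing for internal
 * consumers of the functions above and below (the rate is an assumed
 * example, in kbps).
 *
 *	int tc_idx;
 *
 *	if (t4_reserve_cl_rl_kbps(sc, pi->port_id, 100000, &tc_idx) == 0) {
 *		... transmit using traffic class tc_idx ...
 *		t4_release_cl_rl(sc, pi->port_id, tc_idx);
 *	}
 */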

void
t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
{
	struct tx_cl_rl_params *tc;

	MPASS(port_id >= 0 && port_id < sc->params.nports);
	MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
	MPASS(tc->refcount > 0);
	tc->refcount--;
	mtx_unlock(&sc->tc_lock);
}

int
sysctl_tc(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct port_info *pi;
	struct adapter *sc;
	struct sge_txq *txq;
	int qidx = arg2, rc, tc_idx;

	MPASS(qidx >= 0 && qidx < vi->ntxq);
	pi = vi->pi;
	sc = pi->adapter;
	txq = &sc->sge.txq[vi->first_txq + qidx];

	tc_idx = txq->tc_idx;
	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (sc->flags & IS_VF)
		return (EPERM);
	if (!in_range(tc_idx, 0, sc->chip_params->nsched_cls - 1))
		return (EINVAL);

	return (bind_txq_to_traffic_class(sc, txq, tc_idx));
}

int
sysctl_tc_params(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct tx_cl_rl_params tc;
	struct sbuf *sb;
	int i, rc, port_id, mbps, gbps;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	port_id = arg2 >> 16;
	MPASS(port_id < sc->params.nports);
	MPASS(sc->port[port_id] != NULL);
	i = arg2 & 0xffff;
	MPASS(i < sc->chip_params->nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = sc->port[port_id]->sched_params->cl_rl[i];
	mtx_unlock(&sc->tc_lock);

	switch (tc.rateunit) {
	case SCHED_CLASS_RATEUNIT_BITS:
		switch (tc.ratemode) {
		case SCHED_CLASS_RATEMODE_REL:
			/* XXX: top speed or actual link speed? */
			gbps = port_top_speed(sc->port[port_id]);
			sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
			break;
		case SCHED_CLASS_RATEMODE_ABS:
			mbps = tc.maxrate / 1000;
			gbps = tc.maxrate / 1000000;
			if (tc.maxrate == gbps * 1000000)
				sbuf_printf(sb, "%uGbps", gbps);
			else if (tc.maxrate == mbps * 1000)
				sbuf_printf(sb, "%uMbps", mbps);
			else
				sbuf_printf(sb, "%uKbps", tc.maxrate);
			break;
		default:
			rc = ENXIO;
			goto done;
		}
		break;
	case SCHED_CLASS_RATEUNIT_PKTS:
		sbuf_printf(sb, "%upps", tc.maxrate);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

	switch (tc.mode) {
	case SCHED_CLASS_MODE_CLASS:
		sbuf_printf(sb, " aggregate");
		break;
	case SCHED_CLASS_MODE_FLOW:
		sbuf_printf(sb, " per-flow");
		if (tc.pktsize > 0)
			sbuf_printf(sb, " pkt-size %u", tc.pktsize);
		if (tc.burstsize > 0)
			sbuf_printf(sb, " burst-size %u", tc.burstsize);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

done:
	if (rc == 0)
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}
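
/*
 * Example renderings produced by sysctl_tc_params above (assumed class
 * settings): "1Gbps per-flow pkt-size 1500" for an absolute bit rate in flow
 * mode, "50% of 25Gbps aggregate" for a relative class-mode limiter, or
 * "10000pps aggregate" for a packet-rate class.
 */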

#ifdef RATELIMIT
void
t4_init_etid_table(struct adapter *sc)
{
	int i;
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
	t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
	    M_ZERO | M_WAITOK);
	t->efree = t->etid_tab;
	t->etids_in_use = 0;
	for (i = 1; i < t->netids; i++)
		t->etid_tab[i - 1].next = &t->etid_tab[i];
	t->etid_tab[t->netids - 1].next = NULL;
}

void
t4_free_etid_table(struct adapter *sc)
{
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	free(t->etid_tab, M_CXGBE);
	t->etid_tab = NULL;

	if (mtx_initialized(&t->etid_lock))
		mtx_destroy(&t->etid_lock);
}

/* etid services */
static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
static void free_etid(struct adapter *, int);

static int
alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
{
	struct tid_info *t = &sc->tids;
	int etid = -1;

	mtx_lock(&t->etid_lock);
	if (t->efree) {
		union etid_entry *p = t->efree;

		etid = p - t->etid_tab + t->etid_base;
		t->efree = p->next;
		p->cst = cst;
		t->etids_in_use++;
	}
	mtx_unlock(&t->etid_lock);
	return (etid);
}

struct cxgbe_rate_tag *
lookup_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;

	return (t->etid_tab[etid - t->etid_base].cst);
}

static void
free_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;
	union etid_entry *p = &t->etid_tab[etid - t->etid_base];

	mtx_lock(&t->etid_lock);
	p->next = t->efree;
	t->efree = p;
	t->etids_in_use--;
	mtx_unlock(&t->etid_lock);
}

int
cxgbe_rate_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	int rc, schedcl;
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct cxgbe_rate_tag *cst;

	MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);

	rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0)
		return (rc);
	MPASS(schedcl >= 0 && schedcl < sc->chip_params->nsched_cls);

	cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
	if (cst == NULL) {
failed:
		t4_release_cl_rl(sc, pi->port_id, schedcl);
		return (ENOMEM);
	}

	cst->etid = alloc_etid(sc, cst);
	if (cst->etid < 0) {
		free(cst, M_CXGBE);
		goto failed;
	}

	mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
	mbufq_init(&cst->pending_tx, INT_MAX);
	mbufq_init(&cst->pending_fwack, INT_MAX);
	m_snd_tag_init(&cst->com, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
	cst->adapter = sc;
	cst->port_id = pi->port_id;
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	cst->tx_credits = sc->params.eo_wr_cred;
	cst->tx_total = cst->tx_credits;
	cst->plen = 0;
	cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
	    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));

	/*
	 * Queues will be selected later when the connection flowid is
	 * available.
	 */

	*pt = &cst->com;
	return (0);
}

/*
 * Change in parameters, no change in ifp.
 */
int
cxgbe_rate_tag_modify(struct m_snd_tag *mst,
    union if_snd_tag_modify_params *params)
{
	int rc, schedcl;
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);
	struct adapter *sc = cst->adapter;

	/* XXX: is schedcl -1 ok here? */
	MPASS(cst->schedcl >= 0 && cst->schedcl < sc->chip_params->nsched_cls);

	mtx_lock(&cst->lock);
	MPASS(cst->flags & EO_SND_TAG_REF);
	rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0) {
		/* Drop the tag lock before bailing out. */
		mtx_unlock(&cst->lock);
		return (rc);
	}
	MPASS(schedcl >= 0 && schedcl < sc->chip_params->nsched_cls);
	t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	mtx_unlock(&cst->lock);

	return (0);
}
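
/*
 * Note on units: params->rate_limit.max_rate is in bytes per second, and
 * both cxgbe_rate_tag_alloc and cxgbe_rate_tag_modify convert it to kbps for
 * t4_reserve_cl_rl_kbps (max_rate * 8 / 1000).  For example, an assumed
 * request of 12500000 B/s reserves a 100000 kbps class.
 */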

int
cxgbe_rate_tag_query(struct m_snd_tag *mst,
    union if_snd_tag_query_params *params)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	params->rate_limit.max_rate = cst->max_rate;

#define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
	params->rate_limit.queue_level =
	    (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;

	return (0);
}

/*
 * Unlocks cst and frees it.
 */
void
cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
{
	struct adapter *sc = cst->adapter;

	mtx_assert(&cst->lock, MA_OWNED);
	MPASS((cst->flags & EO_SND_TAG_REF) == 0);
	MPASS(cst->tx_credits == cst->tx_total);
	MPASS(cst->plen == 0);
	MPASS(mbufq_first(&cst->pending_tx) == NULL);
	MPASS(mbufq_first(&cst->pending_fwack) == NULL);

	if (cst->etid >= 0)
		free_etid(sc, cst->etid);
	if (cst->schedcl != -1)
		t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	mtx_unlock(&cst->lock);
	mtx_destroy(&cst->lock);
	free(cst, M_CXGBE);
}

void
cxgbe_rate_tag_free(struct m_snd_tag *mst)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	mtx_lock(&cst->lock);

	/* The kernel is done with the snd_tag.  Remove its reference. */
	MPASS(cst->flags & EO_SND_TAG_REF);
	cst->flags &= ~EO_SND_TAG_REF;

	if (cst->ncompl == 0) {
		/*
		 * No fw4_ack in flight.  Free the tag right away if there are
		 * no outstanding credits.  Request the firmware to return all
		 * credits for the etid otherwise.
		 */
		if (cst->tx_credits == cst->tx_total) {
			cxgbe_rate_tag_free_locked(cst);
			return;	/* cst is gone. */
		}
		send_etid_flush_wr(cst);
	}
	mtx_unlock(&cst->lock);
}

void
cxgbe_ratelimit_query(struct ifnet *ifp, struct if_ratelimit_query_results *q)
{
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;

	q->rate_table = NULL;
	q->flags = RT_IS_SELECTABLE;
	/*
	 * Absolute max limits from the firmware configuration.  Practical
	 * limits depend on the burstsize, pktsize (ifp->if_mtu ultimately) and
	 * the card's cclk.
	 */
	q->max_flows = sc->tids.netids;
	q->number_of_rates = sc->chip_params->nsched_cls;
	q->min_segment_burst = 4;	/* matches PKTSCHED_BURST in the firmware. */

#if 1
	if (chip_id(sc) < CHELSIO_T6) {
		/* Based on testing by rrs@ with a T580 at burstsize = 4. */
		MPASS(q->min_segment_burst == 4);
		q->max_flows = min(4000, q->max_flows);
	} else {
		/* XXX: TBD, carried forward from T5 for now. */
		q->max_flows = min(4000, q->max_flows);
	}

	/*
	 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
	 * even knows whether hw pacing will be used or not.  This prevents
	 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
	 * the private ioctls from using any of the traffic classes.
	 *
	 * Underreport the number of rates to tcp_ratelimit so that it doesn't
	 * hog all of them.  This can be removed if/when tcp_ratelimit switches
	 * to making its allocations on first-use rather than link-up.  There
	 * is nothing wrong with one particular consumer reserving all the
	 * classes but it should do so only if it'll actually use hw rate
	 * limiting.
	 */
	q->number_of_rates /= 4;
#endif
}
#endif