/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

/*
 * Range check that treats a negative value as "unspecified" and lets it pass.
 * Callers that require the value to be specified check for a negative value
 * separately, before calling this.
 */
static int
in_range(int val, int lo, int hi)
{

	return (val < 0 || (val <= hi && val >= lo));
}

static int
set_sched_class_config(struct adapter *sc, int minmax)
{
	int rc;

	if (minmax < 0)
		return (EINVAL);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
	if (rc)
		return (rc);
	if (hw_off_limits(sc))
		rc = ENXIO;
	else
		rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
	end_synchronized_op(sc, 0);

	return (rc);
}
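
/*
 * Illustrative sketch (not compiled, not part of the driver): a typical
 * t4_sched_class_params fill for the most common case handled by
 * set_sched_class_params() below -- an absolute, bits-per-second,
 * class-level rate limiter.  The field names are the ones this function
 * consumes; the values are examples only.
 *
 *	struct t4_sched_class_params p = {
 *		.level = SCHED_CLASS_LEVEL_CL_RL,
 *		.mode = SCHED_CLASS_MODE_CLASS,
 *		.rateunit = SCHED_CLASS_RATEUNIT_BITS,
 *		.ratemode = SCHED_CLASS_RATEMODE_ABS,
 *		.channel = 0,		// must be a valid tx channel
 *		.cl = 0,		// scheduling class index
 *		.minrate = 0,
 *		.maxrate = 1000000,	// kbps in ABS mode, i.e. 1Gbps
 *		.weight = 0,
 *		.pktsize = 0,
 *	};
 *	rc = set_sched_class_params(sc, &p, 1);
 */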

static int
set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
    int sleep_ok)
{
	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
	struct port_info *pi;
	struct tx_cl_rl_params *tc, old;
	bool check_pktsize = false;

	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
	else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
	else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
	else
		return (EINVAL);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		if (p->mode == SCHED_CLASS_MODE_CLASS)
			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
		else if (p->mode == SCHED_CLASS_MODE_FLOW) {
			check_pktsize = true;
			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
		} else
			return (EINVAL);
	} else
		fw_mode = 0;

	/* Valid channel must always be provided. */
	if (p->channel < 0)
		return (EINVAL);
	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
		return (ERANGE);

	pi = sc->port[sc->chan_map[p->channel]];
	if (pi == NULL)
		return (ENXIO);
	MPASS(pi->tx_chan == p->channel);
	top_speed = port_top_speed(pi) * 1000000;	/* Gbps -> Kbps */

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CH_RL) {
		/*
		 * Valid rate (mode, unit and values) must be provided.
		 */

		if (p->minrate < 0)
			p->minrate = 0;
		if (p->maxrate < 0)
			return (EINVAL);

		if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
			/* ratemode could be relative (%) or absolute. */
			if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
				/* maxrate is % of port bandwidth. */
				if (!in_range(p->minrate, 0, 100) ||
				    !in_range(p->maxrate, 0, 100)) {
					return (ERANGE);
				}
			} else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
				/* maxrate is absolute value in kbps. */
				if (!in_range(p->minrate, 0, top_speed) ||
				    !in_range(p->maxrate, 0, top_speed)) {
					return (ERANGE);
				}
			} else
				return (EINVAL);
		} else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
			/*
			 * maxrate is the absolute value in pps.  A packet
			 * rate is inherently absolute; set fw_ratemode here
			 * so it is not used uninitialized below.
			 */
			check_pktsize = true;
			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
		} else
			return (EINVAL);
	} else {
		MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);

		/*
		 * Valid weight must be provided.
		 */
		if (p->weight < 0)
			return (EINVAL);
		if (!in_range(p->weight, 1, 99))
			return (ERANGE);

		fw_rateunit = 0;
		fw_ratemode = 0;
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CL_WRR) {
		/*
		 * Valid scheduling class must be provided.
		 */
		if (p->cl < 0)
			return (EINVAL);
		if (!in_range(p->cl, 0, sc->params.nsched_cls - 1))
			return (ERANGE);
	}

	if (check_pktsize) {
		if (p->pktsize < 0)
			return (EINVAL);
		if (!in_range(p->pktsize, 64, pi->vi[0].ifp->if_mtu))
			return (ERANGE);
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		tc = &pi->sched_params->cl_rl[p->cl];
		mtx_lock(&sc->tc_lock);
		if (tc->refcount > 0 || tc->state == CS_HW_UPDATE_IN_PROGRESS)
			rc = EBUSY;
		else {
			old = *tc;

			tc->flags |= CF_USER;
			tc->state = CS_HW_UPDATE_IN_PROGRESS;
			tc->ratemode = fw_ratemode;
			tc->rateunit = fw_rateunit;
			tc->mode = fw_mode;
			tc->maxrate = p->maxrate;
			tc->pktsize = p->pktsize;
			rc = 0;
		}
		mtx_unlock(&sc->tc_lock);
		if (rc != 0)
			return (rc);
	}

	rc = begin_synchronized_op(sc, NULL,
	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
	if (rc != 0) {
		if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
			mtx_lock(&sc->tc_lock);
			MPASS(tc->refcount == 0);
			MPASS(tc->flags & CF_USER);
			MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);
			*tc = old;
			mtx_unlock(&sc->tc_lock);
		}
		return (rc);
	}
	if (!hw_off_limits(sc)) {
		rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
		    fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
		    p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
	}
	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		mtx_lock(&sc->tc_lock);
		MPASS(tc->refcount == 0);
		MPASS(tc->flags & CF_USER);
		MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);

		if (rc == 0)
			tc->state = CS_HW_CONFIGURED;
		else {
			/* parameters failed so we don't park at params_set */
			tc->state = CS_UNINITIALIZED;
			tc->flags &= ~CF_USER;
			CH_ERR(pi, "failed to configure traffic class %d: %d. "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", p->cl, rc, fw_mode, fw_rateunit,
			    fw_ratemode, p->channel, p->minrate, p->maxrate,
			    p->pktsize, 0);
		}
		mtx_unlock(&sc->tc_lock);
	}

	return (rc);
}
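
/*
 * A rough sketch of the traffic-class state machine implied by the CS_*
 * states used in this file (inferred from the transitions coded above and
 * below; the states themselves are defined elsewhere in the driver):
 *
 *	CS_UNINITIALIZED -> CS_HW_UPDATE_REQUESTED	t4_reserve_cl_rl_kbps()
 *	CS_UNINITIALIZED -> CS_HW_UPDATE_IN_PROGRESS	set_sched_class_params()
 *	CS_PARAMS_SET -> CS_HW_UPDATE_REQUESTED		reuse of a parked class
 *	CS_HW_UPDATE_REQUESTED -> CS_HW_CONFIGURED	update_tx_sched(), ok
 *	CS_HW_UPDATE_REQUESTED -> CS_PARAMS_SET		update_tx_sched(), error
 *							with refcount > 0
 *	CS_HW_UPDATE_IN_PROGRESS -> CS_HW_CONFIGURED	set_sched_class_params()
 *	any error with refcount == 0 -> CS_UNINITIALIZED
 *
 * update_tx_sched() below walks every class of every port and pushes the
 * parameters of any class in CS_HW_UPDATE_REQUESTED to the firmware.
 */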

static void
update_tx_sched(void *context, int pending)
{
	int i, j, rc;
	struct port_info *pi;
	struct tx_cl_rl_params *tc;
	struct adapter *sc = context;
	const int n = sc->params.nsched_cls;

	mtx_lock(&sc->tc_lock);
	for_each_port(sc, i) {
		pi = sc->port[i];
		tc = &pi->sched_params->cl_rl[0];
		for (j = 0; j < n; j++, tc++) {
			MPASS(mtx_owned(&sc->tc_lock));
			if (tc->state != CS_HW_UPDATE_REQUESTED)
				continue;
			mtx_unlock(&sc->tc_lock);

			if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t4utxs") != 0) {
				mtx_lock(&sc->tc_lock);
				continue;
			}
			rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
			    FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
			    tc->pktsize, tc->burstsize, 1);
			end_synchronized_op(sc, 0);

			mtx_lock(&sc->tc_lock);
			MPASS(tc->state == CS_HW_UPDATE_REQUESTED);
			if (rc == 0) {
				tc->state = CS_HW_CONFIGURED;
				continue;
			}
			/* parameters failed so we try to avoid params_set */
			if (tc->refcount > 0)
				tc->state = CS_PARAMS_SET;
			else
				tc->state = CS_UNINITIALIZED;
			CH_ERR(pi, "failed to configure traffic class %d: %d. "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", j, rc, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, 0, tc->maxrate,
			    tc->pktsize, tc->burstsize);
		}
	}
	mtx_unlock(&sc->tc_lock);
}

int
t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
{

	if (p->type != SCHED_CLASS_TYPE_PACKET)
		return (EINVAL);

	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
		return (set_sched_class_config(sc, p->u.config.minmax));

	if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
		return (set_sched_class_params(sc, &p->u.params, 1));

	return (EINVAL);
}
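
/*
 * txq->tc_idx uses two sentinel values, a convention inferred from the logic
 * below:
 *
 *	-1	the txq is not bound to any traffic class.
 *	-2	a bind/unbind is in progress and the txq is off limits to any
 *		other bind/unbind until it completes.
 *
 * Any value >= 0 is the index of the class the txq is currently bound to.
 */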

static int
bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
{
	struct tx_cl_rl_params *tc0, *tc;
	int rc, old_idx;
	uint32_t fw_mnem, fw_class;

	if (!(txq->eq.flags & EQ_HW_ALLOCATED))
		return (ENXIO);

	mtx_lock(&sc->tc_lock);
	if (txq->tc_idx == -2) {
		rc = EBUSY;	/* Another bind/unbind in progress already. */
		goto done;
	}
	if (idx == txq->tc_idx) {
		rc = 0;		/* No change, nothing to do. */
		goto done;
	}

	tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
	if (idx != -1) {
		/*
		 * Bind to a different class at index idx.
		 */
		tc = &tc0[idx];
		if (tc->state != CS_HW_CONFIGURED) {
			rc = ENXIO;
			goto done;
		} else {
			/*
			 * Ok to proceed.  Place a reference on the new class
			 * while still holding on to the reference on the
			 * previous class, if any.
			 */
			tc->refcount++;
		}
	}
	/* Mark as busy before letting go of the lock. */
	old_idx = txq->tc_idx;
	txq->tc_idx = -2;
	mtx_unlock(&sc->tc_lock);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
	if (rc == 0) {
		fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
		    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
		fw_class = idx < 0 ? 0xffffffff : idx;
		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem,
		    &fw_class);
		end_synchronized_op(sc, 0);
	}

	mtx_lock(&sc->tc_lock);
	MPASS(txq->tc_idx == -2);
	if (rc == 0) {
		/*
		 * Unbind, bind, or bind to a different class succeeded.  Remove
		 * the reference on the old traffic class, if any.
		 */
		if (old_idx != -1) {
			tc = &tc0[old_idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = idx;
	} else {
		/*
		 * Unbind, bind, or bind to a different class failed.  Remove
		 * the anticipatory reference on the new traffic class, if any.
		 */
		if (idx != -1) {
			tc = &tc0[idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = old_idx;
	}
done:
	MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->params.nsched_cls);
	mtx_unlock(&sc->tc_lock);
	return (rc);
}
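
/*
 * Illustrative sketch (not compiled): the two ways t4_set_sched_queue() below
 * can be asked to bind queues, using the t4_sched_queue fields it consumes.
 * The values are examples only.
 *
 *	struct t4_sched_queue p = { .port = 0, .queue = 3, .cl = 1 };
 *		binds txq 3 of the main VI of port 0 to class 1.
 *
 *	struct t4_sched_queue p = { .port = 0, .queue = -1, .cl = 1 };
 *		a negative queue binds every txq of that VI to class 1.
 */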

int
t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
	struct port_info *pi = NULL;
	struct vi_info *vi;
	struct sge_txq *txq;
	int i, rc;

	if (p->port >= sc->params.nports)
		return (EINVAL);

	/*
	 * XXX: cxgbetool allows the user to specify the physical port only.  So
	 * we always operate on the main VI.
	 */
	pi = sc->port[p->port];
	vi = &pi->vi[0];

	/* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
	if (!(vi->flags & VI_INIT_DONE))
		return (EAGAIN);
	MPASS(vi->ntxq > 0);

	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
	    !in_range(p->cl, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	if (p->queue < 0) {
		/*
		 * Change the scheduling on all the TX queues for the
		 * interface.
		 */
		for_each_txq(vi, i, txq) {
			rc = bind_txq_to_traffic_class(sc, txq, p->cl);
			if (rc != 0)
				break;
		}
	} else {
		/*
		 * If p->queue is non-negative, then we're only changing the
		 * scheduling on a single specified TX queue.
		 */
		txq = &sc->sge.txq[vi->first_txq + p->queue];
		rc = bind_txq_to_traffic_class(sc, txq, p->cl);
	}

	return (rc);
}

int
t4_init_tx_sched(struct adapter *sc)
{
	int i;
	const int n = sc->params.nsched_cls;
	struct port_info *pi;

	mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
	TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
	for_each_port(sc, i) {
		pi = sc->port[i];
		pi->sched_params = malloc(sizeof(*pi->sched_params) +
		    n * sizeof(struct tx_cl_rl_params), M_CXGBE,
		    M_ZERO | M_WAITOK);
	}

	return (0);
}

int
t4_free_tx_sched(struct adapter *sc)
{
	int i;

	taskqueue_drain(taskqueue_thread, &sc->tc_task);

	for_each_port(sc, i) {
		if (sc->port[i] != NULL)
			free(sc->port[i]->sched_params, M_CXGBE);
	}

	if (mtx_initialized(&sc->tc_lock))
		mtx_destroy(&sc->tc_lock);

	return (0);
}

void
t4_update_tx_sched(struct adapter *sc)
{

	taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
}
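
/*
 * Illustrative sketch (not compiled): the reserve/release pairing expected by
 * the two functions below, as used by cxgbe_rate_tag_alloc() later in this
 * file.  maxrate is in kbps; tc_idx receives the class index on success.
 *
 *	int tc_idx;
 *	rc = t4_reserve_cl_rl_kbps(sc, pi->port_id, maxrate_kbps, &tc_idx);
 *	if (rc == 0) {
 *		... transmit using traffic class tc_idx ...
 *		t4_release_cl_rl(sc, pi->port_id, tc_idx);
 *	}
 */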

int
t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
    int *tc_idx)
{
	int rc = 0, fa, fa2, i, pktsize, burstsize;
	bool update;
	struct tx_cl_rl_params *tc;
	struct port_info *pi;

	MPASS(port_id >= 0 && port_id < sc->params.nports);

	pi = sc->port[port_id];
	if (pi->sched_params->pktsize > 0)
		pktsize = pi->sched_params->pktsize;
	else
		pktsize = pi->vi[0].ifp->if_mtu;
	if (pi->sched_params->burstsize > 0)
		burstsize = pi->sched_params->burstsize;
	else
		burstsize = pktsize * 4;
	tc = &pi->sched_params->cl_rl[0];

	update = false;
	fa = fa2 = -1;
	mtx_lock(&sc->tc_lock);
	for (i = 0; i < sc->params.nsched_cls; i++, tc++) {
		if (tc->state >= CS_PARAMS_SET &&
		    tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
		    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
		    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
		    tc->maxrate == maxrate && tc->pktsize == pktsize &&
		    tc->burstsize == burstsize) {
			tc->refcount++;
			*tc_idx = i;
			if (tc->state == CS_PARAMS_SET) {
				tc->state = CS_HW_UPDATE_REQUESTED;
				update = true;
			}
			goto done;
		}

		if (fa < 0 && tc->state == CS_UNINITIALIZED) {
			MPASS(tc->refcount == 0);
			fa = i;		/* first available, never used. */
		}
		if (fa2 < 0 && tc->refcount == 0 && !(tc->flags & CF_USER)) {
			fa2 = i;	/* first available, used previously. */
		}
	}
	/* Not found */
	MPASS(i == sc->params.nsched_cls);
	if (fa == -1)
		fa = fa2;
	if (fa == -1) {
		*tc_idx = -1;
		rc = ENOSPC;
	} else {
		MPASS(fa >= 0 && fa < sc->params.nsched_cls);
		tc = &pi->sched_params->cl_rl[fa];
		MPASS(!(tc->flags & CF_USER));
		MPASS(tc->refcount == 0);

		tc->refcount = 1;
		tc->state = CS_HW_UPDATE_REQUESTED;
		tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
		tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
		tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
		tc->maxrate = maxrate;
		tc->pktsize = pktsize;
		tc->burstsize = burstsize;
		*tc_idx = fa;
		update = true;
	}
done:
	mtx_unlock(&sc->tc_lock);
	if (update)
		t4_update_tx_sched(sc);
	return (rc);
}

void
t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
{
	struct tx_cl_rl_params *tc;

	MPASS(port_id >= 0 && port_id < sc->params.nports);
	MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
	MPASS(tc->refcount > 0);
	tc->refcount--;
	mtx_unlock(&sc->tc_lock);
}

int
sysctl_tc(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->adapter;
	struct sge_txq *txq;
	int qidx = arg2, rc, tc_idx;

	MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);

	txq = &sc->sge.txq[qidx];
	tc_idx = txq->tc_idx;
	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (sc->flags & IS_VF)
		return (EPERM);
	if (!in_range(tc_idx, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	return (bind_txq_to_traffic_class(sc, txq, tc_idx));
}
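
/*
 * The handler below decodes arg2 as (port_id << 16) | class index, matching
 * the unpacking it performs; how the OID is registered with this encoding is
 * outside this file.  A sketch of the encoding, for illustration:
 *
 *	arg2 = (port_id << 16) | tc_index;
 */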

int
sysctl_tc_params(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct tx_cl_rl_params tc;
	struct sbuf *sb;
	int i, rc, port_id, mbps, gbps;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	port_id = arg2 >> 16;
	MPASS(port_id < sc->params.nports);
	MPASS(sc->port[port_id] != NULL);
	i = arg2 & 0xffff;
	MPASS(i < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = sc->port[port_id]->sched_params->cl_rl[i];
	mtx_unlock(&sc->tc_lock);

	if (tc.state < CS_PARAMS_SET) {
		sbuf_printf(sb, "uninitialized");
		goto done;
	}

	switch (tc.rateunit) {
	case SCHED_CLASS_RATEUNIT_BITS:
		switch (tc.ratemode) {
		case SCHED_CLASS_RATEMODE_REL:
			/* XXX: top speed or actual link speed? */
			gbps = port_top_speed(sc->port[port_id]);
			sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
			break;
		case SCHED_CLASS_RATEMODE_ABS:
			mbps = tc.maxrate / 1000;
			gbps = tc.maxrate / 1000000;
			if (tc.maxrate == gbps * 1000000)
				sbuf_printf(sb, "%uGbps", gbps);
			else if (tc.maxrate == mbps * 1000)
				sbuf_printf(sb, "%uMbps", mbps);
			else
				sbuf_printf(sb, "%uKbps", tc.maxrate);
			break;
		default:
			rc = ENXIO;
			goto done;
		}
		break;
	case SCHED_CLASS_RATEUNIT_PKTS:
		sbuf_printf(sb, "%upps", tc.maxrate);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

	switch (tc.mode) {
	case SCHED_CLASS_MODE_CLASS:
		/* Note that pktsize and burstsize are not used in this mode. */
		sbuf_printf(sb, " aggregate");
		break;
	case SCHED_CLASS_MODE_FLOW:
		sbuf_printf(sb, " per-flow");
		if (tc.pktsize > 0)
			sbuf_printf(sb, " pkt-size %u", tc.pktsize);
		if (tc.burstsize > 0)
			sbuf_printf(sb, " burst-size %u", tc.burstsize);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

done:
	if (rc == 0)
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

#ifdef RATELIMIT
void
t4_init_etid_table(struct adapter *sc)
{
	int i;
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
	t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
	    M_ZERO | M_WAITOK);
	t->efree = t->etid_tab;
	t->etids_in_use = 0;
	for (i = 1; i < t->netids; i++)
		t->etid_tab[i - 1].next = &t->etid_tab[i];
	t->etid_tab[t->netids - 1].next = NULL;
}

void
t4_free_etid_table(struct adapter *sc)
{
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	free(t->etid_tab, M_CXGBE);
	t->etid_tab = NULL;

	if (mtx_initialized(&t->etid_lock))
		mtx_destroy(&t->etid_lock);
}

/* etid services */
static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
static void free_etid(struct adapter *, int);

static int
alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
{
	struct tid_info *t = &sc->tids;
	int etid = -1;

	mtx_lock(&t->etid_lock);
	if (t->efree) {
		union etid_entry *p = t->efree;

		etid = p - t->etid_tab + t->etid_base;
		t->efree = p->next;
		p->cst = cst;
		t->etids_in_use++;
	}
	mtx_unlock(&t->etid_lock);
	return (etid);
}

struct cxgbe_rate_tag *
lookup_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;

	return (t->etid_tab[etid - t->etid_base].cst);
}

static void
free_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;
	union etid_entry *p = &t->etid_tab[etid - t->etid_base];

	mtx_lock(&t->etid_lock);
	p->next = t->efree;
	t->efree = p;
	t->etids_in_use--;
	mtx_unlock(&t->etid_lock);
}
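
/*
 * The send-tag interface specifies max_rate in bytes per second while the
 * class rate limiter is programmed in kilobits per second, hence the
 * conversion used by cxgbe_rate_tag_alloc() and cxgbe_rate_tag_modify()
 * below:
 *
 *	kbps = max_rate (B/s) * 8 / 1000
 */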

int
cxgbe_rate_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	int rc, schedcl;
	struct vi_info *vi = ifp->if_softc;
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct cxgbe_rate_tag *cst;

	MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);

	rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0)
		return (rc);
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);

	cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
	if (cst == NULL) {
failed:
		t4_release_cl_rl(sc, pi->port_id, schedcl);
		return (ENOMEM);
	}

	cst->etid = alloc_etid(sc, cst);
	if (cst->etid < 0) {
		free(cst, M_CXGBE);
		goto failed;
	}

	mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
	mbufq_init(&cst->pending_tx, INT_MAX);
	mbufq_init(&cst->pending_fwack, INT_MAX);
	m_snd_tag_init(&cst->com, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
	cst->adapter = sc;
	cst->port_id = pi->port_id;
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	cst->tx_credits = sc->params.eo_wr_cred;
	cst->tx_total = cst->tx_credits;
	cst->plen = 0;
	cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
	    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));

	/*
	 * Queues will be selected later when the connection flowid is
	 * available.
	 */

	*pt = &cst->com;
	return (0);
}

/*
 * Change in parameters, no change in ifp.
 */
int
cxgbe_rate_tag_modify(struct m_snd_tag *mst,
    union if_snd_tag_modify_params *params)
{
	int rc, schedcl;
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);
	struct adapter *sc = cst->adapter;

	/* XXX: is schedcl -1 ok here? */
	MPASS(cst->schedcl >= 0 && cst->schedcl < sc->params.nsched_cls);

	mtx_lock(&cst->lock);
	MPASS(cst->flags & EO_SND_TAG_REF);
	rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0) {
		/*
		 * Drop the lock before returning; leaving it held here would
		 * deadlock the next operation on this tag.
		 */
		mtx_unlock(&cst->lock);
		return (rc);
	}
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);
	t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	mtx_unlock(&cst->lock);

	return (0);
}

int
cxgbe_rate_tag_query(struct m_snd_tag *mst,
    union if_snd_tag_query_params *params)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	params->rate_limit.max_rate = cst->max_rate;

#define	CST_TO_MST_QLEVEL_SCALE	(IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
	params->rate_limit.queue_level =
	    (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;

	return (0);
}

/*
 * Unlocks cst and frees it.
 */
void
cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
{
	struct adapter *sc = cst->adapter;

	mtx_assert(&cst->lock, MA_OWNED);
	MPASS((cst->flags & EO_SND_TAG_REF) == 0);
	MPASS(cst->tx_credits == cst->tx_total);
	MPASS(cst->plen == 0);
	MPASS(mbufq_first(&cst->pending_tx) == NULL);
	MPASS(mbufq_first(&cst->pending_fwack) == NULL);

	if (cst->etid >= 0)
		free_etid(sc, cst->etid);
	if (cst->schedcl != -1)
		t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	mtx_unlock(&cst->lock);
	mtx_destroy(&cst->lock);
	free(cst, M_CXGBE);
}

void
cxgbe_rate_tag_free(struct m_snd_tag *mst)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	mtx_lock(&cst->lock);

	/* The kernel is done with the snd_tag.  Remove its reference. */
	MPASS(cst->flags & EO_SND_TAG_REF);
	cst->flags &= ~EO_SND_TAG_REF;

	if (cst->ncompl == 0) {
		/*
		 * No fw4_ack in flight.  Free the tag right away if there are
		 * no outstanding credits.  Request the firmware to return all
		 * credits for the etid otherwise.
		 */
		if (cst->tx_credits == cst->tx_total) {
			cxgbe_rate_tag_free_locked(cst);
			return;	/* cst is gone. */
		}
		send_etid_flush_wr(cst);
	}
	mtx_unlock(&cst->lock);
}
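
/*
 * Illustrative sketch (not compiled): the lifecycle of a rate tag as driven
 * by the kernel through the functions above, under the usual if_snd_tag
 * contract:
 *
 *	cxgbe_rate_tag_alloc(ifp, &alloc_params, &mst);	reserve class + etid
 *	cxgbe_rate_tag_modify(mst, &modify_params);	reserve new, release old
 *	cxgbe_rate_tag_query(mst, &query_params);	report rate + queue level
 *	cxgbe_rate_tag_free(mst);	deferred until all credits return
 */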

void
cxgbe_ratelimit_query(struct ifnet *ifp, struct if_ratelimit_query_results *q)
{
	struct vi_info *vi = ifp->if_softc;
	struct adapter *sc = vi->adapter;

	q->rate_table = NULL;
	q->flags = RT_IS_SELECTABLE;
	/*
	 * Absolute max limits from the firmware configuration.  Practical
	 * limits depend on the burstsize, pktsize (ifp->if_mtu ultimately) and
	 * the card's cclk.
	 */
	q->max_flows = sc->tids.netids;
	q->number_of_rates = sc->params.nsched_cls;
	q->min_segment_burst = 4;	/* matches PKTSCHED_BURST in the firmware. */

#if 1
	if (chip_id(sc) < CHELSIO_T6) {
		/* Based on testing by rrs@ with a T580 at burstsize = 4. */
		MPASS(q->min_segment_burst == 4);
		q->max_flows = min(4000, q->max_flows);
	} else {
		/* XXX: TBD, carried forward from T5 for now. */
		q->max_flows = min(4000, q->max_flows);
	}

	/*
	 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
	 * even knows whether hw pacing will be used or not.  This prevents
	 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
	 * the private ioctls from using any of the traffic classes.
	 *
	 * Underreport the number of rates to tcp_ratelimit so that it doesn't
	 * hog all of them.  This can be removed if/when tcp_ratelimit switches
	 * to making its allocations on first-use rather than link-up.  There is
	 * nothing wrong with one particular consumer reserving all the classes
	 * but it should do so only if it'll actually use hw rate limiting.
	 */
	q->number_of_rates /= 4;
#endif
}
#endif