/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_ratelimit.h" 32 33 #include <sys/types.h> 34 #include <sys/malloc.h> 35 #include <sys/queue.h> 36 #include <sys/sbuf.h> 37 #include <sys/taskqueue.h> 38 #include <sys/sysctl.h> 39 40 #include "common/common.h" 41 #include "common/t4_regs.h" 42 #include "common/t4_regs_values.h" 43 #include "common/t4_msg.h" 44 45 static int 46 in_range(int val, int lo, int hi) 47 { 48 49 return (val < 0 || (val <= hi && val >= lo)); 50 } 51 52 static int 53 set_sched_class_config(struct adapter *sc, int minmax) 54 { 55 int rc; 56 57 if (minmax < 0) 58 return (EINVAL); 59 60 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc"); 61 if (rc) 62 return (rc); 63 if (hw_off_limits(sc)) 64 rc = ENXIO; 65 else 66 rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1); 67 end_synchronized_op(sc, 0); 68 69 return (rc); 70 } 71 72 static int 73 set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p, 74 int sleep_ok) 75 { 76 int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode; 77 struct port_info *pi; 78 struct tx_cl_rl_params *tc, old; 79 bool check_pktsize = false; 80 81 if (p->level == SCHED_CLASS_LEVEL_CL_RL) 82 fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL; 83 else if (p->level == SCHED_CLASS_LEVEL_CL_WRR) 84 fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR; 85 else if (p->level == SCHED_CLASS_LEVEL_CH_RL) 86 fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL; 87 else 88 return (EINVAL); 89 90 if (p->level == SCHED_CLASS_LEVEL_CL_RL) { 91 if (p->mode == SCHED_CLASS_MODE_CLASS) 92 fw_mode = FW_SCHED_PARAMS_MODE_CLASS; 93 else if (p->mode == SCHED_CLASS_MODE_FLOW) { 94 check_pktsize = true; 95 fw_mode = FW_SCHED_PARAMS_MODE_FLOW; 96 } else 97 return (EINVAL); 98 } else 99 fw_mode = 0; 100 101 /* Valid channel must always be provided. 
*/ 102 if (p->channel < 0) 103 return (EINVAL); 104 if (!in_range(p->channel, 0, sc->chip_params->nchan - 1)) 105 return (ERANGE); 106 107 pi = sc->port[sc->chan_map[p->channel]]; 108 if (pi == NULL) 109 return (ENXIO); 110 MPASS(pi->tx_chan == p->channel); 111 top_speed = port_top_speed(pi) * 1000000; /* Gbps -> Kbps */ 112 113 if (p->level == SCHED_CLASS_LEVEL_CL_RL || 114 p->level == SCHED_CLASS_LEVEL_CH_RL) { 115 /* 116 * Valid rate (mode, unit and values) must be provided. 117 */ 118 119 if (p->minrate < 0) 120 p->minrate = 0; 121 if (p->maxrate < 0) 122 return (EINVAL); 123 124 if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) { 125 fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; 126 /* ratemode could be relative (%) or absolute. */ 127 if (p->ratemode == SCHED_CLASS_RATEMODE_REL) { 128 fw_ratemode = FW_SCHED_PARAMS_RATE_REL; 129 /* maxrate is % of port bandwidth. */ 130 if (!in_range(p->minrate, 0, 100) || 131 !in_range(p->maxrate, 0, 100)) { 132 return (ERANGE); 133 } 134 } else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) { 135 fw_ratemode = FW_SCHED_PARAMS_RATE_ABS; 136 /* maxrate is absolute value in kbps. */ 137 if (!in_range(p->minrate, 0, top_speed) || 138 !in_range(p->maxrate, 0, top_speed)) { 139 return (ERANGE); 140 } 141 } else 142 return (EINVAL); 143 } else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) { 144 /* maxrate is the absolute value in pps. */ 145 check_pktsize = true; 146 fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE; 147 } else 148 return (EINVAL); 149 } else { 150 MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR); 151 152 /* 153 * Valid weight must be provided. 154 */ 155 if (p->weight < 0) 156 return (EINVAL); 157 if (!in_range(p->weight, 1, 99)) 158 return (ERANGE); 159 160 fw_rateunit = 0; 161 fw_ratemode = 0; 162 } 163 164 if (p->level == SCHED_CLASS_LEVEL_CL_RL || 165 p->level == SCHED_CLASS_LEVEL_CL_WRR) { 166 /* 167 * Valid scheduling class must be provided. 
168 */ 169 if (p->cl < 0) 170 return (EINVAL); 171 if (!in_range(p->cl, 0, sc->params.nsched_cls - 1)) 172 return (ERANGE); 173 } 174 175 if (check_pktsize) { 176 if (p->pktsize < 0) 177 return (EINVAL); 178 if (!in_range(p->pktsize, 64, if_getmtu(pi->vi[0].ifp))) 179 return (ERANGE); 180 } 181 182 if (p->level == SCHED_CLASS_LEVEL_CL_RL) { 183 tc = &pi->sched_params->cl_rl[p->cl]; 184 mtx_lock(&sc->tc_lock); 185 if (tc->refcount > 0 || tc->state == CS_HW_UPDATE_IN_PROGRESS) 186 rc = EBUSY; 187 else { 188 old = *tc; 189 190 tc->flags |= CF_USER; 191 tc->state = CS_HW_UPDATE_IN_PROGRESS; 192 tc->ratemode = fw_ratemode; 193 tc->rateunit = fw_rateunit; 194 tc->mode = fw_mode; 195 tc->maxrate = p->maxrate; 196 tc->pktsize = p->pktsize; 197 rc = 0; 198 } 199 mtx_unlock(&sc->tc_lock); 200 if (rc != 0) 201 return (rc); 202 } 203 204 rc = begin_synchronized_op(sc, NULL, 205 sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp"); 206 if (rc != 0) { 207 if (p->level == SCHED_CLASS_LEVEL_CL_RL) { 208 mtx_lock(&sc->tc_lock); 209 MPASS(tc->refcount == 0); 210 MPASS(tc->flags & CF_USER); 211 MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS); 212 *tc = old; 213 mtx_unlock(&sc->tc_lock); 214 } 215 return (rc); 216 } 217 if (!hw_off_limits(sc)) { 218 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level, 219 fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl, 220 p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok); 221 } 222 end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD); 223 224 if (p->level == SCHED_CLASS_LEVEL_CL_RL) { 225 mtx_lock(&sc->tc_lock); 226 MPASS(tc->refcount == 0); 227 MPASS(tc->flags & CF_USER); 228 MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS); 229 230 if (rc == 0) 231 tc->state = CS_HW_CONFIGURED; 232 else { 233 /* parameters failed so we don't park at params_set */ 234 tc->state = CS_UNINITIALIZED; 235 tc->flags &= ~CF_USER; 236 CH_ERR(pi, "failed to configure traffic class %d: %d. 
" 237 "params: mode %d, rateunit %d, ratemode %d, " 238 "channel %d, minrate %d, maxrate %d, pktsize %d, " 239 "burstsize %d\n", p->cl, rc, fw_mode, fw_rateunit, 240 fw_ratemode, p->channel, p->minrate, p->maxrate, 241 p->pktsize, 0); 242 } 243 mtx_unlock(&sc->tc_lock); 244 } 245 246 return (rc); 247 } 248 249 static void 250 update_tx_sched(void *context, int pending) 251 { 252 int i, j, rc; 253 struct port_info *pi; 254 struct tx_cl_rl_params *tc; 255 struct adapter *sc = context; 256 const int n = sc->params.nsched_cls; 257 258 mtx_lock(&sc->tc_lock); 259 for_each_port(sc, i) { 260 pi = sc->port[i]; 261 tc = &pi->sched_params->cl_rl[0]; 262 for (j = 0; j < n; j++, tc++) { 263 MPASS(mtx_owned(&sc->tc_lock)); 264 if (tc->state != CS_HW_UPDATE_REQUESTED) 265 continue; 266 mtx_unlock(&sc->tc_lock); 267 268 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, 269 "t4utxs") != 0) { 270 mtx_lock(&sc->tc_lock); 271 continue; 272 } 273 rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, 274 FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit, 275 tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0, 276 tc->pktsize, tc->burstsize, 1); 277 end_synchronized_op(sc, 0); 278 279 mtx_lock(&sc->tc_lock); 280 MPASS(tc->state == CS_HW_UPDATE_REQUESTED); 281 if (rc == 0) { 282 tc->state = CS_HW_CONFIGURED; 283 continue; 284 } 285 /* parameters failed so we try to avoid params_set */ 286 if (tc->refcount > 0) 287 tc->state = CS_PARAMS_SET; 288 else 289 tc->state = CS_UNINITIALIZED; 290 CH_ERR(pi, "failed to configure traffic class %d: %d. 
" 291 "params: mode %d, rateunit %d, ratemode %d, " 292 "channel %d, minrate %d, maxrate %d, pktsize %d, " 293 "burstsize %d\n", j, rc, tc->mode, tc->rateunit, 294 tc->ratemode, pi->tx_chan, 0, tc->maxrate, 295 tc->pktsize, tc->burstsize); 296 } 297 } 298 mtx_unlock(&sc->tc_lock); 299 } 300 301 int 302 t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p) 303 { 304 305 if (p->type != SCHED_CLASS_TYPE_PACKET) 306 return (EINVAL); 307 308 if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG) 309 return (set_sched_class_config(sc, p->u.config.minmax)); 310 311 if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS) 312 return (set_sched_class_params(sc, &p->u.params, 1)); 313 314 return (EINVAL); 315 } 316 317 static int 318 bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx) 319 { 320 struct tx_cl_rl_params *tc0, *tc; 321 int rc, old_idx; 322 uint32_t fw_mnem, fw_class; 323 324 if (!(txq->eq.flags & EQ_HW_ALLOCATED)) 325 return (ENXIO); 326 327 mtx_lock(&sc->tc_lock); 328 if (txq->tc_idx == -2) { 329 rc = EBUSY; /* Another bind/unbind in progress already. */ 330 goto done; 331 } 332 if (idx == txq->tc_idx) { 333 rc = 0; /* No change, nothing to do. */ 334 goto done; 335 } 336 337 tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0]; 338 if (idx != -1) { 339 /* 340 * Bind to a different class at index idx. 341 */ 342 tc = &tc0[idx]; 343 if (tc->state != CS_HW_CONFIGURED) { 344 rc = ENXIO; 345 goto done; 346 } else { 347 /* 348 * Ok to proceed. Place a reference on the new class 349 * while still holding on to the reference on the 350 * previous class, if any. 351 */ 352 tc->refcount++; 353 } 354 } 355 /* Mark as busy before letting go of the lock. 
*/ 356 old_idx = txq->tc_idx; 357 txq->tc_idx = -2; 358 mtx_unlock(&sc->tc_lock); 359 360 rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq"); 361 if (rc == 0) { 362 fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) | 363 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) | 364 V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id)); 365 fw_class = idx < 0 ? 0xffffffff : idx; 366 rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem, 367 &fw_class); 368 end_synchronized_op(sc, 0); 369 } 370 371 mtx_lock(&sc->tc_lock); 372 MPASS(txq->tc_idx == -2); 373 if (rc == 0) { 374 /* 375 * Unbind, bind, or bind to a different class succeeded. Remove 376 * the reference on the old traffic class, if any. 377 */ 378 if (old_idx != -1) { 379 tc = &tc0[old_idx]; 380 MPASS(tc->refcount > 0); 381 tc->refcount--; 382 } 383 txq->tc_idx = idx; 384 } else { 385 /* 386 * Unbind, bind, or bind to a different class failed. Remove 387 * the anticipatory reference on the new traffic class, if any. 388 */ 389 if (idx != -1) { 390 tc = &tc0[idx]; 391 MPASS(tc->refcount > 0); 392 tc->refcount--; 393 } 394 txq->tc_idx = old_idx; 395 } 396 done: 397 MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->params.nsched_cls); 398 mtx_unlock(&sc->tc_lock); 399 return (rc); 400 } 401 402 int 403 t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p) 404 { 405 struct port_info *pi = NULL; 406 struct vi_info *vi; 407 struct sge_txq *txq; 408 int i, rc; 409 410 if (p->port >= sc->params.nports) 411 return (EINVAL); 412 413 /* 414 * XXX: cxgbetool allows the user to specify the physical port only. So 415 * we always operate on the main VI. 416 */ 417 pi = sc->port[p->port]; 418 vi = &pi->vi[0]; 419 420 /* Checking VI_INIT_DONE outside a synch-op is a harmless race here. 
*/ 421 if (!(vi->flags & VI_INIT_DONE)) 422 return (EAGAIN); 423 MPASS(vi->ntxq > 0); 424 425 if (!in_range(p->queue, 0, vi->ntxq - 1) || 426 !in_range(p->cl, 0, sc->params.nsched_cls - 1)) 427 return (EINVAL); 428 429 if (p->queue < 0) { 430 /* 431 * Change the scheduling on all the TX queues for the 432 * interface. 433 */ 434 for_each_txq(vi, i, txq) { 435 rc = bind_txq_to_traffic_class(sc, txq, p->cl); 436 if (rc != 0) 437 break; 438 } 439 } else { 440 /* 441 * If op.queue is non-negative, then we're only changing the 442 * scheduling on a single specified TX queue. 443 */ 444 txq = &sc->sge.txq[vi->first_txq + p->queue]; 445 rc = bind_txq_to_traffic_class(sc, txq, p->cl); 446 } 447 448 return (rc); 449 } 450 451 int 452 t4_init_tx_sched(struct adapter *sc) 453 { 454 int i; 455 const int n = sc->params.nsched_cls; 456 struct port_info *pi; 457 458 mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF); 459 TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc); 460 for_each_port(sc, i) { 461 pi = sc->port[i]; 462 pi->sched_params = malloc(sizeof(*pi->sched_params) + 463 n * sizeof(struct tx_cl_rl_params), M_CXGBE, M_ZERO | M_WAITOK); 464 } 465 466 return (0); 467 } 468 469 int 470 t4_free_tx_sched(struct adapter *sc) 471 { 472 int i; 473 474 taskqueue_drain(taskqueue_thread, &sc->tc_task); 475 476 for_each_port(sc, i) { 477 if (sc->port[i] != NULL) 478 free(sc->port[i]->sched_params, M_CXGBE); 479 } 480 481 if (mtx_initialized(&sc->tc_lock)) 482 mtx_destroy(&sc->tc_lock); 483 484 return (0); 485 } 486 487 void 488 t4_update_tx_sched(struct adapter *sc) 489 { 490 491 taskqueue_enqueue(taskqueue_thread, &sc->tc_task); 492 } 493 494 int 495 t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate, 496 int *tc_idx) 497 { 498 int rc = 0, fa, fa2, i, pktsize, burstsize; 499 bool update; 500 struct tx_cl_rl_params *tc; 501 struct port_info *pi; 502 503 MPASS(port_id >= 0 && port_id < sc->params.nports); 504 505 pi = sc->port[port_id]; 506 if 
(pi->sched_params->pktsize > 0) 507 pktsize = pi->sched_params->pktsize; 508 else 509 pktsize = if_getmtu(pi->vi[0].ifp); 510 if (pi->sched_params->burstsize > 0) 511 burstsize = pi->sched_params->burstsize; 512 else 513 burstsize = pktsize * 4; 514 tc = &pi->sched_params->cl_rl[0]; 515 516 update = false; 517 fa = fa2 = -1; 518 mtx_lock(&sc->tc_lock); 519 for (i = 0; i < sc->params.nsched_cls; i++, tc++) { 520 if (tc->state >= CS_PARAMS_SET && 521 tc->ratemode == FW_SCHED_PARAMS_RATE_ABS && 522 tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE && 523 tc->mode == FW_SCHED_PARAMS_MODE_FLOW && 524 tc->maxrate == maxrate && tc->pktsize == pktsize && 525 tc->burstsize == burstsize) { 526 tc->refcount++; 527 *tc_idx = i; 528 if (tc->state == CS_PARAMS_SET) { 529 tc->state = CS_HW_UPDATE_REQUESTED; 530 update = true; 531 } 532 goto done; 533 } 534 535 if (fa < 0 && tc->state == CS_UNINITIALIZED) { 536 MPASS(tc->refcount == 0); 537 fa = i; /* first available, never used. */ 538 } 539 if (fa2 < 0 && tc->refcount == 0 && !(tc->flags & CF_USER)) { 540 fa2 = i; /* first available, used previously. 
*/ 541 } 542 } 543 /* Not found */ 544 MPASS(i == sc->params.nsched_cls); 545 if (fa == -1) 546 fa = fa2; 547 if (fa == -1) { 548 *tc_idx = -1; 549 rc = ENOSPC; 550 } else { 551 MPASS(fa >= 0 && fa < sc->params.nsched_cls); 552 tc = &pi->sched_params->cl_rl[fa]; 553 MPASS(!(tc->flags & CF_USER)); 554 MPASS(tc->refcount == 0); 555 556 tc->refcount = 1; 557 tc->state = CS_HW_UPDATE_REQUESTED; 558 tc->ratemode = FW_SCHED_PARAMS_RATE_ABS; 559 tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE; 560 tc->mode = FW_SCHED_PARAMS_MODE_FLOW; 561 tc->maxrate = maxrate; 562 tc->pktsize = pktsize; 563 tc->burstsize = burstsize; 564 *tc_idx = fa; 565 update = true; 566 } 567 done: 568 mtx_unlock(&sc->tc_lock); 569 if (update) 570 t4_update_tx_sched(sc); 571 return (rc); 572 } 573 574 void 575 t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx) 576 { 577 struct tx_cl_rl_params *tc; 578 579 MPASS(port_id >= 0 && port_id < sc->params.nports); 580 MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls); 581 582 mtx_lock(&sc->tc_lock); 583 tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx]; 584 MPASS(tc->refcount > 0); 585 tc->refcount--; 586 mtx_unlock(&sc->tc_lock); 587 } 588 589 int 590 sysctl_tc(SYSCTL_HANDLER_ARGS) 591 { 592 struct vi_info *vi = arg1; 593 struct adapter *sc = vi->adapter; 594 struct sge_txq *txq; 595 int qidx = arg2, rc, tc_idx; 596 597 MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq); 598 599 txq = &sc->sge.txq[qidx]; 600 tc_idx = txq->tc_idx; 601 rc = sysctl_handle_int(oidp, &tc_idx, 0, req); 602 if (rc != 0 || req->newptr == NULL) 603 return (rc); 604 605 if (sc->flags & IS_VF) 606 return (EPERM); 607 if (!in_range(tc_idx, 0, sc->params.nsched_cls - 1)) 608 return (EINVAL); 609 610 return (bind_txq_to_traffic_class(sc, txq, tc_idx)); 611 } 612 613 int 614 sysctl_tc_params(SYSCTL_HANDLER_ARGS) 615 { 616 struct adapter *sc = arg1; 617 struct tx_cl_rl_params tc; 618 struct sbuf *sb; 619 int i, rc, port_id, mbps, gbps; 620 621 rc = 
sysctl_wire_old_buffer(req, 0); 622 if (rc != 0) 623 return (rc); 624 625 sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); 626 if (sb == NULL) 627 return (ENOMEM); 628 629 port_id = arg2 >> 16; 630 MPASS(port_id < sc->params.nports); 631 MPASS(sc->port[port_id] != NULL); 632 i = arg2 & 0xffff; 633 MPASS(i < sc->params.nsched_cls); 634 635 mtx_lock(&sc->tc_lock); 636 tc = sc->port[port_id]->sched_params->cl_rl[i]; 637 mtx_unlock(&sc->tc_lock); 638 639 if (tc.state < CS_PARAMS_SET) { 640 sbuf_printf(sb, "uninitialized"); 641 goto done; 642 } 643 644 switch (tc.rateunit) { 645 case SCHED_CLASS_RATEUNIT_BITS: 646 switch (tc.ratemode) { 647 case SCHED_CLASS_RATEMODE_REL: 648 /* XXX: top speed or actual link speed? */ 649 gbps = port_top_speed(sc->port[port_id]); 650 sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps); 651 break; 652 case SCHED_CLASS_RATEMODE_ABS: 653 mbps = tc.maxrate / 1000; 654 gbps = tc.maxrate / 1000000; 655 if (tc.maxrate == gbps * 1000000) 656 sbuf_printf(sb, "%uGbps", gbps); 657 else if (tc.maxrate == mbps * 1000) 658 sbuf_printf(sb, "%uMbps", mbps); 659 else 660 sbuf_printf(sb, "%uKbps", tc.maxrate); 661 break; 662 default: 663 rc = ENXIO; 664 goto done; 665 } 666 break; 667 case SCHED_CLASS_RATEUNIT_PKTS: 668 sbuf_printf(sb, "%upps", tc.maxrate); 669 break; 670 default: 671 rc = ENXIO; 672 goto done; 673 } 674 675 switch (tc.mode) { 676 case SCHED_CLASS_MODE_CLASS: 677 /* Note that pktsize and burstsize are not used in this mode. 
*/ 678 sbuf_printf(sb, " aggregate"); 679 break; 680 case SCHED_CLASS_MODE_FLOW: 681 sbuf_printf(sb, " per-flow"); 682 if (tc.pktsize > 0) 683 sbuf_printf(sb, " pkt-size %u", tc.pktsize); 684 if (tc.burstsize > 0) 685 sbuf_printf(sb, " burst-size %u", tc.burstsize); 686 break; 687 default: 688 rc = ENXIO; 689 goto done; 690 } 691 692 done: 693 if (rc == 0) 694 rc = sbuf_finish(sb); 695 sbuf_delete(sb); 696 697 return (rc); 698 } 699 700 #ifdef RATELIMIT 701 void 702 t4_init_etid_table(struct adapter *sc) 703 { 704 int i; 705 struct tid_info *t; 706 707 if (!is_ethoffload(sc)) 708 return; 709 710 t = &sc->tids; 711 MPASS(t->netids > 0); 712 713 mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF); 714 t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE, 715 M_ZERO | M_WAITOK); 716 t->efree = t->etid_tab; 717 t->etids_in_use = 0; 718 for (i = 1; i < t->netids; i++) 719 t->etid_tab[i - 1].next = &t->etid_tab[i]; 720 t->etid_tab[t->netids - 1].next = NULL; 721 } 722 723 void 724 t4_free_etid_table(struct adapter *sc) 725 { 726 struct tid_info *t; 727 728 if (!is_ethoffload(sc)) 729 return; 730 731 t = &sc->tids; 732 MPASS(t->netids > 0); 733 734 free(t->etid_tab, M_CXGBE); 735 t->etid_tab = NULL; 736 737 if (mtx_initialized(&t->etid_lock)) 738 mtx_destroy(&t->etid_lock); 739 } 740 741 /* etid services */ 742 static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *); 743 static void free_etid(struct adapter *, int); 744 745 static int 746 alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst) 747 { 748 struct tid_info *t = &sc->tids; 749 int etid = -1; 750 751 mtx_lock(&t->etid_lock); 752 if (t->efree) { 753 union etid_entry *p = t->efree; 754 755 etid = p - t->etid_tab + t->etid_base; 756 t->efree = p->next; 757 p->cst = cst; 758 t->etids_in_use++; 759 } 760 mtx_unlock(&t->etid_lock); 761 return (etid); 762 } 763 764 struct cxgbe_rate_tag * 765 lookup_etid(struct adapter *sc, int etid) 766 { 767 struct tid_info *t = &sc->tids; 768 769 return 
(t->etid_tab[etid - t->etid_base].cst); 770 } 771 772 static void 773 free_etid(struct adapter *sc, int etid) 774 { 775 struct tid_info *t = &sc->tids; 776 union etid_entry *p = &t->etid_tab[etid - t->etid_base]; 777 778 mtx_lock(&t->etid_lock); 779 p->next = t->efree; 780 t->efree = p; 781 t->etids_in_use--; 782 mtx_unlock(&t->etid_lock); 783 } 784 785 static int cxgbe_rate_tag_modify(struct m_snd_tag *, 786 union if_snd_tag_modify_params *); 787 static int cxgbe_rate_tag_query(struct m_snd_tag *, 788 union if_snd_tag_query_params *); 789 static void cxgbe_rate_tag_free(struct m_snd_tag *); 790 791 static const struct if_snd_tag_sw cxgbe_rate_tag_sw = { 792 .snd_tag_modify = cxgbe_rate_tag_modify, 793 .snd_tag_query = cxgbe_rate_tag_query, 794 .snd_tag_free = cxgbe_rate_tag_free, 795 .type = IF_SND_TAG_TYPE_RATE_LIMIT 796 }; 797 798 int 799 cxgbe_rate_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params, 800 struct m_snd_tag **pt) 801 { 802 int rc, schedcl; 803 struct vi_info *vi = if_getsoftc(ifp); 804 struct port_info *pi = vi->pi; 805 struct adapter *sc = pi->adapter; 806 struct cxgbe_rate_tag *cst; 807 808 MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT); 809 810 rc = t4_reserve_cl_rl_kbps(sc, pi->port_id, 811 (params->rate_limit.max_rate * 8ULL / 1000), &schedcl); 812 if (rc != 0) 813 return (rc); 814 MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls); 815 816 cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT); 817 if (cst == NULL) { 818 failed: 819 t4_release_cl_rl(sc, pi->port_id, schedcl); 820 return (ENOMEM); 821 } 822 823 cst->etid = alloc_etid(sc, cst); 824 if (cst->etid < 0) { 825 free(cst, M_CXGBE); 826 goto failed; 827 } 828 829 mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF); 830 mbufq_init(&cst->pending_tx, INT_MAX); 831 mbufq_init(&cst->pending_fwack, INT_MAX); 832 m_snd_tag_init(&cst->com, ifp, &cxgbe_rate_tag_sw); 833 cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF; 834 cst->adapter = sc; 835 cst->port_id = pi->port_id; 836 
cst->schedcl = schedcl; 837 cst->max_rate = params->rate_limit.max_rate; 838 cst->tx_credits = sc->params.eo_wr_cred; 839 cst->tx_total = cst->tx_credits; 840 cst->plen = 0; 841 cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) | 842 V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) | 843 V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld)); 844 845 /* 846 * Queues will be selected later when the connection flowid is available. 847 */ 848 849 *pt = &cst->com; 850 return (0); 851 } 852 853 /* 854 * Change in parameters, no change in ifp. 855 */ 856 static int 857 cxgbe_rate_tag_modify(struct m_snd_tag *mst, 858 union if_snd_tag_modify_params *params) 859 { 860 int rc, schedcl; 861 struct cxgbe_rate_tag *cst = mst_to_crt(mst); 862 struct adapter *sc = cst->adapter; 863 864 /* XXX: is schedcl -1 ok here? */ 865 MPASS(cst->schedcl >= 0 && cst->schedcl < sc->params.nsched_cls); 866 867 mtx_lock(&cst->lock); 868 MPASS(cst->flags & EO_SND_TAG_REF); 869 rc = t4_reserve_cl_rl_kbps(sc, cst->port_id, 870 (params->rate_limit.max_rate * 8ULL / 1000), &schedcl); 871 if (rc != 0) 872 return (rc); 873 MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls); 874 t4_release_cl_rl(sc, cst->port_id, cst->schedcl); 875 cst->schedcl = schedcl; 876 cst->max_rate = params->rate_limit.max_rate; 877 mtx_unlock(&cst->lock); 878 879 return (0); 880 } 881 882 static int 883 cxgbe_rate_tag_query(struct m_snd_tag *mst, 884 union if_snd_tag_query_params *params) 885 { 886 struct cxgbe_rate_tag *cst = mst_to_crt(mst); 887 888 params->rate_limit.max_rate = cst->max_rate; 889 890 #define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total) 891 params->rate_limit.queue_level = 892 (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE; 893 894 return (0); 895 } 896 897 /* 898 * Unlocks cst and frees it. 
899 */ 900 void 901 cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst) 902 { 903 struct adapter *sc = cst->adapter; 904 905 mtx_assert(&cst->lock, MA_OWNED); 906 MPASS((cst->flags & EO_SND_TAG_REF) == 0); 907 MPASS(cst->tx_credits == cst->tx_total); 908 MPASS(cst->plen == 0); 909 MPASS(mbufq_first(&cst->pending_tx) == NULL); 910 MPASS(mbufq_first(&cst->pending_fwack) == NULL); 911 912 if (cst->etid >= 0) 913 free_etid(sc, cst->etid); 914 if (cst->schedcl != -1) 915 t4_release_cl_rl(sc, cst->port_id, cst->schedcl); 916 mtx_unlock(&cst->lock); 917 mtx_destroy(&cst->lock); 918 free(cst, M_CXGBE); 919 } 920 921 static void 922 cxgbe_rate_tag_free(struct m_snd_tag *mst) 923 { 924 struct cxgbe_rate_tag *cst = mst_to_crt(mst); 925 926 mtx_lock(&cst->lock); 927 928 /* The kernel is done with the snd_tag. Remove its reference. */ 929 MPASS(cst->flags & EO_SND_TAG_REF); 930 cst->flags &= ~EO_SND_TAG_REF; 931 932 if (cst->ncompl == 0) { 933 /* 934 * No fw4_ack in flight. Free the tag right away if there are 935 * no outstanding credits. Request the firmware to return all 936 * credits for the etid otherwise. 937 */ 938 if (cst->tx_credits == cst->tx_total) { 939 cxgbe_rate_tag_free_locked(cst); 940 return; /* cst is gone. */ 941 } 942 send_etid_flush_wr(cst); 943 } 944 mtx_unlock(&cst->lock); 945 } 946 947 void 948 cxgbe_ratelimit_query(if_t ifp, struct if_ratelimit_query_results *q) 949 { 950 struct vi_info *vi = if_getsoftc(ifp); 951 struct adapter *sc = vi->adapter; 952 953 q->rate_table = NULL; 954 q->flags = RT_IS_SELECTABLE; 955 /* 956 * Absolute max limits from the firmware configuration. Practical 957 * limits depend on the burstsize, pktsize (if_getmtu(ifp) ultimately) and 958 * the card's cclk. 959 */ 960 q->max_flows = sc->tids.netids; 961 q->number_of_rates = sc->params.nsched_cls; 962 q->min_segment_burst = 4; /* matches PKTSCHED_BURST in the firmware. 
*/ 963 964 #if 1 965 if (chip_id(sc) < CHELSIO_T6) { 966 /* Based on testing by rrs@ with a T580 at burstsize = 4. */ 967 MPASS(q->min_segment_burst == 4); 968 q->max_flows = min(4000, q->max_flows); 969 } else { 970 /* XXX: TBD, carried forward from T5 for now. */ 971 q->max_flows = min(4000, q->max_flows); 972 } 973 974 /* 975 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it 976 * even knows whether hw pacing will be used or not. This prevents 977 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or 978 * the private ioctls from using any of traffic classes. 979 * 980 * Underreport the number of rates to tcp_ratelimit so that it doesn't 981 * hog all of them. This can be removed if/when tcp_ratelimit switches 982 * to making its allocations on first-use rather than link-up. There is 983 * nothing wrong with one particular consumer reserving all the classes 984 * but it should do so only if it'll actually use hw rate limiting. 985 */ 986 q->number_of_rates /= 4; 987 #endif 988 } 989 #endif 990