/*-
 * Copyright (c) 2017 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>
#include <sys/sysctl.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"

static int
in_range(int val, int lo, int hi)
{

	return (val < 0 || (val <= hi && val >= lo));
}

static int
set_sched_class_config(struct adapter *sc, int minmax)
{
	int rc;

	if (minmax < 0)
		return (EINVAL);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4sscc");
	if (rc)
		return (rc);
	if (hw_off_limits(sc))
		rc = ENXIO;
	else
		rc = -t4_sched_config(sc, FW_SCHED_TYPE_PKTSCHED, minmax, 1);
	end_synchronized_op(sc, 0);

	return (rc);
}
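
/*
 * Validate a t4_sched_class_params request and hand it to the firmware.
 * Negative values in *p generally mean "not specified" (note that in_range()
 * accepts them). For class rate-limit (CL_RL) updates the cached
 * tx_cl_rl_params are updated under tc_lock and rolled back if the firmware
 * call cannot be made or fails.
 */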
static int
set_sched_class_params(struct adapter *sc, struct t4_sched_class_params *p,
    int sleep_ok)
{
	int rc, top_speed, fw_level, fw_mode, fw_rateunit, fw_ratemode;
	struct port_info *pi;
	struct tx_cl_rl_params *tc, old;
	bool check_pktsize = false;

	if (p->level == SCHED_CLASS_LEVEL_CL_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_RL;
	else if (p->level == SCHED_CLASS_LEVEL_CL_WRR)
		fw_level = FW_SCHED_PARAMS_LEVEL_CL_WRR;
	else if (p->level == SCHED_CLASS_LEVEL_CH_RL)
		fw_level = FW_SCHED_PARAMS_LEVEL_CH_RL;
	else
		return (EINVAL);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		if (p->mode == SCHED_CLASS_MODE_CLASS)
			fw_mode = FW_SCHED_PARAMS_MODE_CLASS;
		else if (p->mode == SCHED_CLASS_MODE_FLOW) {
			check_pktsize = true;
			fw_mode = FW_SCHED_PARAMS_MODE_FLOW;
		} else
			return (EINVAL);
	} else
		fw_mode = 0;

	/* Valid channel must always be provided. */
	if (p->channel < 0)
		return (EINVAL);
	if (!in_range(p->channel, 0, sc->chip_params->nchan - 1))
		return (ERANGE);

	pi = sc->port[sc->chan_map[p->channel]];
	if (pi == NULL)
		return (ENXIO);
	MPASS(pi->tx_chan == p->channel);
	top_speed = port_top_speed(pi) * 1000000;	/* Gbps -> Kbps */

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CH_RL) {
		/*
		 * Valid rate (mode, unit and values) must be provided.
		 */

		if (p->minrate < 0)
			p->minrate = 0;
		if (p->maxrate < 0)
			return (EINVAL);

		if (p->rateunit == SCHED_CLASS_RATEUNIT_BITS) {
			fw_rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
			/* ratemode could be relative (%) or absolute. */
			if (p->ratemode == SCHED_CLASS_RATEMODE_REL) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_REL;
				/* maxrate is % of port bandwidth. */
				if (!in_range(p->minrate, 0, 100) ||
				    !in_range(p->maxrate, 0, 100)) {
					return (ERANGE);
				}
			} else if (p->ratemode == SCHED_CLASS_RATEMODE_ABS) {
				fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
				/* maxrate is absolute value in kbps. */
				if (!in_range(p->minrate, 0, top_speed) ||
				    !in_range(p->maxrate, 0, top_speed)) {
					return (ERANGE);
				}
			} else
				return (EINVAL);
		} else if (p->rateunit == SCHED_CLASS_RATEUNIT_PKTS) {
			/* maxrate is the absolute value in pps. */
			check_pktsize = true;
			fw_rateunit = FW_SCHED_PARAMS_UNIT_PKTRATE;
			fw_ratemode = FW_SCHED_PARAMS_RATE_ABS;
		} else
			return (EINVAL);
	} else {
		MPASS(p->level == SCHED_CLASS_LEVEL_CL_WRR);

		/*
		 * Valid weight must be provided.
		 */
		if (p->weight < 0)
			return (EINVAL);
		if (!in_range(p->weight, 1, 99))
			return (ERANGE);

		fw_rateunit = 0;
		fw_ratemode = 0;
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL ||
	    p->level == SCHED_CLASS_LEVEL_CL_WRR) {
		/*
		 * Valid scheduling class must be provided.
		 */
		if (p->cl < 0)
			return (EINVAL);
		if (!in_range(p->cl, 0, sc->params.nsched_cls - 1))
			return (ERANGE);
	}

	if (check_pktsize) {
		if (p->pktsize < 0)
			return (EINVAL);
		if (!in_range(p->pktsize, 64, if_getmtu(pi->vi[0].ifp)))
			return (ERANGE);
	}

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		tc = &pi->sched_params->cl_rl[p->cl];
		mtx_lock(&sc->tc_lock);
		if (tc->refcount > 0 || tc->state == CS_HW_UPDATE_IN_PROGRESS)
			rc = EBUSY;
		else {
			old = *tc;

			tc->flags |= CF_USER;
			tc->state = CS_HW_UPDATE_IN_PROGRESS;
			tc->ratemode = fw_ratemode;
			tc->rateunit = fw_rateunit;
			tc->mode = fw_mode;
			tc->maxrate = p->maxrate;
			tc->pktsize = p->pktsize;
			rc = 0;
		}
		mtx_unlock(&sc->tc_lock);
		if (rc != 0)
			return (rc);
	}

	rc = begin_synchronized_op(sc, NULL,
	    sleep_ok ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4sscp");
	if (rc != 0) {
		if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
			mtx_lock(&sc->tc_lock);
			MPASS(tc->refcount == 0);
			MPASS(tc->flags & CF_USER);
			MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);
			*tc = old;
			mtx_unlock(&sc->tc_lock);
		}
		return (rc);
	}
	if (hw_off_limits(sc))
		rc = ENXIO;
	else {
		rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED, fw_level,
		    fw_mode, fw_rateunit, fw_ratemode, p->channel, p->cl,
		    p->minrate, p->maxrate, p->weight, p->pktsize, 0, sleep_ok);
	}
	end_synchronized_op(sc, sleep_ok ? 0 : LOCK_HELD);

	if (p->level == SCHED_CLASS_LEVEL_CL_RL) {
		mtx_lock(&sc->tc_lock);
		MPASS(tc->refcount == 0);
		MPASS(tc->flags & CF_USER);
		MPASS(tc->state == CS_HW_UPDATE_IN_PROGRESS);

		if (rc == 0)
			tc->state = CS_HW_CONFIGURED;
		else {
			/* parameters failed so we don't park at params_set */
			tc->state = CS_UNINITIALIZED;
			tc->flags &= ~CF_USER;
			CH_ERR(pi, "failed to configure traffic class %d: %d. "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", p->cl, rc, fw_mode, fw_rateunit,
			    fw_ratemode, p->channel, p->minrate, p->maxrate,
			    p->pktsize, 0);
		}
		mtx_unlock(&sc->tc_lock);
	}

	return (rc);
}
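
/*
 * Taskqueue handler that pushes any traffic class left in the
 * CS_HW_UPDATE_REQUESTED state to the firmware. tc_lock is dropped around
 * the sleeping firmware call and reacquired to record the outcome.
 */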
static void
update_tx_sched(void *context, int pending)
{
	int i, j, rc;
	struct port_info *pi;
	struct tx_cl_rl_params *tc;
	struct adapter *sc = context;
	const int n = sc->params.nsched_cls;

	mtx_lock(&sc->tc_lock);
	for_each_port(sc, i) {
		pi = sc->port[i];
		tc = &pi->sched_params->cl_rl[0];
		for (j = 0; j < n; j++, tc++) {
			MPASS(mtx_owned(&sc->tc_lock));
			if (tc->state != CS_HW_UPDATE_REQUESTED)
				continue;
			mtx_unlock(&sc->tc_lock);

			if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK,
			    "t4utxs") != 0) {
				mtx_lock(&sc->tc_lock);
				continue;
			}
			rc = -t4_sched_params(sc, FW_SCHED_TYPE_PKTSCHED,
			    FW_SCHED_PARAMS_LEVEL_CL_RL, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, j, 0, tc->maxrate, 0,
			    tc->pktsize, tc->burstsize, 1);
			end_synchronized_op(sc, 0);

			mtx_lock(&sc->tc_lock);
			MPASS(tc->state == CS_HW_UPDATE_REQUESTED);
			if (rc == 0) {
				tc->state = CS_HW_CONFIGURED;
				continue;
			}
			/* parameters failed so we try to avoid params_set */
			if (tc->refcount > 0)
				tc->state = CS_PARAMS_SET;
			else
				tc->state = CS_UNINITIALIZED;
			CH_ERR(pi, "failed to configure traffic class %d: %d. "
			    "params: mode %d, rateunit %d, ratemode %d, "
			    "channel %d, minrate %d, maxrate %d, pktsize %d, "
			    "burstsize %d\n", j, rc, tc->mode, tc->rateunit,
			    tc->ratemode, pi->tx_chan, 0, tc->maxrate,
			    tc->pktsize, tc->burstsize);
		}
	}
	mtx_unlock(&sc->tc_lock);
}

int
t4_set_sched_class(struct adapter *sc, struct t4_sched_params *p)
{

	if (p->type != SCHED_CLASS_TYPE_PACKET)
		return (EINVAL);

	if (p->subcmd == SCHED_CLASS_SUBCMD_CONFIG)
		return (set_sched_class_config(sc, p->u.config.minmax));

	if (p->subcmd == SCHED_CLASS_SUBCMD_PARAMS)
		return (set_sched_class_params(sc, &p->u.params, 1));

	return (EINVAL);
}
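
/*
 * Bind a tx queue to traffic class idx, or unbind it if idx is -1.
 * txq->tc_idx is set to -2 to mark a bind/unbind in progress while tc_lock
 * is dropped for the firmware call; a reference is held on the new class
 * until the outcome of that call is known.
 */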
static int
bind_txq_to_traffic_class(struct adapter *sc, struct sge_txq *txq, int idx)
{
	struct tx_cl_rl_params *tc0, *tc;
	int rc, old_idx;
	uint32_t fw_mnem, fw_class;

	if (!(txq->eq.flags & EQ_HW_ALLOCATED))
		return (ENXIO);

	mtx_lock(&sc->tc_lock);
	if (txq->tc_idx == -2) {
		rc = EBUSY;	/* Another bind/unbind in progress already. */
		goto done;
	}
	if (idx == txq->tc_idx) {
		rc = 0;		/* No change, nothing to do. */
		goto done;
	}

	tc0 = &sc->port[txq->eq.tx_chan]->sched_params->cl_rl[0];
	if (idx != -1) {
		/*
		 * Bind to a different class at index idx.
		 */
		tc = &tc0[idx];
		if (tc->state != CS_HW_CONFIGURED) {
			rc = ENXIO;
			goto done;
		} else {
			/*
			 * Ok to proceed. Place a reference on the new class
			 * while still holding on to the reference on the
			 * previous class, if any.
			 */
			tc->refcount++;
		}
	}
	/* Mark as busy before letting go of the lock. */
	old_idx = txq->tc_idx;
	txq->tc_idx = -2;
	mtx_unlock(&sc->tc_lock);

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4btxq");
	if (rc == 0) {
		fw_mnem = (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH) |
		    V_FW_PARAMS_PARAM_YZ(txq->eq.cntxt_id));
		fw_class = idx < 0 ? 0xffffffff : idx;
		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &fw_mnem,
		    &fw_class);
		end_synchronized_op(sc, 0);
	}

	mtx_lock(&sc->tc_lock);
	MPASS(txq->tc_idx == -2);
	if (rc == 0) {
		/*
		 * Unbind, bind, or bind to a different class succeeded. Remove
		 * the reference on the old traffic class, if any.
		 */
		if (old_idx != -1) {
			tc = &tc0[old_idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = idx;
	} else {
		/*
		 * Unbind, bind, or bind to a different class failed. Remove
		 * the anticipatory reference on the new traffic class, if any.
		 */
		if (idx != -1) {
			tc = &tc0[idx];
			MPASS(tc->refcount > 0);
			tc->refcount--;
		}
		txq->tc_idx = old_idx;
	}
done:
	MPASS(txq->tc_idx >= -1 && txq->tc_idx < sc->params.nsched_cls);
	mtx_unlock(&sc->tc_lock);
	return (rc);
}
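
/*
 * Bind one tx queue (p->queue >= 0) or all tx queues (p->queue < 0) of the
 * port's main VI to scheduling class p->cl.
 */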
int
t4_set_sched_queue(struct adapter *sc, struct t4_sched_queue *p)
{
	struct port_info *pi = NULL;
	struct vi_info *vi;
	struct sge_txq *txq;
	int i, rc;

	if (p->port >= sc->params.nports)
		return (EINVAL);

	/*
	 * XXX: cxgbetool allows the user to specify the physical port only. So
	 * we always operate on the main VI.
	 */
	pi = sc->port[p->port];
	vi = &pi->vi[0];

	/* Checking VI_INIT_DONE outside a synch-op is a harmless race here. */
	if (!(vi->flags & VI_INIT_DONE))
		return (EAGAIN);
	MPASS(vi->ntxq > 0);

	if (!in_range(p->queue, 0, vi->ntxq - 1) ||
	    !in_range(p->cl, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	if (p->queue < 0) {
		/*
		 * Change the scheduling on all the TX queues for the
		 * interface.
		 */
		for_each_txq(vi, i, txq) {
			rc = bind_txq_to_traffic_class(sc, txq, p->cl);
			if (rc != 0)
				break;
		}
	} else {
		/*
		 * If p->queue is non-negative, then we're only changing the
		 * scheduling on a single specified TX queue.
		 */
		txq = &sc->sge.txq[vi->first_txq + p->queue];
		rc = bind_txq_to_traffic_class(sc, txq, p->cl);
	}

	return (rc);
}

int
t4_init_tx_sched(struct adapter *sc)
{
	int i;
	const int n = sc->params.nsched_cls;
	struct port_info *pi;

	mtx_init(&sc->tc_lock, "tx_sched lock", NULL, MTX_DEF);
	TASK_INIT(&sc->tc_task, 0, update_tx_sched, sc);
	for_each_port(sc, i) {
		pi = sc->port[i];
		pi->sched_params = malloc(sizeof(*pi->sched_params) +
		    n * sizeof(struct tx_cl_rl_params), M_CXGBE,
		    M_ZERO | M_WAITOK);
	}

	return (0);
}

int
t4_free_tx_sched(struct adapter *sc)
{
	int i;

	taskqueue_drain(taskqueue_thread, &sc->tc_task);

	for_each_port(sc, i) {
		if (sc->port[i] != NULL)
			free(sc->port[i]->sched_params, M_CXGBE);
	}

	if (mtx_initialized(&sc->tc_lock))
		mtx_destroy(&sc->tc_lock);

	return (0);
}

void
t4_update_tx_sched(struct adapter *sc)
{

	taskqueue_enqueue(taskqueue_thread, &sc->tc_task);
}
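
/*
 * Look for a class rate-limiter that matches the requested rate exactly
 * (maxrate in Kbps, absolute bit rate, flow mode, same pktsize/burstsize)
 * and take a reference on it. If there is no match, claim an unused class
 * for the rate and schedule a firmware update via the tc_task. Returns
 * ENOSPC when all classes are in use.
 */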
int
t4_reserve_cl_rl_kbps(struct adapter *sc, int port_id, u_int maxrate,
    int *tc_idx)
{
	int rc = 0, fa, fa2, i, pktsize, burstsize;
	bool update;
	struct tx_cl_rl_params *tc;
	struct port_info *pi;

	MPASS(port_id >= 0 && port_id < sc->params.nports);

	pi = sc->port[port_id];
	if (pi->sched_params->pktsize > 0)
		pktsize = pi->sched_params->pktsize;
	else
		pktsize = if_getmtu(pi->vi[0].ifp);
	if (pi->sched_params->burstsize > 0)
		burstsize = pi->sched_params->burstsize;
	else
		burstsize = pktsize * 4;
	tc = &pi->sched_params->cl_rl[0];

	update = false;
	fa = fa2 = -1;
	mtx_lock(&sc->tc_lock);
	for (i = 0; i < sc->params.nsched_cls; i++, tc++) {
		if (tc->state >= CS_PARAMS_SET &&
		    tc->ratemode == FW_SCHED_PARAMS_RATE_ABS &&
		    tc->rateunit == FW_SCHED_PARAMS_UNIT_BITRATE &&
		    tc->mode == FW_SCHED_PARAMS_MODE_FLOW &&
		    tc->maxrate == maxrate && tc->pktsize == pktsize &&
		    tc->burstsize == burstsize) {
			tc->refcount++;
			*tc_idx = i;
			if (tc->state == CS_PARAMS_SET) {
				tc->state = CS_HW_UPDATE_REQUESTED;
				update = true;
			}
			goto done;
		}

		if (fa < 0 && tc->state == CS_UNINITIALIZED) {
			MPASS(tc->refcount == 0);
			fa = i;		/* first available, never used. */
		}
		if (fa2 < 0 && tc->refcount == 0 && !(tc->flags & CF_USER)) {
			fa2 = i;	/* first available, used previously. */
		}
	}
	/* Not found */
	MPASS(i == sc->params.nsched_cls);
	if (fa == -1)
		fa = fa2;
	if (fa == -1) {
		*tc_idx = -1;
		rc = ENOSPC;
	} else {
		MPASS(fa >= 0 && fa < sc->params.nsched_cls);
		tc = &pi->sched_params->cl_rl[fa];
		MPASS(!(tc->flags & CF_USER));
		MPASS(tc->refcount == 0);

		tc->refcount = 1;
		tc->state = CS_HW_UPDATE_REQUESTED;
		tc->ratemode = FW_SCHED_PARAMS_RATE_ABS;
		tc->rateunit = FW_SCHED_PARAMS_UNIT_BITRATE;
		tc->mode = FW_SCHED_PARAMS_MODE_FLOW;
		tc->maxrate = maxrate;
		tc->pktsize = pktsize;
		tc->burstsize = burstsize;
		*tc_idx = fa;
		update = true;
	}
done:
	mtx_unlock(&sc->tc_lock);
	if (update)
		t4_update_tx_sched(sc);
	return (rc);
}

void
t4_release_cl_rl(struct adapter *sc, int port_id, int tc_idx)
{
	struct tx_cl_rl_params *tc;

	MPASS(port_id >= 0 && port_id < sc->params.nports);
	MPASS(tc_idx >= 0 && tc_idx < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = &sc->port[port_id]->sched_params->cl_rl[tc_idx];
	MPASS(tc->refcount > 0);
	tc->refcount--;
	mtx_unlock(&sc->tc_lock);
}
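
/*
 * Sysctl handler for a tx queue's traffic class. Reads return the queue's
 * current binding (-1 if unbound); writing a valid class index rebinds the
 * queue.
 */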
int
sysctl_tc(SYSCTL_HANDLER_ARGS)
{
	struct vi_info *vi = arg1;
	struct adapter *sc = vi->adapter;
	struct sge_txq *txq;
	int qidx = arg2, rc, tc_idx;

	MPASS(qidx >= vi->first_txq && qidx < vi->first_txq + vi->ntxq);

	txq = &sc->sge.txq[qidx];
	tc_idx = txq->tc_idx;
	rc = sysctl_handle_int(oidp, &tc_idx, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);

	if (sc->flags & IS_VF)
		return (EPERM);
	if (!in_range(tc_idx, 0, sc->params.nsched_cls - 1))
		return (EINVAL);

	return (bind_txq_to_traffic_class(sc, txq, tc_idx));
}

int
sysctl_tc_params(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct tx_cl_rl_params tc;
	struct sbuf *sb;
	int i, rc, port_id, mbps, gbps;

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	port_id = arg2 >> 16;
	MPASS(port_id < sc->params.nports);
	MPASS(sc->port[port_id] != NULL);
	i = arg2 & 0xffff;
	MPASS(i < sc->params.nsched_cls);

	mtx_lock(&sc->tc_lock);
	tc = sc->port[port_id]->sched_params->cl_rl[i];
	mtx_unlock(&sc->tc_lock);

	if (tc.state < CS_PARAMS_SET) {
		sbuf_printf(sb, "uninitialized");
		goto done;
	}

	switch (tc.rateunit) {
	case SCHED_CLASS_RATEUNIT_BITS:
		switch (tc.ratemode) {
		case SCHED_CLASS_RATEMODE_REL:
			/* XXX: top speed or actual link speed? */
			gbps = port_top_speed(sc->port[port_id]);
			sbuf_printf(sb, "%u%% of %uGbps", tc.maxrate, gbps);
			break;
		case SCHED_CLASS_RATEMODE_ABS:
			mbps = tc.maxrate / 1000;
			gbps = tc.maxrate / 1000000;
			if (tc.maxrate == gbps * 1000000)
				sbuf_printf(sb, "%uGbps", gbps);
			else if (tc.maxrate == mbps * 1000)
				sbuf_printf(sb, "%uMbps", mbps);
			else
				sbuf_printf(sb, "%uKbps", tc.maxrate);
			break;
		default:
			rc = ENXIO;
			goto done;
		}
		break;
	case SCHED_CLASS_RATEUNIT_PKTS:
		sbuf_printf(sb, "%upps", tc.maxrate);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

	switch (tc.mode) {
	case SCHED_CLASS_MODE_CLASS:
		/* Note that pktsize and burstsize are not used in this mode. */
		sbuf_printf(sb, " aggregate");
		break;
	case SCHED_CLASS_MODE_FLOW:
		sbuf_printf(sb, " per-flow");
		if (tc.pktsize > 0)
			sbuf_printf(sb, " pkt-size %u", tc.pktsize);
		if (tc.burstsize > 0)
			sbuf_printf(sb, " burst-size %u", tc.burstsize);
		break;
	default:
		rc = ENXIO;
		goto done;
	}

done:
	if (rc == 0)
		rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

#ifdef RATELIMIT
void
t4_init_etid_table(struct adapter *sc)
{
	int i;
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	mtx_init(&t->etid_lock, "etid lock", NULL, MTX_DEF);
	t->etid_tab = malloc(sizeof(*t->etid_tab) * t->netids, M_CXGBE,
	    M_ZERO | M_WAITOK);
	t->efree = t->etid_tab;
	t->etids_in_use = 0;
	for (i = 1; i < t->netids; i++)
		t->etid_tab[i - 1].next = &t->etid_tab[i];
	t->etid_tab[t->netids - 1].next = NULL;
}

void
t4_free_etid_table(struct adapter *sc)
{
	struct tid_info *t;

	if (!is_ethoffload(sc))
		return;

	t = &sc->tids;
	MPASS(t->netids > 0);

	free(t->etid_tab, M_CXGBE);
	t->etid_tab = NULL;

	if (mtx_initialized(&t->etid_lock))
		mtx_destroy(&t->etid_lock);
}

/* etid services */
static int alloc_etid(struct adapter *, struct cxgbe_rate_tag *);
static void free_etid(struct adapter *, int);

static int
alloc_etid(struct adapter *sc, struct cxgbe_rate_tag *cst)
{
	struct tid_info *t = &sc->tids;
	int etid = -1;

	mtx_lock(&t->etid_lock);
	if (t->efree) {
		union etid_entry *p = t->efree;

		etid = p - t->etid_tab + t->etid_base;
		t->efree = p->next;
		p->cst = cst;
		t->etids_in_use++;
	}
	mtx_unlock(&t->etid_lock);
	return (etid);
}

struct cxgbe_rate_tag *
lookup_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;

	return (t->etid_tab[etid - t->etid_base].cst);
}

static void
free_etid(struct adapter *sc, int etid)
{
	struct tid_info *t = &sc->tids;
	union etid_entry *p = &t->etid_tab[etid - t->etid_base];

	mtx_lock(&t->etid_lock);
	p->next = t->efree;
	t->efree = p;
	t->etids_in_use--;
	mtx_unlock(&t->etid_lock);
}

static int cxgbe_rate_tag_modify(struct m_snd_tag *,
    union if_snd_tag_modify_params *);
static int cxgbe_rate_tag_query(struct m_snd_tag *,
    union if_snd_tag_query_params *);
static void cxgbe_rate_tag_free(struct m_snd_tag *);

static const struct if_snd_tag_sw cxgbe_rate_tag_sw = {
	.snd_tag_modify = cxgbe_rate_tag_modify,
	.snd_tag_query = cxgbe_rate_tag_query,
	.snd_tag_free = cxgbe_rate_tag_free,
	.type = IF_SND_TAG_TYPE_RATE_LIMIT
};
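
/*
 * Allocate a rate-limit send tag: reserve a flow-mode traffic class for the
 * requested rate (max_rate is in bytes/s and is converted to Kbps for the
 * scheduler) and an etid to go with it.
 */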
int
cxgbe_rate_tag_alloc(if_t ifp, union if_snd_tag_alloc_params *params,
    struct m_snd_tag **pt)
{
	int rc, schedcl;
	struct vi_info *vi = if_getsoftc(ifp);
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct cxgbe_rate_tag *cst;

	MPASS(params->hdr.type == IF_SND_TAG_TYPE_RATE_LIMIT);

	rc = t4_reserve_cl_rl_kbps(sc, pi->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0)
		return (rc);
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);

	cst = malloc(sizeof(*cst), M_CXGBE, M_ZERO | M_NOWAIT);
	if (cst == NULL) {
failed:
		t4_release_cl_rl(sc, pi->port_id, schedcl);
		return (ENOMEM);
	}

	cst->etid = alloc_etid(sc, cst);
	if (cst->etid < 0) {
		free(cst, M_CXGBE);
		goto failed;
	}

	mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF);
	mbufq_init(&cst->pending_tx, INT_MAX);
	mbufq_init(&cst->pending_fwack, INT_MAX);
	m_snd_tag_init(&cst->com, ifp, &cxgbe_rate_tag_sw);
	cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF;
	cst->adapter = sc;
	cst->port_id = pi->port_id;
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	cst->tx_credits = sc->params.eo_wr_cred;
	cst->tx_total = cst->tx_credits;
	cst->plen = 0;
	cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
	    V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));

	/*
	 * Queues will be selected later when the connection flowid is
	 * available.
	 */

	*pt = &cst->com;
	return (0);
}

/*
 * Change in parameters, no change in ifp.
 */
static int
cxgbe_rate_tag_modify(struct m_snd_tag *mst,
    union if_snd_tag_modify_params *params)
{
	int rc, schedcl;
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);
	struct adapter *sc = cst->adapter;

	/* XXX: is schedcl -1 ok here? */
	MPASS(cst->schedcl >= 0 && cst->schedcl < sc->params.nsched_cls);

	mtx_lock(&cst->lock);
	MPASS(cst->flags & EO_SND_TAG_REF);
	rc = t4_reserve_cl_rl_kbps(sc, cst->port_id,
	    (params->rate_limit.max_rate * 8ULL / 1000), &schedcl);
	if (rc != 0) {
		mtx_unlock(&cst->lock);	/* Do not leak the lock on failure. */
		return (rc);
	}
	MPASS(schedcl >= 0 && schedcl < sc->params.nsched_cls);
	t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	cst->schedcl = schedcl;
	cst->max_rate = params->rate_limit.max_rate;
	mtx_unlock(&cst->lock);

	return (0);
}

static int
cxgbe_rate_tag_query(struct m_snd_tag *mst,
    union if_snd_tag_query_params *params)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	params->rate_limit.max_rate = cst->max_rate;

#define CST_TO_MST_QLEVEL_SCALE (IF_SND_QUEUE_LEVEL_MAX / cst->tx_total)
	params->rate_limit.queue_level =
	    (cst->tx_total - cst->tx_credits) * CST_TO_MST_QLEVEL_SCALE;

	return (0);
}

/*
 * Unlocks cst and frees it.
 */
void
cxgbe_rate_tag_free_locked(struct cxgbe_rate_tag *cst)
{
	struct adapter *sc = cst->adapter;

	mtx_assert(&cst->lock, MA_OWNED);
	MPASS((cst->flags & EO_SND_TAG_REF) == 0);
	MPASS(cst->tx_credits == cst->tx_total);
	MPASS(cst->plen == 0);
	MPASS(mbufq_first(&cst->pending_tx) == NULL);
	MPASS(mbufq_first(&cst->pending_fwack) == NULL);

	if (cst->etid >= 0)
		free_etid(sc, cst->etid);
	if (cst->schedcl != -1)
		t4_release_cl_rl(sc, cst->port_id, cst->schedcl);
	mtx_unlock(&cst->lock);
	mtx_destroy(&cst->lock);
	free(cst, M_CXGBE);
}
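
/*
 * The kernel has dropped its last reference on the tag. Free it immediately
 * if all tx credits are back; otherwise ask the firmware to flush the etid
 * and defer the free until the outstanding credits are returned.
 */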
static void
cxgbe_rate_tag_free(struct m_snd_tag *mst)
{
	struct cxgbe_rate_tag *cst = mst_to_crt(mst);

	mtx_lock(&cst->lock);

	/* The kernel is done with the snd_tag. Remove its reference. */
	MPASS(cst->flags & EO_SND_TAG_REF);
	cst->flags &= ~EO_SND_TAG_REF;

	if (cst->ncompl == 0) {
		/*
		 * No fw4_ack in flight. Free the tag right away if there are
		 * no outstanding credits. Request the firmware to return all
		 * credits for the etid otherwise.
		 */
		if (cst->tx_credits == cst->tx_total) {
			cxgbe_rate_tag_free_locked(cst);
			return;	/* cst is gone. */
		}
		send_etid_flush_wr(cst);
	}
	mtx_unlock(&cst->lock);
}

void
cxgbe_ratelimit_query(if_t ifp, struct if_ratelimit_query_results *q)
{
	struct vi_info *vi = if_getsoftc(ifp);
	struct adapter *sc = vi->adapter;

	q->rate_table = NULL;
	q->flags = RT_IS_SELECTABLE;
	/*
	 * Absolute max limits from the firmware configuration. Practical
	 * limits depend on the burstsize, pktsize (if_getmtu(ifp) ultimately)
	 * and the card's cclk.
	 */
	q->max_flows = sc->tids.netids;
	q->number_of_rates = sc->params.nsched_cls;
	q->min_segment_burst = 4;	/* matches PKTSCHED_BURST in the firmware. */

#if 1
	if (chip_id(sc) < CHELSIO_T6) {
		/* Based on testing by rrs@ with a T580 at burstsize = 4. */
		MPASS(q->min_segment_burst == 4);
		q->max_flows = min(4000, q->max_flows);
	} else {
		/* XXX: TBD, carried forward from T5 for now. */
		q->max_flows = min(4000, q->max_flows);
	}

	/*
	 * XXX: tcp_ratelimit.c grabs all available rates on link-up before it
	 * even knows whether hw pacing will be used or not. This prevents
	 * other consumers like SO_MAX_PACING_RATE or those using cxgbetool or
	 * the private ioctls from using any of the traffic classes.
	 *
	 * Underreport the number of rates to tcp_ratelimit so that it doesn't
	 * hog all of them. This can be removed if/when tcp_ratelimit switches
	 * to making its allocations on first-use rather than link-up. There is
	 * nothing wrong with one particular consumer reserving all the classes
	 * but it should do so only if it'll actually use hw rate limiting.
	 */
	q->number_of_rates /= 4;
#endif
}
#endif