/*-
 * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#ifdef RATELIMIT

static int mlx5e_rl_open_workers(struct mlx5e_priv *);
static void mlx5e_rl_close_workers(struct mlx5e_priv *);
static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
    struct sysctl_oid *, const char *name, const char *desc);
static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc);
static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);

static void
mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, rl->priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
	MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);
	MLX5_SET(cqc, cqc, uar_page, rl->priv->mdev->priv.uar->index);

	switch (rl->param.tx_coalesce_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}
}
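/*
 * Note on tx_coalesce_mode above: a value of 0 selects EQE-based
 * interrupt moderation, while any other value requests CQE-based
 * moderation (roughly, whether the moderation timer restarts from the
 * last event queue entry or the last completion queue entry). The
 * CQE-based mode is only used when the device advertises the
 * cq_period_start_from_cqe capability and otherwise silently falls
 * back to EQE-based mode.
 */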
static void
mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_rl_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_rl_build_sq_param(rl, &cparam->sq);
	mlx5e_rl_build_cq_param(rl, &cparam->cq);
}

static int
mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	/* use shared UAR */
	sq->uar_map = priv->bfreg.map;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->mkey_be = cpu_to_be32(priv->mr.key);
	sq->ifp = priv->ifp;
	sq->priv = priv;

	mlx5e_update_sq_inline(sq);

	return (0);

err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
{

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	bus_dma_tag_destroy(sq->dma_tag);
}

static int
mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	int err;

	err = mlx5e_rl_create_sq(priv, sq, param, ix);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, priv->rl.tisn);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(sq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_rl_destroy_sq(sq);

	return (err);
}

static void
mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
	mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
	mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);

	callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

	sq->cev_factor = priv->rl.param.tx_completion_fact;

	/* ensure the TX completion event factor is not zero */
	if (sq->cev_factor == 0)
		sq->cev_factor = 1;
}

static int
mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
    struct mlx5e_rl_channel_param *cparam,
    struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_priv *priv = rlw->priv;
	struct mlx5e_sq *sq;
	int err;

	sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO);

	/* init mutexes */
	mlx5e_rl_chan_mtx_init(priv, sq);

	/* open TX completion queue */
	err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq,
	    &mlx5e_tx_cq_comp, eq_ix);
	if (err)
		goto err_free;

	err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix);
	if (err)
		goto err_close_tx_cq;

	/* store TX channel pointer */
	*ppsq = sq;

	/* poll TX queue initially */
	sq->cq.mcq.comp(&sq->cq.mcq, NULL);

	return (0);

err_close_tx_cq:
	mlx5e_close_cq(&sq->cq);

err_free:
	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);
	free(sq, M_MLX5EN);
	atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
	return (err);
}
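/*
 * Note on ordering: mlx5e_rl_open_channel() above opens the CQ before
 * the SQ, and mlx5e_rl_close_channel() below tears the SQ down before
 * closing the CQ, so the completion handler always has a valid
 * completion queue for as long as the send queue exists.
 */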
static void
mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_sq *sq = *ppsq;

	/* check if channel is already closed */
	if (sq == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*ppsq = NULL;

	/* teardown and destroy SQ */
	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_rl_destroy_sq(sq);

	/* close CQ */
	mlx5e_close_cq(&sq->cq);

	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);

	free(sq, M_MLX5EN);
}

static void
mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
{
	/*
	 * Limit the maximum distance between completion events to
	 * half of the currently set TX queue size.
	 *
	 * The maximum number of queue entries a single IP packet can
	 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
	 *
	 * The worst case max value is then given as below:
	 */
	uint64_t max = rl->param.tx_queue_size /
	    (2 * MLX5_SEND_WQE_MAX_WQEBBS);

	/*
	 * Update the maximum completion factor value in case the
	 * tx_queue_size field changed. Ensure we don't overflow
	 * 16-bits.
	 */
	if (max < 1)
		max = 1;
	else if (max > 65535)
		max = 65535;
	rl->param.tx_completion_fact_max = max;

	/*
	 * Verify that the current TX completion factor is within the
	 * given limits:
	 */
	if (rl->param.tx_completion_fact < 1)
		rl->param.tx_completion_fact = 1;
	else if (rl->param.tx_completion_fact > max)
		rl->param.tx_completion_fact = max;
}

static int
mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
{
	struct mlx5e_priv *priv = sq->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY);
	MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
	MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);

	err = mlx5_core_modify_sq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

/*
 * This function searches the configured rate limit table for the
 * closest match, so that a single socket based application cannot
 * allocate all the available hardware rates. If the user selected
 * rate deviates too much from the closest rate available in the rate
 * limit table, the unlimited rate is selected instead.
 */
static uint64_t
mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
{
	uint64_t distance = -1ULL;
	uint64_t diff;
	uint64_t retval = 0;		/* unlimited */
	uint64_t x;

	/* search for closest rate */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		uint64_t rate = rl->rate_limit_table[x];
		if (rate == 0)
			continue;

		if (rate > user_rate)
			diff = rate - user_rate;
		else
			diff = user_rate - rate;

		/* check if this rate is closer than the previous best */
		if (diff < distance) {
			distance = diff;
			retval = rate;
		}
	}

	/* range check for multiplication below */
	if (user_rate > rl->param.tx_limit_max)
		user_rate = rl->param.tx_limit_max;

	/* fallback to unlimited, if rate deviates too much */
	if (distance > howmany(user_rate *
	    rl->param.tx_allowed_deviation, 1000ULL))
		retval = 0;

	return (retval);
}
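/*
 * Worked example of the deviation check above (illustrative numbers):
 * tx_allowed_deviation is in units of 0.1%, so the default of 50
 * means 5.0%. For a requested rate of 100 Mbit/s the tolerated
 * distance to the closest table entry is
 * howmany(100000000 * 50, 1000) = 5000000 bits/s. If no configured
 * rate is within 5 Mbit/s of the request, the function returns 0 and
 * the connection falls back to an unlimited send queue.
 */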
/*
 * This function sets the requested rate for a rate limit channel, in
 * bits per second. The requested rate will be filtered through the
 * find best rate function above.
 */
static int
mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel, uint64_t rate)
{
	struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
	struct mlx5e_sq *sq;
	uint64_t temp;
	uint16_t index;
	uint16_t burst;
	int error;

	if (rate != 0) {
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(rl);

		/* get current burst size in bytes */
		temp = rl->param.tx_burst_size *
		    MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);

		/* limit burst size to 64K currently */
		if (temp > 65535)
			temp = 65535;
		burst = temp;

		/* find best rate */
		rate = mlx5e_rl_find_best_rate_locked(rl, rate);

		MLX5E_RL_RUNLOCK(rl);

		if (rate == 0) {
			/* rate doesn't exist, fallback to unlimited */
			index = 0;
			rate = 0;
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
		} else {
			/* get a reference on the new rate */
			error = -mlx5_rl_add_rate(rlw->priv->mdev,
			    howmany(rate, 1000), burst, &index);

			if (error != 0) {
				/* adding rate failed, fallback to unlimited */
				index = 0;
				rate = 0;
				atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
			}
		}
		MLX5E_RL_WORKER_LOCK(rlw);
	} else {
		index = 0;
		burst = 0;	/* default */
	}

	/* atomically swap rates */
	temp = channel->last_rate;
	channel->last_rate = rate;
	rate = temp;

	/* atomically swap burst size */
	temp = channel->last_burst;
	channel->last_burst = burst;
	burst = temp;

	MLX5E_RL_WORKER_UNLOCK(rlw);
	/* put reference on the old rate, if any */
	if (rate != 0) {
		mlx5_rl_remove_rate(rlw->priv->mdev,
		    howmany(rate, 1000), burst);
	}

	/* set new rate, if SQ is running */
	sq = channel->sq;
	if (sq != NULL && READ_ONCE(sq->running) != 0) {
		error = mlx5e_rl_modify_sq(sq, index);
		if (error != 0)
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
	} else
		error = 0;
	MLX5E_RL_WORKER_LOCK(rlw);

	return (-error);
}
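/*
 * Note on the swap pattern above: the new rate and burst size are
 * swapped into the channel while the worker lock is held, and the
 * reference on the previous hardware rate is only dropped via
 * mlx5_rl_remove_rate() after the lock has been released, so the
 * channel never points at a rate entry whose reference has already
 * been put.
 */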
static void
mlx5e_rl_worker(void *arg)
{
	struct thread *td;
	struct mlx5e_rl_worker *rlw = arg;
	struct mlx5e_rl_channel *channel;
	struct mlx5e_priv *priv;
	unsigned ix;
	uint64_t x;
	int error;

	/* set thread priority */
	td = curthread;

	thread_lock(td);
	sched_prio(td, PI_SWI(SWI_NET));
	thread_unlock(td);

	priv = rlw->priv;

	/* compute completion vector */
	ix = (rlw - priv->rl.workers) %
	    priv->mdev->priv.eq_table.num_comp_vectors;

	/* TODO bind to CPU */

	/* open all the SQs */
	MLX5E_RL_WORKER_LOCK(rlw);
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
		if (channel->state == MLX5E_RL_ST_FREE)
			continue;
#endif
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(&priv->rl);
		error = mlx5e_rl_open_channel(rlw, ix,
		    &priv->rl.chan_param, &channel->sq);
		MLX5E_RL_RUNLOCK(&priv->rl);

		MLX5E_RL_WORKER_LOCK(rlw);
		if (error != 0) {
			mlx5_en_err(priv->ifp,
			    "mlx5e_rl_open_channel failed: %d\n", error);
			break;
		}
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
	}
	while (1) {
		if (STAILQ_FIRST(&rlw->process_head) == NULL) {
			/* check if we are tearing down */
			if (rlw->worker_done != 0)
				break;
			cv_wait(&rlw->cv, &rlw->mtx);
		}
		/* check if we are tearing down */
		if (rlw->worker_done != 0)
			break;
		channel = STAILQ_FIRST(&rlw->process_head);
		if (channel != NULL) {
			STAILQ_REMOVE_HEAD(&rlw->process_head, entry);

			switch (channel->state) {
			case MLX5E_RL_ST_MODIFY:
				channel->state = MLX5E_RL_ST_USED;
				MLX5E_RL_WORKER_UNLOCK(rlw);

				/* create channel on demand */
				if (channel->sq == NULL) {
					MLX5E_RL_RLOCK(&priv->rl);
					error = mlx5e_rl_open_channel(rlw, ix,
					    &priv->rl.chan_param, &channel->sq);
					MLX5E_RL_RUNLOCK(&priv->rl);

					if (error != 0) {
						mlx5_en_err(priv->ifp,
						    "mlx5e_rl_open_channel failed: %d\n", error);
					} else {
						atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
					}
				} else {
					mlx5e_resume_sq(channel->sq);
				}

				MLX5E_RL_WORKER_LOCK(rlw);
				/* convert from bytes/s to bits/s and set new rate */
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
				    channel->new_rate * 8ULL);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				break;

			case MLX5E_RL_ST_DESTROY:
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				if (channel->sq != NULL) {
					/*
					 * Make sure all packets are
					 * transmitted before SQ is
					 * returned to free list:
					 */
					MLX5E_RL_WORKER_UNLOCK(rlw);
					mlx5e_drain_sq(channel->sq);
					MLX5E_RL_WORKER_LOCK(rlw);
				}
				/* put the channel back into the free list */
				STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
				channel->state = MLX5E_RL_ST_FREE;
				atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
				break;
			default:
				/* NOP */
				break;
			}
		}
	}

	/* close all the SQs */
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

		/* update the initial rate */
		channel->init_rate = channel->last_rate;

		/* make sure we free up the rate resource */
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);

		if (channel->sq != NULL) {
			MLX5E_RL_WORKER_UNLOCK(rlw);
			mlx5e_rl_close_channel(&channel->sq);
			atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
			MLX5E_RL_WORKER_LOCK(rlw);
		}
	}

	rlw->worker_done = 0;
	cv_broadcast(&rlw->cv);
	MLX5E_RL_WORKER_UNLOCK(rlw);

	kthread_exit();
}
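/*
 * Channel state machine serviced by the worker loop above: channels
 * start on the free list in MLX5E_RL_ST_FREE, move to
 * MLX5E_RL_ST_USED when allocated to a send tag, are queued to the
 * worker as MLX5E_RL_ST_MODIFY for rate changes and as
 * MLX5E_RL_ST_DESTROY when the tag is freed, after which they return
 * to MLX5E_RL_ST_FREE on the free list.
 */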
static int
mlx5e_rl_open_tis(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, 0);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
}

static void
mlx5e_rl_close_tis(struct mlx5e_priv *priv)
{
	mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
}

static void
mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
    struct mlx5_core_dev *mdev)
{
	/* ratelimit workers */
	param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
	param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;

	/* range check */
	if (param->tx_worker_threads_def == 0 ||
	    param->tx_worker_threads_def > param->tx_worker_threads_max)
		param->tx_worker_threads_def = param->tx_worker_threads_max;

	/* ratelimit channels */
	param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
	    param->tx_worker_threads_def;
	param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;

	/* range check */
	if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
		param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;

	/* set default burst size */
	param->tx_burst_size = 4;	/* MTUs */

	/*
	 * Set maximum burst size
	 *
	 * The burst size is multiplied by the MTU and clamped to the
	 * range 0 ... 65535 bytes inclusively before being fed to the
	 * firmware.
	 *
	 * NOTE: If the burst size or MTU is changed, only ratelimit
	 * connections made after the change will use the new burst
	 * size.
	 */
	param->tx_burst_size_max = 255;

	/* get firmware rate limits in 1000bit/s and convert them to bit/s */
	param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
	param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;

	/* ratelimit table size */
	param->tx_rates_max = mdev->priv.rl_table.max_size;

	/* range check */
	if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
		param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;

	/* set default number of rates */
	param->tx_rates_def = param->tx_rates_max;

	/* set maximum allowed rate deviation */
	if (param->tx_limit_max != 0) {
		/*
		 * Make sure the deviation multiplication doesn't
		 * overflow unsigned 64-bit:
		 */
		param->tx_allowed_deviation_max = -1ULL /
		    param->tx_limit_max;
	}
	/* set default rate deviation */
	param->tx_allowed_deviation = 50;	/* 5.0% */

	/* channel parameters */
	param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
	param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
	param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
	param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
	param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
}
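/*
 * Worked example for mlx5e_rl_sync_tx_completion_fact() with the
 * defaults set above: assuming a TX queue size of 1024 entries and
 * MLX5_SEND_WQE_MAX_WQEBBS == 16 (illustrative values; the real ones
 * depend on MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE and the WQE
 * definitions), the maximum completion factor becomes
 * 1024 / (2 * 16) = 32, i.e. in the worst case a completion event is
 * still requested at least once per half of the queue.
 */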
static const char *mlx5e_rl_params_desc[] = {
	MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_table_params_desc[] = {
	MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_stats_desc[] = {
	MLX5E_RL_STATS(MLX5E_STATS_DESC)
};

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct sysctl_oid *node;
	struct sysctl_oid *stats;
	char buf[64];
	uint64_t i;
	uint64_t j;
	int error;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return (0);

	rl->priv = priv;

	sysctl_ctx_init(&rl->ctx);

	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");

	/* open own TIS domain for ratelimit SQs */
	error = mlx5e_rl_open_tis(priv);
	if (error)
		goto done;

	/* setup default value for parameters */
	mlx5e_rl_set_default_params(&rl->param, priv->mdev);

	/* update the completion factor */
	mlx5e_rl_sync_tx_completion_fact(rl);

	/* create root node */
	node = SYSCTL_ADD_NODE(&rl->ctx,
	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
	    "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support");

	if (node != NULL) {
		/* create SYSCTLs */
		for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(arg[i]),
			    node, mlx5e_rl_params_desc[2 * i],
			    mlx5e_rl_params_desc[2 * i + 1]);
		}

		stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
		    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "Rate limiting statistics");
		if (stats != NULL) {
			/* create SYSCTLs */
			for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
				mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
				    stats, mlx5e_rl_stats_desc[2 * i],
				    mlx5e_rl_stats_desc[2 * i + 1]);
			}
		}
	}

	/* allocate workers array */
	rl->workers = malloc(sizeof(rl->workers[0]) *
	    rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);

	/* allocate rate limit array */
	rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
	    rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);

	if (node != NULL) {
		/* create more SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
		    "A", "Show table of all configured TX rates");

		/* try to fetch rate table from kernel environment */
		for (i = 0; i != rl->param.tx_rates_def; i++) {
			/* compute path for tunable */
			snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
			    device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
			if (TUNABLE_QUAD_FETCH(buf, &j))
				mlx5e_rl_tx_limit_add(rl, j);
		}

		/* setup rate table sysctls */
		for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(table_arg[i]),
			    node, mlx5e_rl_table_params_desc[2 * i],
			    mlx5e_rl_table_params_desc[2 * i + 1]);
		}
	}

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		rlw->priv = priv;

		cv_init(&rlw->cv, "mlx5-worker-cv");
		mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
		STAILQ_INIT(&rlw->index_list_head);
		STAILQ_INIT(&rlw->process_head);

		rlw->channels = malloc(sizeof(rlw->channels[0]) *
		    rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);

		MLX5E_RL_WORKER_LOCK(rlw);
		for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
			struct mlx5e_rl_channel *channel = rlw->channels + i;
			channel->worker = rlw;
			channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
			STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	PRIV_LOCK(priv);
	error = mlx5e_rl_open_workers(priv);
	PRIV_UNLOCK(priv);

	if (error != 0) {
		mlx5_en_err(priv->ifp,
		    "mlx5e_rl_open_workers failed: %d\n", error);
	}

	return (0);

done:
	sysctl_ctx_free(&rl->ctx);
	sx_destroy(&rl->rl_sxlock);
	return (error);
}
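/*
 * Illustrative example of the tunable lookup above (unit number and
 * rate are made up): a line such as
 *
 *	dev.mce.0.rate_limit.tx_rate_add_0="100000000"
 *
 * in /boot/loader.conf pre-populates entry 0 of the rate table on
 * interface mce0 with 100 Mbit/s at driver load time.
 */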
static int
mlx5e_rl_open_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct thread *rl_thread = NULL;
	struct proc *rl_proc = NULL;
	uint64_t j;
	int error;

	if (priv->gone || rl->opened)
		return (-EINVAL);

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		/* start worker thread */
		error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
		    RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
		if (error != 0) {
			mlx5_en_err(rl->priv->ifp,
			    "kproc_kthread_add failed: %d\n", error);
			rlw->worker_done = 1;
		}
	}

	rl->opened = 1;

	return (0);
}

static void
mlx5e_rl_close_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	if (rl->opened == 0)
		return;

	/* tear down worker threads simultaneously */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		if (rlw->worker_done == 0) {
			rlw->worker_done = 1;
			cv_broadcast(&rlw->cv);
		} else {
			/* XXX thread not started */
			rlw->worker_done = 0;
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	/* wait for worker threads to exit */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		while (rlw->worker_done != 0)
			cv_wait(&rlw->cv, &rlw->mtx);
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	rl->opened = 0;
}
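/*
 * Teardown handshake used above: mlx5e_rl_close_workers() sets
 * worker_done and broadcasts on the condition variable once per
 * worker; each worker observes the flag, closes its SQs, clears
 * worker_done back to zero and broadcasts before exiting, which is
 * what the second loop waits for. A worker thread that never started
 * has worker_done pre-set to 1 and is simply cleared in place.
 */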
static void
mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
{
	unsigned x;

	MLX5E_RL_WLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++)
		rl->rate_limit_table[x] = 0;
	MLX5E_RL_WUNLOCK(rl);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return;

	sysctl_ctx_free(&rl->ctx);

	PRIV_LOCK(priv);
	mlx5e_rl_close_workers(priv);
	PRIV_UNLOCK(priv);

	mlx5e_rl_reset_rates(rl);

	/* close TIS domain */
	mlx5e_rl_close_tis(priv);

	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		cv_destroy(&rlw->cv);
		mtx_destroy(&rlw->mtx);
		free(rlw->channels, M_MLX5EN);
	}
	free(rl->rate_limit_table, M_MLX5EN);
	free(rl->workers, M_MLX5EN);
	sx_destroy(&rl->rl_sxlock);
}

static void
mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel)
{
	STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
	cv_broadcast(&rlw->cv);
}

static void
mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
{
	if (channel == NULL)
		return;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_MODIFY:
		channel->state = MLX5E_RL_ST_DESTROY;
		break;
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_DESTROY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);
}

static int
mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
{

	MLX5E_RL_WORKER_LOCK(rlw);
	channel->new_rate = rate;
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_MODIFY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (0);
}

static int
mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel,
    union if_snd_tag_query_params *params)
{
	int retval;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = 0;
		break;
	case MLX5E_RL_ST_MODIFY:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = EBUSY;
		break;
	default:
		retval = EINVAL;
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (retval);
}
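/*
 * Note on mlx5e_rl_query() above: EBUSY is returned while a rate
 * change is still queued to the worker (MLX5E_RL_ST_MODIFY),
 * signalling that the reported max_rate is still the previous one and
 * that the caller may retry the query later; EINVAL covers channels
 * with no active send tag.
 */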
static int
mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel **pchannel)
{
	struct mlx5e_rl_channel *channel;
	int retval = ENOMEM;

	MLX5E_RL_WORKER_LOCK(rlw);
	/* Check for available channel in free list */
	if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
		retval = 0;
		/* Remove head index from available list */
		STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
		channel->state = MLX5E_RL_ST_USED;
		atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
	} else {
		atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	*pchannel = channel;
#ifdef RATELIMIT_DEBUG
	mlx5_en_info(rlw->priv->ifp,
	    "Channel pointer for rate limit connection is %p\n", channel);
#endif
	return (retval);
}

int
mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_rl_channel *channel;
	struct mlx5e_rl_worker *rlw;
	struct mlx5e_priv *priv;
	int error;

	priv = ifp->if_softc;

	/* check if there is support for packet pacing or if device is going away */
	if (!MLX5_CAP_GEN(priv->mdev, qos) ||
	    !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
	    params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
		return (EOPNOTSUPP);

	/* compute worker thread this TCP connection belongs to */
	rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
	    priv->rl.param.tx_worker_threads_def);

	error = mlx5e_find_available_tx_ring_index(rlw, &channel);
	if (error != 0)
		goto done;

	error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
	if (error != 0) {
		mlx5e_rl_free(rlw, channel);
		goto done;
	}

	/* store pointer to mbuf tag */
	MPASS(channel->tag.refcount == 0);
	m_snd_tag_init(&channel->tag, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	*ppmt = &channel->tag;
done:
	return (error);
}

int
mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
}

int
mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_query(channel->worker, channel, params));
}

void
mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	mlx5e_rl_free(channel->worker, channel);
}
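/*
 * Worked example of the worker selection in mlx5e_rl_snd_tag_alloc()
 * above (the flowid is made up): with 8 worker threads and a
 * connection whose flowid is 0x1234, 0x1234 % 128 = 52 and
 * 52 % 8 = 4, so the send tag is serviced by worker 4. All operations
 * on a given flow therefore always map to the same worker.
 */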
static int
mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	struct sbuf sbuf;
	unsigned x;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	PRIV_LOCK(priv);

	sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);

	sbuf_printf(&sbuf,
	    "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
	    "\t" "--------------------------------------------\n");

	MLX5E_RL_RLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] == 0)
			continue;

		sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
		    x, (unsigned)rl->param.tx_burst_size,
		    (long long)rl->rate_limit_table[x]);
	}
	MLX5E_RL_RUNLOCK(rl);

	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);

	PRIV_UNLOCK(priv);

	return (error);
}

static int
mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
				mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts,
				    rl->param.tx_coalesce_mode);
			} else {
				mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts);
			}
		}
	}
	return (0);
}

void
mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			mtx_lock(&sq->lock);
			mlx5e_update_sq_inline(sq);
			mtx_unlock(&sq->lock);
		}
	}
}

static int
mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value < 1000 ||
	    mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);
	error = ENOMEM;

	/* check if rate already exists */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		error = EEXIST;
		break;
	}

	/* check if there is a free rate entry */
	if (x == rl->param.tx_rates_def) {
		for (x = 0; x != rl->param.tx_rates_def; x++) {
			if (rl->rate_limit_table[x] != 0)
				continue;
			rl->rate_limit_table[x] = value;
			error = 0;
			break;
		}
	}
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}

static int
mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);

	/* find the rate entry to clear */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		/* free up rate */
		rl->rate_limit_table[x] = 0;
		break;
	}

	/* check if the rate entry was found */
	if (x == rl->param.tx_rates_def)
		error = ENOENT;
	else
		error = 0;
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}
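/*
 * Illustrative usage of the two handlers above from userland (unit
 * number and rate are made up):
 *
 *	sysctl dev.mce.0.rate_limit.tx_limit_add=100000000
 *	sysctl dev.mce.0.rate_limit.tx_limit_clr=100000000
 *	sysctl dev.mce.0.rate_limit.tx_rate_show
 *
 * adds a 100 Mbit/s entry to the rate table, removes it again, and
 * prints the currently configured table.
 */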
static int
mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	unsigned mode_modify;
	unsigned was_opened;
	uint64_t value;
	uint64_t old;
	int error;

	PRIV_LOCK(priv);

	MLX5E_RL_RLOCK(rl);
	value = rl->param.arg[arg2];
	MLX5E_RL_RUNLOCK(rl);

	if (req != NULL) {
		old = value;
		error = sysctl_handle_64(oidp, &value, 0, req);
		if (error || req->newptr == NULL ||
		    value == rl->param.arg[arg2])
			goto done;
	} else {
		old = 0;
		error = 0;
	}

	/* check if device is gone */
	if (priv->gone) {
		error = ENXIO;
		goto done;
	}
	was_opened = rl->opened;
	mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);

	switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
	case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
		if (value > rl->param.tx_worker_threads_max)
			value = rl->param.tx_worker_threads_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
		if (value > rl->param.tx_channels_per_worker_max)
			value = rl->param.tx_channels_per_worker_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
		if (value > rl->param.tx_rates_max)
			value = rl->param.tx_rates_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
		/* range check */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_period))
			value = MLX5E_FLD_MAX(cqc, cq_period);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh in place to avoid bringing the interface down and up */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
		/* import TX coalesce packet count */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
			value = MLX5E_FLD_MAX(cqc, cq_max_count);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh in place to avoid bringing the interface down and up */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
		/* network interface must be down */
		if (was_opened != 0 && mode_modify == 0)
			mlx5e_rl_close_workers(priv);

		/* import TX coalesce mode */
		if (value != 0)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;

		/* restart network interface, if any */
		if (was_opened != 0) {
			if (mode_modify == 0)
				mlx5e_rl_open_workers(priv);
			else
				error = mlx5e_rl_refresh_channel_params(rl);
		}
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* import TX queue size */
		if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
			value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
		else if (value > priv->params_ethtool.tx_queue_size_max)
			value = priv->params_ethtool.tx_queue_size_max;

		/* store actual TX queue size */
		value = 1ULL << order_base_2(value);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify TX completion factor */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify parameter */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
		error = mlx5e_rl_tx_limit_add(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
		error = mlx5e_rl_tx_limit_clr(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
		/* range check */
		if (value > rl->param.tx_allowed_deviation_max)
			value = rl->param.tx_allowed_deviation_max;
		else if (value < rl->param.tx_allowed_deviation_min)
			value = rl->param.tx_allowed_deviation_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
		/* range check */
		if (value > rl->param.tx_burst_size_max)
			value = rl->param.tx_burst_size_max;
		else if (value < rl->param.tx_burst_size_min)
			value = rl->param.tx_burst_size_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	default:
		break;
	}
done:
	PRIV_UNLOCK(priv);
	return (error);
}
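/*
 * Note on the handler above: arg2 is the index of the parameter
 * inside the rl->param.arg[] array, computed with
 * MLX5E_RL_PARAMS_INDEX() when the sysctl node is created below, so a
 * single handler serves every rate limit parameter.
 */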
static void
mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/*
	 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
	 * take care of loading default sysctl value from the kernel
	 * environment, if any:
	 */
	if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
		/* read-only SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    name, CTLTYPE_U64 | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
	} else {
		if (strstr(name, "_def") != 0) {
#ifdef RATELIMIT_DEBUG
			/* tunable read-only advanced SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RDTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
#endif
		} else {
			/* read-write SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RWTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
		}
	}
}

static void
mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/* read-only SYSCTLs */
	SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
	    CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
}

#else

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{

	return (0);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	/* NOP */
}

#endif /* RATELIMIT */