/*-
 * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#ifdef RATELIMIT

static int mlx5e_rl_open_workers(struct mlx5e_priv *);
static void mlx5e_rl_close_workers(struct mlx5e_priv *);
static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
    struct sysctl_oid *, const char *name, const char *desc);
static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc);
static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);

static void
mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, rl->priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
	MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);

	switch (rl->param.tx_coalesce_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}
}

static void
mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_rl_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

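	/* derive SQ and CQ parameters from the current ratelimit settings */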
	mlx5e_rl_build_sq_param(rl, &cparam->sq);
	mlx5e_rl_build_cq_param(rl, &cparam->cq);
}

static int
mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	/* use shared UAR */
	sq->uar_map = priv->bfreg.map;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->mkey_be = cpu_to_be32(priv->mr.key);
	sq->ifp = priv->ifp;
	sq->priv = priv;

	mlx5e_update_sq_inline(sq);

	return (0);

err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
{

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	bus_dma_tag_destroy(sq->dma_tag);
}

static int
mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	int err;

	err = mlx5e_rl_create_sq(priv, sq, param, ix);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, priv->rl.tisn);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(sq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_rl_destroy_sq(sq);

	return (err);
}

static void
mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
	mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
	mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);

	callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

	sq->cev_factor = priv->rl.param.tx_completion_fact;

	/* ensure the TX completion event factor is not zero */
	if (sq->cev_factor == 0)
		sq->cev_factor = 1;
}

static int
mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
    struct mlx5e_rl_channel_param *cparam,
    struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_priv *priv = rlw->priv;
	struct mlx5e_sq *sq;
	int err;

	sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO);

	/* init mutexes */
	mlx5e_rl_chan_mtx_init(priv, sq);

	/* open TX completion queue */
	err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq,
	    &mlx5e_tx_cq_comp, eq_ix);
	if (err)
		goto err_free;

	err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix);
	if (err)
		goto err_close_tx_cq;

	/* store TX channel pointer */
	*ppsq = sq;

	/* poll TX queue initially */
	sq->cq.mcq.comp(&sq->cq.mcq, NULL);

	return (0);

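	/* error unwind: release resources in reverse order of creation */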
err_close_tx_cq:
	mlx5e_close_cq(&sq->cq);

err_free:
	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);
	free(sq, M_MLX5EN);
	atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
	return (err);
}

static void
mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_sq *sq = *ppsq;

	/* check if channel is already closed */
	if (sq == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*ppsq = NULL;

	/* teardown and destroy SQ */
	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_rl_destroy_sq(sq);

	/* close CQ */
	mlx5e_close_cq(&sq->cq);

	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);

	free(sq, M_MLX5EN);
}

static void
mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
{
	/*
	 * Limit the maximum distance between completion events to
	 * half of the currently set TX queue size.
	 *
	 * The maximum number of queue entries a single IP packet can
	 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
	 *
	 * The worst case max value is then given as below:
	 */
	uint64_t max = rl->param.tx_queue_size /
	    (2 * MLX5_SEND_WQE_MAX_WQEBBS);

	/*
	 * Update the maximum completion factor value in case the
	 * tx_queue_size field changed. Ensure we don't overflow
	 * 16-bits.
	 */
	if (max < 1)
		max = 1;
	else if (max > 65535)
		max = 65535;
	rl->param.tx_completion_fact_max = max;

	/*
	 * Verify that the current TX completion factor is within the
	 * given limits:
	 */
	if (rl->param.tx_completion_fact < 1)
		rl->param.tx_completion_fact = 1;
	else if (rl->param.tx_completion_fact > max)
		rl->param.tx_completion_fact = max;
}

static int
mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
{
	struct mlx5e_priv *priv = sq->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY);
	MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
	MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);

	err = mlx5_core_modify_sq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

/*
 * This function searches the configured rate limit table for the
 * best match, so that a single socket based application cannot
 * allocate all the available hardware rates. If the user-selected
 * rate deviates too much from the closest rate available in the
 * rate limit table, the unlimited rate is selected.
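 *
 * For example, with the default tx_allowed_deviation of 50 (units of
 * 0.1%), a requested rate of 100 Mbit/s only matches a table entry
 * within 5 Mbit/s of that value; anything farther off falls back to
 * the unlimited rate.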
 */
static uint64_t
mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
{
	uint64_t distance = -1ULL;
	uint64_t diff;
	uint64_t retval = 0;		/* unlimited */
	uint64_t x;

	/* search for closest rate */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		uint64_t rate = rl->rate_limit_table[x];
		if (rate == 0)
			continue;

		if (rate > user_rate)
			diff = rate - user_rate;
		else
			diff = user_rate - rate;

		/* check if distance is smaller than the previous best */
		if (diff < distance) {
			distance = diff;
			retval = rate;
		}
	}

	/* range check for multiplication below */
	if (user_rate > rl->param.tx_limit_max)
		user_rate = rl->param.tx_limit_max;

	/* fallback to unlimited, if rate deviates too much */
	if (distance > howmany(user_rate *
	    rl->param.tx_allowed_deviation, 1000ULL))
		retval = 0;

	return (retval);
}

/*
 * This function sets the requested rate for a rate limit channel, in
 * bits per second. The requested rate will be filtered through the
 * find best rate function above.
 */
static int
mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel, uint64_t rate)
{
	struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
	struct mlx5e_sq *sq;
	uint64_t temp;
	uint16_t index;
	uint16_t burst;
	int error;

	if (rate != 0) {
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(rl);

		/* get current burst size in bytes */
		temp = rl->param.tx_burst_size *
		    MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);

		/* limit burst size to 64K currently */
		if (temp > 65535)
			temp = 65535;
		burst = temp;

		/* find best rate */
		rate = mlx5e_rl_find_best_rate_locked(rl, rate);

		MLX5E_RL_RUNLOCK(rl);

		if (rate == 0) {
			/* rate doesn't exist, fallback to unlimited */
			index = 0;
			rate = 0;
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
		} else {
			/* get a reference on the new rate */
			error = -mlx5_rl_add_rate(rlw->priv->mdev,
			    howmany(rate, 1000), burst, &index);

			if (error != 0) {
				/* adding rate failed, fallback to unlimited */
				index = 0;
				rate = 0;
				atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
			}
		}
		MLX5E_RL_WORKER_LOCK(rlw);
	} else {
		index = 0;
		burst = 0;	/* default */
	}

	/* atomically swap rates */
	temp = channel->last_rate;
	channel->last_rate = rate;
	rate = temp;

	/* atomically swap burst size */
	temp = channel->last_burst;
	channel->last_burst = burst;
	burst = temp;

	MLX5E_RL_WORKER_UNLOCK(rlw);
	/* put reference on the old rate, if any */
	if (rate != 0) {
		mlx5_rl_remove_rate(rlw->priv->mdev,
		    howmany(rate, 1000), burst);
	}

	/* set new rate, if SQ is running */
	sq = channel->sq;
	if (sq != NULL && READ_ONCE(sq->running) != 0) {
		error = mlx5e_rl_modify_sq(sq, index);
		if (error != 0)
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
	} else
		error = 0;
	MLX5E_RL_WORKER_LOCK(rlw);

	return (-error);
}

static void
mlx5e_rl_worker(void *arg)
{
	struct thread *td;
	struct mlx5e_rl_worker *rlw = arg;
	struct mlx5e_rl_channel *channel;
	struct mlx5e_priv *priv;
	unsigned ix;
	uint64_t x;
	int error;

	/* set thread priority */
	td = curthread;

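	/* PI_SWI(SWI_NET) matches the priority of the network software interrupt threads */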
	thread_lock(td);
	sched_prio(td, PI_SWI(SWI_NET));
	thread_unlock(td);

	priv = rlw->priv;

	/* compute completion vector */
	ix = (rlw - priv->rl.workers) %
	    priv->mdev->priv.eq_table.num_comp_vectors;

	/* TODO bind to CPU */

	/* open all the SQs */
	MLX5E_RL_WORKER_LOCK(rlw);
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
		if (channel->state == MLX5E_RL_ST_FREE)
			continue;
#endif
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(&priv->rl);
		error = mlx5e_rl_open_channel(rlw, ix,
		    &priv->rl.chan_param, &channel->sq);
		MLX5E_RL_RUNLOCK(&priv->rl);

		MLX5E_RL_WORKER_LOCK(rlw);
		if (error != 0) {
			mlx5_en_err(priv->ifp,
			    "mlx5e_rl_open_channel failed: %d\n", error);
			break;
		}
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
	}
	while (1) {
		if (STAILQ_FIRST(&rlw->process_head) == NULL) {
			/* check if we are tearing down */
			if (rlw->worker_done != 0)
				break;
			cv_wait(&rlw->cv, &rlw->mtx);
		}
		/* check if we are tearing down */
		if (rlw->worker_done != 0)
			break;
		channel = STAILQ_FIRST(&rlw->process_head);
		if (channel != NULL) {
			STAILQ_REMOVE_HEAD(&rlw->process_head, entry);

			switch (channel->state) {
			case MLX5E_RL_ST_MODIFY:
				channel->state = MLX5E_RL_ST_USED;
				MLX5E_RL_WORKER_UNLOCK(rlw);

				/* create channel on demand */
				if (channel->sq == NULL) {
					MLX5E_RL_RLOCK(&priv->rl);
					error = mlx5e_rl_open_channel(rlw, ix,
					    &priv->rl.chan_param, &channel->sq);
					MLX5E_RL_RUNLOCK(&priv->rl);

					if (error != 0) {
						mlx5_en_err(priv->ifp,
						    "mlx5e_rl_open_channel failed: %d\n", error);
					} else {
						atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
					}
				} else {
					mlx5e_resume_sq(channel->sq);
				}

				MLX5E_RL_WORKER_LOCK(rlw);
				/* convert from bytes/s to bits/s and set new rate */
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
				    channel->new_rate * 8ULL);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				break;

			case MLX5E_RL_ST_DESTROY:
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				if (channel->sq != NULL) {
					/*
					 * Make sure all packets are
					 * transmitted before SQ is
					 * returned to free list:
					 */
					MLX5E_RL_WORKER_UNLOCK(rlw);
					mlx5e_drain_sq(channel->sq);
					MLX5E_RL_WORKER_LOCK(rlw);
				}
				/* put the channel back into the free list */
				STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
				channel->state = MLX5E_RL_ST_FREE;
				atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
				break;
			default:
				/* NOP */
				break;
			}
		}
	}

	/* close all the SQs */
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

		/* update the initial rate */
		channel->init_rate = channel->last_rate;

		/* make sure we free up the rate resource */
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);

		if (channel->sq != NULL) {
			MLX5E_RL_WORKER_UNLOCK(rlw);
			mlx5e_rl_close_channel(&channel->sq);
			atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
			MLX5E_RL_WORKER_LOCK(rlw);
		}
	}

	rlw->worker_done = 0;
	cv_broadcast(&rlw->cv);
	MLX5E_RL_WORKER_UNLOCK(rlw);

	kthread_exit();
}

static int
mlx5e_rl_open_tis(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, 0);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
}

static void
mlx5e_rl_close_tis(struct mlx5e_priv *priv)
{
	mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
}

static void
mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
    struct mlx5_core_dev *mdev)
{
	/* ratelimit workers */
	param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
	param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;

	/* range check */
	if (param->tx_worker_threads_def == 0 ||
	    param->tx_worker_threads_def > param->tx_worker_threads_max)
		param->tx_worker_threads_def = param->tx_worker_threads_max;

	/* ratelimit channels */
	param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
	    param->tx_worker_threads_def;
	param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;

	/* range check */
	if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
		param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;

	/* set default burst size */
	param->tx_burst_size = 4;	/* MTUs */

	/*
	 * Set maximum burst size
	 *
	 * The burst size is multiplied by the MTU and clamped to the
	 * range 0 ... 65535 bytes inclusively before being fed to the
	 * firmware.
	 *
	 * NOTE: If the burst size or MTU is changed, only ratelimit
	 * connections made after the change will use the new burst
	 * size.
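	 *
	 * For example, with the default tx_burst_size of 4 MTUs and a
	 * standard 1500 byte MTU, the burst size programmed into the
	 * firmware is roughly 4 * (1500 + Ethernet overhead), on the
	 * order of 6 KB, well below the 65535 byte clamp.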
	 */
	param->tx_burst_size_max = 255;

	/* get firmware rate limits in units of 1000 bit/s and convert them to bit/s */
	param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
	param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;

	/* ratelimit table size */
	param->tx_rates_max = mdev->priv.rl_table.max_size;

	/* range check */
	if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
		param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;

	/* set default number of rates */
	param->tx_rates_def = param->tx_rates_max;

	/* set maximum allowed rate deviation */
	if (param->tx_limit_max != 0) {
		/*
		 * Make sure the deviation multiplication doesn't
		 * overflow unsigned 64-bit:
		 */
		param->tx_allowed_deviation_max = -1ULL /
		    param->tx_limit_max;
	}
	/* set default rate deviation */
	param->tx_allowed_deviation = 50;	/* 5.0% */

	/* channel parameters */
	param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
	param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
	param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
	param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
	param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
}

static const char *mlx5e_rl_params_desc[] = {
	MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_table_params_desc[] = {
	MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_stats_desc[] = {
	MLX5E_RL_STATS(MLX5E_STATS_DESC)
};

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct sysctl_oid *node;
	struct sysctl_oid *stats;
	char buf[64];
	uint64_t i;
	uint64_t j;
	int error;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return (0);

	rl->priv = priv;

	sysctl_ctx_init(&rl->ctx);

	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");

	/* open own TIS domain for ratelimit SQs */
	error = mlx5e_rl_open_tis(priv);
	if (error)
		goto done;

	/* set up default values for parameters */
	mlx5e_rl_set_default_params(&rl->param, priv->mdev);

	/* update the completion factor */
	mlx5e_rl_sync_tx_completion_fact(rl);

	/* create root node */
	node = SYSCTL_ADD_NODE(&rl->ctx,
	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
	    "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support");

	if (node != NULL) {
		/* create SYSCTLs */
		for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(arg[i]),
			    node, mlx5e_rl_params_desc[2 * i],
			    mlx5e_rl_params_desc[2 * i + 1]);
		}

		stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
		    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "Rate limiting statistics");
		if (stats != NULL) {
			/* create SYSCTLs */
			for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
				mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
				    stats, mlx5e_rl_stats_desc[2 * i],
				    mlx5e_rl_stats_desc[2 * i + 1]);
			}
		}
	}

	/* allocate workers array */
	rl->workers = malloc(sizeof(rl->workers[0]) *
	    rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);

	/* allocate rate limit array */
	rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
	    rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);

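	/*
	 * The rate limit table starts out empty; entries are added
	 * through the rate table sysctls created below, or preloaded
	 * from dev.mce.<unit>.rate_limit.tx_rate_add_<n> kernel
	 * environment tunables at attach time.
	 */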
	if (node != NULL) {
		/* create more SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
		    "A", "Show table of all configured TX rates");

		/* try to fetch rate table from kernel environment */
		for (i = 0; i != rl->param.tx_rates_def; i++) {
			/* compute path for tunable */
			snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
			    device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
			if (TUNABLE_QUAD_FETCH(buf, &j))
				mlx5e_rl_tx_limit_add(rl, j);
		}

		/* setup rate table sysctls */
		for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(table_arg[i]),
			    node, mlx5e_rl_table_params_desc[2 * i],
			    mlx5e_rl_table_params_desc[2 * i + 1]);
		}
	}

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		rlw->priv = priv;

		cv_init(&rlw->cv, "mlx5-worker-cv");
		mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
		STAILQ_INIT(&rlw->index_list_head);
		STAILQ_INIT(&rlw->process_head);

		rlw->channels = malloc(sizeof(rlw->channels[0]) *
		    rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);

		MLX5E_RL_WORKER_LOCK(rlw);
		for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
			struct mlx5e_rl_channel *channel = rlw->channels + i;
			channel->worker = rlw;
			channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
			STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	PRIV_LOCK(priv);
	error = mlx5e_rl_open_workers(priv);
	PRIV_UNLOCK(priv);

	if (error != 0) {
		mlx5_en_err(priv->ifp,
		    "mlx5e_rl_open_workers failed: %d\n", error);
	}

	return (0);

done:
	sysctl_ctx_free(&rl->ctx);
	sx_destroy(&rl->rl_sxlock);
	return (error);
}

static int
mlx5e_rl_open_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct thread *rl_thread = NULL;
	struct proc *rl_proc = NULL;
	uint64_t j;
	int error;

	if (priv->gone || rl->opened)
		return (-EINVAL);

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		/* start worker thread */
		error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
		    RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
		if (error != 0) {
			mlx5_en_err(rl->priv->ifp,
			    "kproc_kthread_add failed: %d\n", error);
			rlw->worker_done = 1;
		}
	}

	rl->opened = 1;

	return (0);
}

static void
mlx5e_rl_close_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	if (rl->opened == 0)
		return;

	/* tear down worker threads simultaneously */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		if (rlw->worker_done == 0) {
			rlw->worker_done = 1;
			cv_broadcast(&rlw->cv);
		} else {
			/* XXX thread not started */
			rlw->worker_done = 0;
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	/* wait for worker threads to exit */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		while (rlw->worker_done != 0)
			cv_wait(&rlw->cv, &rlw->mtx);
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	rl->opened = 0;
}

static void
mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
{
	unsigned x;

	MLX5E_RL_WLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++)
		rl->rate_limit_table[x] = 0;
	MLX5E_RL_WUNLOCK(rl);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return;

	sysctl_ctx_free(&rl->ctx);

	PRIV_LOCK(priv);
	mlx5e_rl_close_workers(priv);
	PRIV_UNLOCK(priv);

	mlx5e_rl_reset_rates(rl);

	/* close TIS domain */
	mlx5e_rl_close_tis(priv);

	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		cv_destroy(&rlw->cv);
		mtx_destroy(&rlw->mtx);
		free(rlw->channels, M_MLX5EN);
	}
	free(rl->rate_limit_table, M_MLX5EN);
	free(rl->workers, M_MLX5EN);
	sx_destroy(&rl->rl_sxlock);
}

static void
mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel)
{
	STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
	cv_broadcast(&rlw->cv);
}

static void
mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
{
	if (channel == NULL)
		return;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_MODIFY:
		channel->state = MLX5E_RL_ST_DESTROY;
		break;
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_DESTROY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);
}

static int
mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
{

	MLX5E_RL_WORKER_LOCK(rlw);
	channel->new_rate = rate;
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_MODIFY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (0);
}

static int
mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel,
    union if_snd_tag_query_params *params)
{
	int retval;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = 0;
		break;
	case MLX5E_RL_ST_MODIFY:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = EBUSY;
		break;
	default:
		retval = EINVAL;
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (retval);
}

static int
mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel **pchannel)
{
	struct mlx5e_rl_channel *channel;
	int retval = ENOMEM;

	MLX5E_RL_WORKER_LOCK(rlw);
	/* Check for available channel in free list */
	if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
		retval = 0;
		/* Remove head index from available list */
		STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
		channel->state = MLX5E_RL_ST_USED;
		atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
	} else {
		atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	*pchannel = channel;
#ifdef RATELIMIT_DEBUG
	mlx5_en_info(rlw->priv->ifp,
	    "Channel pointer for rate limit connection is %p\n", channel);
#endif
	return (retval);
}

int
mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_rl_channel *channel;
	struct mlx5e_rl_worker *rlw;
	struct mlx5e_priv *priv;
	int error;

	priv = ifp->if_softc;

	/* check if there is support for packet pacing or if device is going away */
	if (!MLX5_CAP_GEN(priv->mdev, qos) ||
	    !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
	    params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
		return (EOPNOTSUPP);

	/* compute worker thread this TCP connection belongs to */
	rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
	    priv->rl.param.tx_worker_threads_def);

	error = mlx5e_find_available_tx_ring_index(rlw, &channel);
	if (error != 0)
		goto done;

	error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
	if (error != 0) {
		mlx5e_rl_free(rlw, channel);
		goto done;
	}

	/* store pointer to mbuf tag */
	MPASS(channel->tag.refcount == 0);
	m_snd_tag_init(&channel->tag, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	*ppmt = &channel->tag;
done:
	return (error);
}

int
mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
}

int
mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_query(channel->worker, channel, params));
}

void
mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	mlx5e_rl_free(channel->worker, channel);
}

static int
mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	struct sbuf sbuf;
	unsigned x;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	PRIV_LOCK(priv);

	sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);

	sbuf_printf(&sbuf,
	    "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
	    "\t" "--------------------------------------------\n");

	MLX5E_RL_RLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] == 0)
			continue;

		sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
		    x, (unsigned)rl->param.tx_burst_size,
		    (long long)rl->rate_limit_table[x]);
	}
	MLX5E_RL_RUNLOCK(rl);

	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);

	PRIV_UNLOCK(priv);

	return (error);
}

static int
mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
				mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts,
				    rl->param.tx_coalesce_mode);
			} else {
				mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts);
			}
		}
	}
	return (0);
}

void
mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			mtx_lock(&sq->lock);
			mlx5e_update_sq_inline(sq);
			mtx_unlock(&sq->lock);
		}
	}
}

static int
mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value < 1000 ||
	    mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);
	error = ENOMEM;

	/* check if rate already exists */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		error = EEXIST;
		break;
	}

	/* check if there is a free rate entry */
	if (x == rl->param.tx_rates_def) {
		for (x = 0; x != rl->param.tx_rates_def; x++) {
			if (rl->rate_limit_table[x] != 0)
				continue;
			rl->rate_limit_table[x] = value;
			error = 0;
			break;
		}
	}
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}

static int
mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);

	/* search for the given rate and clear it, if found */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		/* free up rate */
		rl->rate_limit_table[x] = 0;
		break;
	}

	/* check if the rate entry was found */
	if (x == rl->param.tx_rates_def)
		error = ENOENT;
	else
		error = 0;
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}

static int
mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	unsigned mode_modify;
	unsigned was_opened;
	uint64_t value;
	uint64_t old;
	int error;

	PRIV_LOCK(priv);

	MLX5E_RL_RLOCK(rl);
	value = rl->param.arg[arg2];
	MLX5E_RL_RUNLOCK(rl);

	if (req != NULL) {
		old = value;
		error = sysctl_handle_64(oidp, &value, 0, req);
		if (error || req->newptr == NULL ||
		    value == rl->param.arg[arg2])
			goto done;
	} else {
		old = 0;
		error = 0;
	}

	/* check if device is gone */
	if (priv->gone) {
		error = ENXIO;
		goto done;
	}
	was_opened = rl->opened;
	mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);

	switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
	case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
		if (value > rl->param.tx_worker_threads_max)
			value = rl->param.tx_worker_threads_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
		if (value > rl->param.tx_channels_per_worker_max)
			value = rl->param.tx_channels_per_worker_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
		if (value > rl->param.tx_rates_max)
			value = rl->param.tx_rates_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
		/* range check */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_period))
			value = MLX5E_FLD_MAX(cqc, cq_period);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh in place to avoid downing and upping the network interface */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
		/* import TX coalesce packet count */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
			value = MLX5E_FLD_MAX(cqc, cq_max_count);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh in place to avoid downing and upping the network interface */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
		/* network interface must be down */
		if (was_opened != 0 && mode_modify == 0)
			mlx5e_rl_close_workers(priv);

		/* import TX coalesce mode */
		if (value != 0)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;

		/* restart network interface, if any */
		if (was_opened != 0) {
			if (mode_modify == 0)
				mlx5e_rl_open_workers(priv);
			else
				error = mlx5e_rl_refresh_channel_params(rl);
		}
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* import TX queue size */
		if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
			value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
		else if (value > priv->params_ethtool.tx_queue_size_max)
			value = priv->params_ethtool.tx_queue_size_max;

		/* store actual TX queue size */
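		/* order_base_2() rounds up, so a power of two is stored */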
		value = 1ULL << order_base_2(value);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify TX completion factor */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify parameter */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
		error = mlx5e_rl_tx_limit_add(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
		error = mlx5e_rl_tx_limit_clr(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
		/* range check */
		if (value > rl->param.tx_allowed_deviation_max)
			value = rl->param.tx_allowed_deviation_max;
		else if (value < rl->param.tx_allowed_deviation_min)
			value = rl->param.tx_allowed_deviation_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
		/* range check */
		if (value > rl->param.tx_burst_size_max)
			value = rl->param.tx_burst_size_max;
		else if (value < rl->param.tx_burst_size_min)
			value = rl->param.tx_burst_size_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	default:
		break;
	}
done:
	PRIV_UNLOCK(priv);
	return (error);
}

static void
mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/*
	 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
	 * take care of loading default sysctl value from the kernel
	 * environment, if any:
	 */
	if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
		/* read-only SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    name, CTLTYPE_U64 | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
	} else {
		if (strstr(name, "_def") != 0) {
#ifdef RATELIMIT_DEBUG
			/* tunable read-only advanced SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RDTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
#endif
		} else {
			/* read-write SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RWTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
		}
	}
}

static void
mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/* read-only SYSCTLs */
	SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
	    CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
}

#else

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{

	return (0);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	/* NOP */
}

#endif /* RATELIMIT */