1 /*- 2 * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
24 * 25 * $FreeBSD$ 26 */ 27 28 #include "en.h" 29 30 #ifdef RATELIMIT 31 32 static int mlx5e_rl_open_workers(struct mlx5e_priv *); 33 static void mlx5e_rl_close_workers(struct mlx5e_priv *); 34 static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); 35 static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, 36 struct sysctl_oid *, const char *name, const char *desc); 37 static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 38 struct sysctl_oid *node, const char *name, const char *desc); 39 static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); 40 static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); 41 42 static void 43 mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, 44 struct mlx5e_sq_param *param) 45 { 46 void *sqc = param->sqc; 47 void *wq = MLX5_ADDR_OF(sqc, sqc, wq); 48 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 49 50 MLX5_SET(wq, wq, log_wq_sz, log_sq_size); 51 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 52 MLX5_SET(wq, wq, pd, rl->priv->pdn); 53 54 param->wq.linear = 1; 55 } 56 57 static void 58 mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, 59 struct mlx5e_cq_param *param) 60 { 61 void *cqc = param->cqc; 62 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 63 64 MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); 65 MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); 66 MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); 67 MLX5_SET(cqc, cqc, uar_page, rl->priv->mdev->priv.uar->index); 68 69 switch (rl->param.tx_coalesce_mode) { 70 case 0: 71 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 72 break; 73 default: 74 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) 75 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); 76 else 77 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 78 break; 79 } 80 } 
81 82 static void 83 mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, 84 struct mlx5e_rl_channel_param *cparam) 85 { 86 memset(cparam, 0, sizeof(*cparam)); 87 88 mlx5e_rl_build_sq_param(rl, &cparam->sq); 89 mlx5e_rl_build_cq_param(rl, &cparam->cq); 90 } 91 92 static int 93 mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 94 struct mlx5e_sq_param *param, int ix) 95 { 96 struct mlx5_core_dev *mdev = priv->mdev; 97 void *sqc = param->sqc; 98 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); 99 int err; 100 101 /* Create DMA descriptor TAG */ 102 if ((err = -bus_dma_tag_create( 103 bus_get_dma_tag(mdev->pdev->dev.bsddev), 104 1, /* any alignment */ 105 0, /* no boundary */ 106 BUS_SPACE_MAXADDR, /* lowaddr */ 107 BUS_SPACE_MAXADDR, /* highaddr */ 108 NULL, NULL, /* filter, filterarg */ 109 MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ 110 MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ 111 MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 112 0, /* flags */ 113 NULL, NULL, /* lockfunc, lockfuncarg */ 114 &sq->dma_tag))) 115 goto done; 116 117 sq->mkey_be = cpu_to_be32(priv->mr.key); 118 sq->ifp = priv->ifp; 119 sq->priv = priv; 120 121 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, 122 &sq->wq_ctrl); 123 if (err) 124 goto err_free_dma_tag; 125 126 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 127 128 err = mlx5e_alloc_sq_db(sq); 129 if (err) 130 goto err_sq_wq_destroy; 131 132 mlx5e_update_sq_inline(sq); 133 134 return (0); 135 136 err_sq_wq_destroy: 137 mlx5_wq_destroy(&sq->wq_ctrl); 138 err_free_dma_tag: 139 bus_dma_tag_destroy(sq->dma_tag); 140 done: 141 return (err); 142 } 143 144 static void 145 mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) 146 { 147 148 mlx5e_free_sq_db(sq); 149 mlx5_wq_destroy(&sq->wq_ctrl); 150 bus_dma_tag_destroy(sq->dma_tag); 151 } 152 153 static int 154 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 155 struct mlx5e_sq_param *param, int ix) 156 { 157 int err; 158 159 err = mlx5e_rl_create_sq(priv, sq, param, ix); 160 if (err) 161 
return (err); 162 163 err = mlx5e_enable_sq(sq, param, &priv->channel[ix].bfreg, priv->rl.tisn); 164 if (err) 165 goto err_destroy_sq; 166 167 err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); 168 if (err) 169 goto err_disable_sq; 170 171 WRITE_ONCE(sq->running, 1); 172 173 return (0); 174 175 err_disable_sq: 176 mlx5e_disable_sq(sq); 177 err_destroy_sq: 178 mlx5e_rl_destroy_sq(sq); 179 180 return (err); 181 } 182 183 static void 184 mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) 185 { 186 mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); 187 mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); 188 189 callout_init_mtx(&sq->cev_callout, &sq->lock, 0); 190 191 sq->cev_factor = priv->rl.param.tx_completion_fact; 192 193 /* ensure the TX completion event factor is not zero */ 194 if (sq->cev_factor == 0) 195 sq->cev_factor = 1; 196 } 197 198 static int 199 mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, 200 struct mlx5e_rl_channel_param *cparam, 201 struct mlx5e_sq *volatile *ppsq) 202 { 203 struct mlx5e_priv *priv = rlw->priv; 204 struct mlx5e_sq *sq; 205 int err; 206 207 sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); 208 209 /* init mutexes */ 210 mlx5e_rl_chan_mtx_init(priv, sq); 211 212 /* open TX completion queue */ 213 err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq, 214 &mlx5e_tx_cq_comp, eq_ix); 215 if (err) 216 goto err_free; 217 218 err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); 219 if (err) 220 goto err_close_tx_cq; 221 222 /* store TX channel pointer */ 223 *ppsq = sq; 224 225 /* poll TX queue initially */ 226 sq->cq.mcq.comp(&sq->cq.mcq, NULL); 227 228 return (0); 229 230 err_close_tx_cq: 231 mlx5e_close_cq(&sq->cq); 232 233 err_free: 234 /* destroy mutexes */ 235 mtx_destroy(&sq->lock); 236 mtx_destroy(&sq->comp_lock); 237 free(sq, M_MLX5EN); 238 atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); 239 return (err); 240 } 241 242 static void 243 
mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) 244 { 245 struct mlx5e_sq *sq = *ppsq; 246 247 /* check if channel is already closed */ 248 if (sq == NULL) 249 return; 250 /* ensure channel pointer is no longer used */ 251 *ppsq = NULL; 252 253 /* teardown and destroy SQ */ 254 mlx5e_drain_sq(sq); 255 mlx5e_disable_sq(sq); 256 mlx5e_rl_destroy_sq(sq); 257 258 /* close CQ */ 259 mlx5e_close_cq(&sq->cq); 260 261 /* destroy mutexes */ 262 mtx_destroy(&sq->lock); 263 mtx_destroy(&sq->comp_lock); 264 265 free(sq, M_MLX5EN); 266 } 267 268 static void 269 mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) 270 { 271 /* 272 * Limit the maximum distance between completion events to 273 * half of the currently set TX queue size. 274 * 275 * The maximum number of queue entries a single IP packet can 276 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. 277 * 278 * The worst case max value is then given as below: 279 */ 280 uint64_t max = rl->param.tx_queue_size / 281 (2 * MLX5_SEND_WQE_MAX_WQEBBS); 282 283 /* 284 * Update the maximum completion factor value in case the 285 * tx_queue_size field changed. Ensure we don't overflow 286 * 16-bits. 
287 */ 288 if (max < 1) 289 max = 1; 290 else if (max > 65535) 291 max = 65535; 292 rl->param.tx_completion_fact_max = max; 293 294 /* 295 * Verify that the current TX completion factor is within the 296 * given limits: 297 */ 298 if (rl->param.tx_completion_fact < 1) 299 rl->param.tx_completion_fact = 1; 300 else if (rl->param.tx_completion_fact > max) 301 rl->param.tx_completion_fact = max; 302 } 303 304 static int 305 mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) 306 { 307 struct mlx5e_priv *priv = sq->priv; 308 struct mlx5_core_dev *mdev = priv->mdev; 309 310 void *in; 311 void *sqc; 312 int inlen; 313 int err; 314 315 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 316 in = mlx5_vzalloc(inlen); 317 if (in == NULL) 318 return (-ENOMEM); 319 320 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 321 322 MLX5_SET(modify_sq_in, in, sqn, sq->sqn); 323 MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); 324 MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); 325 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); 326 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); 327 328 err = mlx5_core_modify_sq(mdev, in, inlen); 329 330 kvfree(in); 331 332 return (err); 333 } 334 335 /* 336 * This function will search the configured rate limit table for the 337 * best match to avoid that a single socket based application can 338 * allocate all the available hardware rates. If the user selected 339 * rate deviates too much from the closes rate available in the rate 340 * limit table, unlimited rate will be selected. 
341 */ 342 static uint64_t 343 mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) 344 { 345 uint64_t distance = -1ULL; 346 uint64_t diff; 347 uint64_t retval = 0; /* unlimited */ 348 uint64_t x; 349 350 /* search for closest rate */ 351 for (x = 0; x != rl->param.tx_rates_def; x++) { 352 uint64_t rate = rl->rate_limit_table[x]; 353 if (rate == 0) 354 continue; 355 356 if (rate > user_rate) 357 diff = rate - user_rate; 358 else 359 diff = user_rate - rate; 360 361 /* check if distance is smaller than previous rate */ 362 if (diff < distance) { 363 distance = diff; 364 retval = rate; 365 } 366 } 367 368 /* range check for multiplication below */ 369 if (user_rate > rl->param.tx_limit_max) 370 user_rate = rl->param.tx_limit_max; 371 372 /* fallback to unlimited, if rate deviates too much */ 373 if (distance > howmany(user_rate * 374 rl->param.tx_allowed_deviation, 1000ULL)) 375 retval = 0; 376 377 return (retval); 378 } 379 380 /* 381 * This function sets the requested rate for a rate limit channel, in 382 * bits per second. The requested rate will be filtered through the 383 * find best rate function above. 
 */
/*
 * Locking: the worker lock must be held on entry and is held again on
 * return, but it is released and re-acquired inside this function.
 *
 * A "rate" of zero selects rate index zero, which the comments below
 * refer to as unlimited. The return value is the negated result of
 * mlx5e_rl_modify_sq(), or zero when no SQ is running.
 */
static int
mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel, uint64_t rate)
{
	struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
	struct mlx5e_sq *sq;
	uint64_t temp;
	uint16_t index;
	uint16_t burst;
	int error;

	if (rate != 0) {
		/* the worker lock is not held while looking up the rate */
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(rl);

		/* get current burst size in bytes */
		temp = rl->param.tx_burst_size *
		    MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);

		/* limit burst size to 64K currently */
		if (temp > 65535)
			temp = 65535;
		burst = temp;

		/* find best rate */
		rate = mlx5e_rl_find_best_rate_locked(rl, rate);

		MLX5E_RL_RUNLOCK(rl);

		if (rate == 0) {
			/* rate doesn't exist, fallback to unlimited */
			index = 0;
			rate = 0;
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
		} else {
			/* get a reference on the new rate */
			error = -mlx5_rl_add_rate(rlw->priv->mdev,
			    howmany(rate, 1000), burst, &index);

			if (error != 0) {
				/* adding rate failed, fallback to unlimited */
				index = 0;
				rate = 0;
				atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
			}
		}
		MLX5E_RL_WORKER_LOCK(rlw);
	} else {
		index = 0;
		burst = 0;	/* default */
	}

	/* atomically swap rates */
	temp = channel->last_rate;
	channel->last_rate = rate;
	rate = temp;

	/* atomically swap burst size */
	temp = channel->last_burst;
	channel->last_burst = burst;
	burst = temp;

	/* from here on "rate" and "burst" hold the previous values */
	MLX5E_RL_WORKER_UNLOCK(rlw);
	/* put reference on the old rate, if any */
	if (rate != 0) {
		mlx5_rl_remove_rate(rlw->priv->mdev,
		    howmany(rate, 1000), burst);
	}

	/* set new rate, if SQ is running */
	sq = channel->sq;
	if (sq != NULL && READ_ONCE(sq->running) != 0) {
		error = mlx5e_rl_modify_sq(sq, index);
		if (error != 0)
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
	} else
		error = 0;
	MLX5E_RL_WORKER_LOCK(rlw);

	return (-error);
}

/*
 * Main function of a rate limit worker thread; "arg" is the
 * struct mlx5e_rl_worker served by this thread.
 *
 * The thread first opens send queues for its channels (skipping
 * channels in the MLX5E_RL_ST_FREE state unless
 * HAVE_RL_PRE_ALLOCATE_CHANNELS is defined), then processes channels
 * queued on "process_head" until "worker_done" becomes non-zero.
 * Before exiting it closes all send queues, clears "worker_done" and
 * broadcasts on the worker condition variable.
 */
static void
mlx5e_rl_worker(void *arg)
{
	struct thread *td;
	struct mlx5e_rl_worker *rlw = arg;
	struct mlx5e_rl_channel *channel;
	struct mlx5e_priv *priv;
	unsigned ix;
	uint64_t x;
	int error;

	/* set thread priority */
	td = curthread;

	thread_lock(td);
	sched_prio(td, PI_SWI(SWI_NET));
	thread_unlock(td);

	priv = rlw->priv;

	/* compute completion vector */
	ix = (rlw - priv->rl.workers) %
	    priv->mdev->priv.eq_table.num_comp_vectors;

	/* TODO bind to CPU */

	/* open all the SQs */
	MLX5E_RL_WORKER_LOCK(rlw);
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		/* NOTE: this declaration shadows the function level "channel" */
		struct mlx5e_rl_channel *channel = rlw->channels + x;

#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
		if (channel->state == MLX5E_RL_ST_FREE)
			continue;
#endif
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(&priv->rl);
		error = mlx5e_rl_open_channel(rlw, ix,
		    &priv->rl.chan_param, &channel->sq);
		MLX5E_RL_RUNLOCK(&priv->rl);

		MLX5E_RL_WORKER_LOCK(rlw);
		if (error != 0) {
			mlx5_en_err(priv->ifp,
			    "mlx5e_rl_open_channel failed: %d\n", error);
			break;
		}
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
	}
	/* main loop, entered and left with the worker lock held */
	while (1) {
		if (STAILQ_FIRST(&rlw->process_head) == NULL) {
			/* check if we are tearing down */
			if (rlw->worker_done != 0)
				break;
			cv_wait(&rlw->cv, &rlw->mtx);
		}
		/* check if we are tearing down */
		if (rlw->worker_done != 0)
			break;
		channel = STAILQ_FIRST(&rlw->process_head);
		if (channel != NULL) {
			STAILQ_REMOVE_HEAD(&rlw->process_head, entry);

			switch (channel->state) {
			case MLX5E_RL_ST_MODIFY:
				channel->state = MLX5E_RL_ST_USED;
				MLX5E_RL_WORKER_UNLOCK(rlw);

				/* create channel by demand */
				if (channel->sq == NULL) {
					MLX5E_RL_RLOCK(&priv->rl);
					error = mlx5e_rl_open_channel(rlw, ix,
					    &priv->rl.chan_param, &channel->sq);
					MLX5E_RL_RUNLOCK(&priv->rl);

					if (error != 0) {
						mlx5_en_err(priv->ifp,
						    "mlx5e_rl_open_channel failed: %d\n", error);
					} else {
						atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
					}
				} else {
					mlx5e_resume_sq(channel->sq);
				}

				MLX5E_RL_WORKER_LOCK(rlw);
				/* convert from bytes/s to bits/s and set new rate */
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
				    channel->new_rate * 8ULL);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				break;

			case MLX5E_RL_ST_DESTROY:
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				if (channel->sq != NULL) {
					/*
					 * Make sure all packets are
					 * transmitted before SQ is
					 * returned to free list:
					 */
					MLX5E_RL_WORKER_UNLOCK(rlw);
					mlx5e_drain_sq(channel->sq);
					MLX5E_RL_WORKER_LOCK(rlw);
				}
				/* put the channel back into the free list */
				STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
				channel->state = MLX5E_RL_ST_FREE;
				atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
				break;
			default:
				/* NOP */
				break;
			}
		}
	}

	/* close all the SQs */
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

		/* update the initial rate */
		channel->init_rate = channel->last_rate;

		/* make sure we free up the rate resource */
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);

		if (channel->sq != NULL) {
			MLX5E_RL_WORKER_UNLOCK(rlw);
			mlx5e_rl_close_channel(&channel->sq);
			atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
			MLX5E_RL_WORKER_LOCK(rlw);
		}
	}

	/* signal mlx5e_rl_close_workers(), which waits for zero */
	rlw->worker_done = 0;
	cv_broadcast(&rlw->cv);
	MLX5E_RL_WORKER_UNLOCK(rlw);

	kthread_exit();
}

/*
 * Create the TIS used by the rate limit send queues, with priority
 * zero in the transport domain of "priv". The TIS number is stored
 * in priv->rl.tisn. Returns the result of mlx5_core_create_tis().
 */
static int
mlx5e_rl_open_tis(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, 0);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
}

/* Destroy the TIS created by mlx5e_rl_open_tis(). */
static void
mlx5e_rl_close_tis(struct mlx5e_priv *priv)
{
	mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn);
}

/*
 * Fill "param" with default values, taken from the device state in
 * "mdev" and from compile time constants.
 */
static void
mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
    struct mlx5_core_dev *mdev)
{
	/* ratelimit workers */
	param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
	param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;

	/* range check */
	if (param->tx_worker_threads_def == 0 ||
	    param->tx_worker_threads_def > param->tx_worker_threads_max)
		param->tx_worker_threads_def = param->tx_worker_threads_max;

	/* ratelimit channels */
	param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
	    param->tx_worker_threads_def;
	param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;

	/* range check */
	if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
		param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;

	/* set default burst size */
	param->tx_burst_size = 4;	/* MTUs */

	/*
	 * Set maximum burst size
	 *
	 * The burst size is multiplied by the MTU and clamped to the
	 * range 0 ... 65535 bytes inclusively before fed into the
	 * firmware.
	 *
	 * NOTE: If the burst size or MTU is changed only ratelimit
	 * connections made after the change will use the new burst
	 * size.
	 */
	param->tx_burst_size_max = 255;

	/* get firmware rate limits in 1000bit/s and convert them to bit/s */
	param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
	param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;

	/* ratelimit table size */
	param->tx_rates_max = mdev->priv.rl_table.max_size;

	/* range check */
	if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
		param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;

	/* set default number of rates */
	param->tx_rates_def = param->tx_rates_max;

	/* set maximum allowed rate deviation */
	if (param->tx_limit_max != 0) {
		/*
		 * Make sure the deviation multiplication doesn't
		 * overflow unsigned 64-bit:
		 */
		param->tx_allowed_deviation_max = -1ULL /
		    param->tx_limit_max;
	}
	/* set default rate deviation */
	param->tx_allowed_deviation = 50;	/* 5.0% */

	/* channel parameters */
	param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
	param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
	param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
	param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
	param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
}

/*
 * String tables used when creating the sysctl nodes in
 * mlx5e_rl_init(): entry 2 * i is used as name and entry 2 * i + 1
 * as description of item number i.
 */
static const char *mlx5e_rl_params_desc[] = {
	MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_table_params_desc[] = {
	MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_stats_desc[] = {
	MLX5E_RL_STATS(MLX5E_STATS_DESC)
};

/*
 * Set up rate limiting for "priv": the TIS, default parameters, the
 * sysctl tree, the worker and rate table arrays, and the worker
 * threads.
 *
 * Returns zero without doing anything when the device reports no
 * packet pacing support. Returns the mlx5e_rl_open_tis() error when
 * the TIS cannot be created. A failure to open the workers is only
 * logged; zero is still returned in that case.
 */
int
mlx5e_rl_init(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct sysctl_oid *node;
	struct sysctl_oid *stats;
	char buf[64];
	uint64_t i;
	uint64_t j;
	int error;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return (0);

	rl->priv = priv;

	sysctl_ctx_init(&rl->ctx);

	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");

	/* open own TIS domain for ratelimit SQs */
	error = mlx5e_rl_open_tis(priv);
	if (error)
		goto done;

	/* setup default value for parameters */
	mlx5e_rl_set_default_params(&rl->param, priv->mdev);

	/* update the completion factor */
	mlx5e_rl_sync_tx_completion_fact(rl);

	/* create root node */
	node = SYSCTL_ADD_NODE(&rl->ctx,
	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
	    "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support");

	if (node != NULL) {
		/* create SYSCTLs */
		for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(arg[i]),
			    node, mlx5e_rl_params_desc[2 * i],
			    mlx5e_rl_params_desc[2 * i + 1]);
		}

		stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
		    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "Rate limiting statistics");
		if (stats != NULL) {
			/* create SYSCTLs */
			for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
				mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
				    stats, mlx5e_rl_stats_desc[2 * i],
				    mlx5e_rl_stats_desc[2 * i + 1]);
			}
		}
	}

	/* allocate workers array */
	rl->workers = malloc(sizeof(rl->workers[0]) *
	    rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);

	/* allocate rate limit array */
	rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
	    rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);

	if (node != NULL) {
		/* create more SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
		    "A", "Show table of all configured TX rates");

		/* try to fetch rate table from kernel environment */
		for (i = 0; i != rl->param.tx_rates_def; i++) {
			/* compute path for tunable */
			snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
			    device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
			if (TUNABLE_QUAD_FETCH(buf, &j))
				mlx5e_rl_tx_limit_add(rl, j);
		}

		/* setup rate table sysctls */
		for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(table_arg[i]),
			    node, mlx5e_rl_table_params_desc[2 * i],
			    mlx5e_rl_table_params_desc[2 * i + 1]);
		}
	}

	/* set up each worker and put all of its channels on the free list */
	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		rlw->priv = priv;

		cv_init(&rlw->cv, "mlx5-worker-cv");
		mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
		STAILQ_INIT(&rlw->index_list_head);
		STAILQ_INIT(&rlw->process_head);

		rlw->channels = malloc(sizeof(rlw->channels[0]) *
		    rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);

		MLX5E_RL_WORKER_LOCK(rlw);
		for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
			struct mlx5e_rl_channel *channel = rlw->channels + i;
			channel->worker = rlw;
			channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT;
			STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	PRIV_LOCK(priv);
	error = mlx5e_rl_open_workers(priv);
	PRIV_UNLOCK(priv);

	if (error != 0) {
		mlx5_en_err(priv->ifp,
		    "mlx5e_rl_open_workers failed: %d\n", error);
	}

	return (0);

done:
	sysctl_ctx_free(&rl->ctx);
	sx_destroy(&rl->rl_sxlock);
	return (error);
}

/*
 * Build the channel parameters and start one kernel thread per
 * worker. Both callers in this file hold PRIV_LOCK around the call.
 *
 * Returns -EINVAL when the device is gone or the workers are already
 * opened, else zero. A thread that fails to start is logged and its
 * worker gets "worker_done" set to one; zero is still returned.
 */
static int
mlx5e_rl_open_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct thread *rl_thread = NULL;
	struct proc *rl_proc = NULL;
	uint64_t j;
	int error;

	if (priv->gone || rl->opened)
		return (-EINVAL);

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		/* start worker thread */
		error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
		    RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
		if (error != 0) {
			mlx5_en_err(rl->priv->ifp,
			    "kproc_kthread_add failed: %d\n", error);
			/* mlx5e_rl_close_workers() reads this as "not started" */
			rlw->worker_done = 1;
		}
	}

	rl->opened = 1;

	return (0);
}

/*
 * Ask all worker threads to exit and wait until they have done so.
 * Does nothing when the workers are not opened.
 *
 * The "worker_done" field is used in both directions: this function
 * sets it to one to request the exit, and mlx5e_rl_worker() sets it
 * back to zero right before exiting.
 */
static void
mlx5e_rl_close_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	if (rl->opened == 0)
		return;

	/* tear down worker threads simultaneously */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		if (rlw->worker_done == 0) {
			rlw->worker_done = 1;
			cv_broadcast(&rlw->cv);
		} else {
			/* XXX thread not started */
			rlw->worker_done = 0;
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	/* wait for worker threads to exit */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		while (rlw->worker_done != 0)
			cv_wait(&rlw->cv, &rlw->mtx);
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	rl->opened = 0;
}

/* Clear all entries of the rate limit table under the write lock. */
static void
mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
{
	unsigned x;

	MLX5E_RL_WLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++)
		rl->rate_limit_table[x] = 0;
	MLX5E_RL_WUNLOCK(rl);
}

/*
 * Tear down what mlx5e_rl_init() set up: the sysctl context, the
 * worker threads, the rate table, the TIS and all allocations.
 * Does nothing when the device reports no packet pacing support.
 */
void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return;

	/* TODO check if there is support for packet pacing */

	sysctl_ctx_free(&rl->ctx);

	PRIV_LOCK(priv);
	mlx5e_rl_close_workers(priv);
	PRIV_UNLOCK(priv);

	mlx5e_rl_reset_rates(rl);

	/* close TIS domain */
	mlx5e_rl_close_tis(priv);

	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		cv_destroy(&rlw->cv);
		mtx_destroy(&rlw->mtx);
		free(rlw->channels, M_MLX5EN);
	}
	free(rl->rate_limit_table, M_MLX5EN);
	free(rl->workers, M_MLX5EN);
	sx_destroy(&rl->rl_sxlock);
}

/*
 * Append "channel" to the process queue of "rlw" and wake up the
 * worker. All callers in this file hold the worker lock.
 */
static void
mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel)
{
	STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
	cv_broadcast(&rlw->cv);
}

/*
 * Request destruction of "channel"; a NULL channel is ignored.
 *
 * A channel in the MODIFY state is only switched to the DESTROY
 * state and not queued here; mlx5e_rl_modify() queued it when it
 * entered the MODIFY state. A channel in the USED state is switched
 * to DESTROY and queued for the worker. Other states are left alone.
 */
static void
mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
{
	if (channel == NULL)
		return;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_MODIFY:
		channel->state = MLX5E_RL_ST_DESTROY;
		break;
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_DESTROY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);
}

/*
 * Record "rate" as the new rate of "channel". A channel in the USED
 * state is switched to MODIFY and queued for the worker; in any other
 * state only the new rate is stored. Always returns zero.
 *
 * The worker multiplies the stored value by eight, converting from
 * bytes/s to bits/s, before applying it.
 */
static int
mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
{

	MLX5E_RL_WORKER_LOCK(rlw);
	channel->new_rate = rate;
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_MODIFY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (0);
}

/*
 * Report the last applied rate and the SQ queue level of "channel".
 *
 * Returns zero in the USED state, EBUSY in the MODIFY state (the
 * output fields are filled in for both) and EINVAL in any other
 * state, in which case "params" is not written.
 */
static int
mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel,
    union if_snd_tag_query_params *params)
{
	int retval;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = 0;
		break;
	case MLX5E_RL_ST_MODIFY:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = EBUSY;
		break;
	default:
		retval = EINVAL;
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (retval);
}

/*
 * Take the first channel off the free list of "rlw" and mark it as
 * used.
 *
 * Returns zero and stores the channel in "*pchannel" on success.
 * Returns ENOMEM and stores NULL in "*pchannel" when the free list
 * is empty.
 */
static int
mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel **pchannel)
{
	struct mlx5e_rl_channel *channel;
	int retval = ENOMEM;

	MLX5E_RL_WORKER_LOCK(rlw);
	/* Check for available channel in free list */
	if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
		retval = 0;
		/* Remove head index from available list */
		STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
		channel->state = MLX5E_RL_ST_USED;
		atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
	} else {
		atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	*pchannel = channel;
#ifdef RATELIMIT_DEBUG
	mlx5_en_info(rlw->priv->ifp,
	    "Channel pointer for rate limit connection is %p\n", channel);
#endif
	return (retval);
}

/*
 * Allocate a rate limit send tag on "ifp".
 *
 * Returns EOPNOTSUPP when packet pacing is not supported, the device
 * is going away or the requested tag type is not rate limit. Returns
 * the mlx5e_find_available_tx_ring_index() error when the selected
 * worker has no free channel. On success "*ppmt" points at the tag
 * embedded in the allocated channel and zero is returned.
 */
int
mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_rl_channel *channel;
	struct mlx5e_rl_worker *rlw;
	struct mlx5e_priv *priv;
	int error;

	priv = ifp->if_softc;

	/* check if there is support for packet pacing or if device is going away */
	if (!MLX5_CAP_GEN(priv->mdev, qos) ||
	    !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
	    params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
		return (EOPNOTSUPP);

	/* compute worker thread this TCP connection belongs to */
	rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
	    priv->rl.param.tx_worker_threads_def);

	error = mlx5e_find_available_tx_ring_index(rlw, &channel);
	if (error != 0)
		goto done;

	error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
	if (error != 0) {
		mlx5e_rl_free(rlw, channel);
		goto done;
	}

	/* store pointer to mbuf tag */
	MPASS(channel->tag.refcount == 0);
	m_snd_tag_init(&channel->tag, ifp, IF_SND_TAG_TYPE_RATE_LIMIT);
	*ppmt = &channel->tag;
done:
	return (error);
}


/*
 * Change the rate of the channel containing the send tag "pmt".
 * Returns the result of mlx5e_rl_modify().
 */
int
mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
}

/*
 * Query the channel containing the send tag "pmt".
 * Returns the result of mlx5e_rl_query().
 */
int
mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_query(channel->worker, channel, params));
}

/* Release the channel containing the send tag "pmt". */
void
mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	mlx5e_rl_free(channel->worker, channel);
}

/*
 * Sysctl handler printing one line for every non-zero entry of the
 * rate limit table; "arg1" is the struct mlx5e_rl_priv_data. The
 * BURST column shows the current tx_burst_size parameter for every
 * line.
 */
static int
mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	struct sbuf sbuf;
	unsigned x;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	PRIV_LOCK(priv);

	sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);

	sbuf_printf(&sbuf,
	    "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
	    "\t" "--------------------------------------------\n");

	MLX5E_RL_RLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] == 0)
			continue;

		sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
		    x, (unsigned)rl->param.tx_burst_size,
		    (long long)rl->rate_limit_table[x]);
	}
	MLX5E_RL_RUNLOCK(rl);

	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);

	PRIV_UNLOCK(priv);

	return (error);
}

/*
 * Rebuild the channel parameters from the current settings and apply
 * the coalescing settings to the completion queue of every open send
 * queue. The results of the individual modify calls are not checked;
 * zero is always returned.
 */
static int
mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			/* channels without an open SQ have nothing to update */
			if (sq == NULL)
				continue;

			if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
				mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts,
				    rl->param.tx_coalesce_mode);
			} else {
				mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts);
			}
		}
	}
	return (0);
}

void
mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct
mlx5e_rl_worker *rlw = rl->workers + y; 1234 1235 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1236 struct mlx5e_rl_channel *channel; 1237 struct mlx5e_sq *sq; 1238 1239 channel = rlw->channels + x; 1240 sq = channel->sq; 1241 1242 if (sq == NULL) 1243 continue; 1244 1245 mtx_lock(&sq->lock); 1246 mlx5e_update_sq_inline(sq); 1247 mtx_unlock(&sq->lock); 1248 } 1249 } 1250 } 1251 1252 static int 1253 mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) 1254 { 1255 unsigned x; 1256 int error; 1257 1258 if (value < 1000 || 1259 mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) 1260 return (EINVAL); 1261 1262 MLX5E_RL_WLOCK(rl); 1263 error = ENOMEM; 1264 1265 /* check if rate already exists */ 1266 for (x = 0; x != rl->param.tx_rates_def; x++) { 1267 if (rl->rate_limit_table[x] != value) 1268 continue; 1269 error = EEXIST; 1270 break; 1271 } 1272 1273 /* check if there is a free rate entry */ 1274 if (x == rl->param.tx_rates_def) { 1275 for (x = 0; x != rl->param.tx_rates_def; x++) { 1276 if (rl->rate_limit_table[x] != 0) 1277 continue; 1278 rl->rate_limit_table[x] = value; 1279 error = 0; 1280 break; 1281 } 1282 } 1283 MLX5E_RL_WUNLOCK(rl); 1284 1285 return (error); 1286 } 1287 1288 static int 1289 mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) 1290 { 1291 unsigned x; 1292 int error; 1293 1294 if (value == 0) 1295 return (EINVAL); 1296 1297 MLX5E_RL_WLOCK(rl); 1298 1299 /* check if rate already exists */ 1300 for (x = 0; x != rl->param.tx_rates_def; x++) { 1301 if (rl->rate_limit_table[x] != value) 1302 continue; 1303 /* free up rate */ 1304 rl->rate_limit_table[x] = 0; 1305 break; 1306 } 1307 1308 /* check if there is a free rate entry */ 1309 if (x == rl->param.tx_rates_def) 1310 error = ENOENT; 1311 else 1312 error = 0; 1313 MLX5E_RL_WUNLOCK(rl); 1314 1315 return (error); 1316 } 1317 1318 static int 1319 mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS) 1320 { 1321 struct mlx5e_rl_priv_data *rl = 
arg1; 1322 struct mlx5e_priv *priv = rl->priv; 1323 unsigned mode_modify; 1324 unsigned was_opened; 1325 uint64_t value; 1326 uint64_t old; 1327 int error; 1328 1329 PRIV_LOCK(priv); 1330 1331 MLX5E_RL_RLOCK(rl); 1332 value = rl->param.arg[arg2]; 1333 MLX5E_RL_RUNLOCK(rl); 1334 1335 if (req != NULL) { 1336 old = value; 1337 error = sysctl_handle_64(oidp, &value, 0, req); 1338 if (error || req->newptr == NULL || 1339 value == rl->param.arg[arg2]) 1340 goto done; 1341 } else { 1342 old = 0; 1343 error = 0; 1344 } 1345 1346 /* check if device is gone */ 1347 if (priv->gone) { 1348 error = ENXIO; 1349 goto done; 1350 } 1351 was_opened = rl->opened; 1352 mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); 1353 1354 switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) { 1355 case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def): 1356 if (value > rl->param.tx_worker_threads_max) 1357 value = rl->param.tx_worker_threads_max; 1358 else if (value < 1) 1359 value = 1; 1360 1361 /* store new value */ 1362 rl->param.arg[arg2] = value; 1363 break; 1364 1365 case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def): 1366 if (value > rl->param.tx_channels_per_worker_max) 1367 value = rl->param.tx_channels_per_worker_max; 1368 else if (value < 1) 1369 value = 1; 1370 1371 /* store new value */ 1372 rl->param.arg[arg2] = value; 1373 break; 1374 1375 case MLX5E_RL_PARAMS_INDEX(tx_rates_def): 1376 if (value > rl->param.tx_rates_max) 1377 value = rl->param.tx_rates_max; 1378 else if (value < 1) 1379 value = 1; 1380 1381 /* store new value */ 1382 rl->param.arg[arg2] = value; 1383 break; 1384 1385 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs): 1386 /* range check */ 1387 if (value < 1) 1388 value = 0; 1389 else if (value > MLX5E_FLD_MAX(cqc, cq_period)) 1390 value = MLX5E_FLD_MAX(cqc, cq_period); 1391 1392 /* store new value */ 1393 rl->param.arg[arg2] = value; 1394 1395 /* check to avoid down and up the network interface */ 1396 if (was_opened) 1397 error = 
mlx5e_rl_refresh_channel_params(rl); 1398 break; 1399 1400 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts): 1401 /* import TX coal pkts */ 1402 if (value < 1) 1403 value = 0; 1404 else if (value > MLX5E_FLD_MAX(cqc, cq_max_count)) 1405 value = MLX5E_FLD_MAX(cqc, cq_max_count); 1406 1407 /* store new value */ 1408 rl->param.arg[arg2] = value; 1409 1410 /* check to avoid down and up the network interface */ 1411 if (was_opened) 1412 error = mlx5e_rl_refresh_channel_params(rl); 1413 break; 1414 1415 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode): 1416 /* network interface must be down */ 1417 if (was_opened != 0 && mode_modify == 0) 1418 mlx5e_rl_close_workers(priv); 1419 1420 /* import TX coalesce mode */ 1421 if (value != 0) 1422 value = 1; 1423 1424 /* store new value */ 1425 rl->param.arg[arg2] = value; 1426 1427 /* restart network interface, if any */ 1428 if (was_opened != 0) { 1429 if (mode_modify == 0) 1430 mlx5e_rl_open_workers(priv); 1431 else 1432 error = mlx5e_rl_refresh_channel_params(rl); 1433 } 1434 break; 1435 1436 case MLX5E_RL_PARAMS_INDEX(tx_queue_size): 1437 /* network interface must be down */ 1438 if (was_opened) 1439 mlx5e_rl_close_workers(priv); 1440 1441 /* import TX queue size */ 1442 if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) 1443 value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 1444 else if (value > priv->params_ethtool.tx_queue_size_max) 1445 value = priv->params_ethtool.tx_queue_size_max; 1446 1447 /* store actual TX queue size */ 1448 value = 1ULL << order_base_2(value); 1449 1450 /* store new value */ 1451 rl->param.arg[arg2] = value; 1452 1453 /* verify TX completion factor */ 1454 mlx5e_rl_sync_tx_completion_fact(rl); 1455 1456 /* restart network interface, if any */ 1457 if (was_opened) 1458 mlx5e_rl_open_workers(priv); 1459 break; 1460 1461 case MLX5E_RL_PARAMS_INDEX(tx_completion_fact): 1462 /* network interface must be down */ 1463 if (was_opened) 1464 mlx5e_rl_close_workers(priv); 1465 1466 /* store new value */ 1467 
rl->param.arg[arg2] = value; 1468 1469 /* verify parameter */ 1470 mlx5e_rl_sync_tx_completion_fact(rl); 1471 1472 /* restart network interface, if any */ 1473 if (was_opened) 1474 mlx5e_rl_open_workers(priv); 1475 break; 1476 1477 case MLX5E_RL_PARAMS_INDEX(tx_limit_add): 1478 error = mlx5e_rl_tx_limit_add(rl, value); 1479 break; 1480 1481 case MLX5E_RL_PARAMS_INDEX(tx_limit_clr): 1482 error = mlx5e_rl_tx_limit_clr(rl, value); 1483 break; 1484 1485 case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation): 1486 /* range check */ 1487 if (value > rl->param.tx_allowed_deviation_max) 1488 value = rl->param.tx_allowed_deviation_max; 1489 else if (value < rl->param.tx_allowed_deviation_min) 1490 value = rl->param.tx_allowed_deviation_min; 1491 1492 MLX5E_RL_WLOCK(rl); 1493 rl->param.arg[arg2] = value; 1494 MLX5E_RL_WUNLOCK(rl); 1495 break; 1496 1497 case MLX5E_RL_PARAMS_INDEX(tx_burst_size): 1498 /* range check */ 1499 if (value > rl->param.tx_burst_size_max) 1500 value = rl->param.tx_burst_size_max; 1501 else if (value < rl->param.tx_burst_size_min) 1502 value = rl->param.tx_burst_size_min; 1503 1504 MLX5E_RL_WLOCK(rl); 1505 rl->param.arg[arg2] = value; 1506 MLX5E_RL_WUNLOCK(rl); 1507 break; 1508 1509 default: 1510 break; 1511 } 1512 done: 1513 PRIV_UNLOCK(priv); 1514 return (error); 1515 } 1516 1517 static void 1518 mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1519 struct sysctl_oid *node, const char *name, const char *desc) 1520 { 1521 /* 1522 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will 1523 * take care of loading default sysctl value from the kernel 1524 * environment, if any: 1525 */ 1526 if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) { 1527 /* read-only SYSCTLs */ 1528 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1529 name, CTLTYPE_U64 | CTLFLAG_RD | 1530 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1531 } else { 1532 if (strstr(name, "_def") != 0) { 1533 #ifdef RATELIMIT_DEBUG 1534 /* 
tunable read-only advanced SYSCTLs */ 1535 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1536 name, CTLTYPE_U64 | CTLFLAG_RDTUN | 1537 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1538 #endif 1539 } else { 1540 /* read-write SYSCTLs */ 1541 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1542 name, CTLTYPE_U64 | CTLFLAG_RWTUN | 1543 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1544 } 1545 } 1546 } 1547 1548 static void 1549 mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1550 struct sysctl_oid *node, const char *name, const char *desc) 1551 { 1552 /* read-only SYSCTLs */ 1553 SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, 1554 CTLFLAG_RD, &rl->stats.arg[x], 0, desc); 1555 } 1556 1557 #else 1558 1559 int 1560 mlx5e_rl_init(struct mlx5e_priv *priv) 1561 { 1562 1563 return (0); 1564 } 1565 1566 void 1567 mlx5e_rl_cleanup(struct mlx5e_priv *priv) 1568 { 1569 /* NOP */ 1570 } 1571 1572 #endif /* RATELIMIT */ 1573