1 /*- 2 * Copyright (c) 2016 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 
24 * 25 * $FreeBSD$ 26 */ 27 28 #include "en.h" 29 30 #ifdef RATELIMIT 31 32 static int mlx5e_rl_open_workers(struct mlx5e_priv *); 33 static void mlx5e_rl_close_workers(struct mlx5e_priv *); 34 static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); 35 static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, 36 struct sysctl_oid *, const char *name, const char *desc); 37 static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 38 struct sysctl_oid *node, const char *name, const char *desc); 39 static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); 40 static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); 41 42 static void 43 mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, 44 struct mlx5e_sq_param *param) 45 { 46 void *sqc = param->sqc; 47 void *wq = MLX5_ADDR_OF(sqc, sqc, wq); 48 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 49 50 MLX5_SET(wq, wq, log_wq_sz, log_sq_size); 51 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 52 MLX5_SET(wq, wq, pd, rl->priv->pdn); 53 54 param->wq.buf_numa_node = 0; 55 param->wq.db_numa_node = 0; 56 param->wq.linear = 1; 57 } 58 59 static void 60 mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, 61 struct mlx5e_cq_param *param) 62 { 63 void *cqc = param->cqc; 64 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 65 66 MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); 67 MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); 68 MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); 69 70 switch (rl->param.tx_coalesce_mode) { 71 case 0: 72 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 73 break; 74 default: 75 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) 76 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); 77 else 78 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 79 break; 80 } 81 } 82 
83 static void 84 mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, 85 struct mlx5e_rl_channel_param *cparam) 86 { 87 memset(cparam, 0, sizeof(*cparam)); 88 89 mlx5e_rl_build_sq_param(rl, &cparam->sq); 90 mlx5e_rl_build_cq_param(rl, &cparam->cq); 91 } 92 93 static int 94 mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 95 struct mlx5e_sq_param *param, int ix) 96 { 97 struct mlx5_core_dev *mdev = priv->mdev; 98 void *sqc = param->sqc; 99 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); 100 int err; 101 102 /* Create DMA descriptor TAG */ 103 if ((err = -bus_dma_tag_create( 104 bus_get_dma_tag(mdev->pdev->dev.bsddev), 105 1, /* any alignment */ 106 0, /* no boundary */ 107 BUS_SPACE_MAXADDR, /* lowaddr */ 108 BUS_SPACE_MAXADDR, /* highaddr */ 109 NULL, NULL, /* filter, filterarg */ 110 MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ 111 MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ 112 MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 113 0, /* flags */ 114 NULL, NULL, /* lockfunc, lockfuncarg */ 115 &sq->dma_tag))) 116 goto done; 117 118 /* use shared UAR */ 119 sq->uar = priv->rl.sq_uar; 120 121 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, 122 &sq->wq_ctrl); 123 if (err) 124 goto err_free_dma_tag; 125 126 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 127 /* 128 * The sq->bf_buf_size variable is intentionally left zero so 129 * that the doorbell writes will occur at the same memory 130 * location. 
131 */ 132 133 err = mlx5e_alloc_sq_db(sq); 134 if (err) 135 goto err_sq_wq_destroy; 136 137 sq->mkey_be = cpu_to_be32(priv->mr.key); 138 sq->ifp = priv->ifp; 139 sq->priv = priv; 140 141 mlx5e_update_sq_inline(sq); 142 143 return (0); 144 145 err_sq_wq_destroy: 146 mlx5_wq_destroy(&sq->wq_ctrl); 147 err_free_dma_tag: 148 bus_dma_tag_destroy(sq->dma_tag); 149 done: 150 return (err); 151 } 152 153 static void 154 mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) 155 { 156 157 mlx5e_free_sq_db(sq); 158 mlx5_wq_destroy(&sq->wq_ctrl); 159 } 160 161 static int 162 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 163 struct mlx5e_sq_param *param, int ix) 164 { 165 int err; 166 167 err = mlx5e_rl_create_sq(priv, sq, param, ix); 168 if (err) 169 return (err); 170 171 err = mlx5e_enable_sq(sq, param, priv->rl.tisn); 172 if (err) 173 goto err_destroy_sq; 174 175 err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); 176 if (err) 177 goto err_disable_sq; 178 179 return (0); 180 181 err_disable_sq: 182 mlx5e_disable_sq(sq); 183 err_destroy_sq: 184 mlx5e_rl_destroy_sq(sq); 185 186 return (err); 187 } 188 189 static void 190 mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) 191 { 192 mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); 193 mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); 194 195 callout_init_mtx(&sq->cev_callout, &sq->lock, 0); 196 197 sq->cev_factor = priv->rl.param.tx_completion_fact; 198 199 /* ensure the TX completion event factor is not zero */ 200 if (sq->cev_factor == 0) 201 sq->cev_factor = 1; 202 } 203 204 static int 205 mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, 206 struct mlx5e_rl_channel_param *cparam, 207 struct mlx5e_sq *volatile *ppsq) 208 { 209 struct mlx5e_priv *priv = rlw->priv; 210 struct mlx5e_sq *sq; 211 int err; 212 213 sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); 214 215 /* init mutexes */ 216 mlx5e_rl_chan_mtx_init(priv, sq); 217 218 /* open TX completion queue */ 219 err 
= mlx5e_open_cq(priv, &cparam->cq, &sq->cq, 220 &mlx5e_tx_cq_comp, eq_ix); 221 if (err) 222 goto err_free; 223 224 err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); 225 if (err) 226 goto err_close_tx_cq; 227 228 /* store TX channel pointer */ 229 *ppsq = sq; 230 231 /* poll TX queue initially */ 232 sq->cq.mcq.comp(&sq->cq.mcq); 233 234 return (0); 235 236 err_close_tx_cq: 237 mlx5e_close_cq(&sq->cq); 238 239 err_free: 240 /* destroy mutexes */ 241 mtx_destroy(&sq->lock); 242 mtx_destroy(&sq->comp_lock); 243 free(sq, M_MLX5EN); 244 atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); 245 return (err); 246 } 247 248 static void 249 mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) 250 { 251 struct mlx5e_sq *sq = *ppsq; 252 253 /* check if channel is already closed */ 254 if (sq == NULL) 255 return; 256 /* ensure channel pointer is no longer used */ 257 *ppsq = NULL; 258 259 /* teardown and destroy SQ */ 260 mlx5e_drain_sq(sq); 261 mlx5e_disable_sq(sq); 262 mlx5e_rl_destroy_sq(sq); 263 264 /* close CQ */ 265 mlx5e_close_cq(&sq->cq); 266 267 /* destroy mutexes */ 268 mtx_destroy(&sq->lock); 269 mtx_destroy(&sq->comp_lock); 270 271 free(sq, M_MLX5EN); 272 } 273 274 static void 275 mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) 276 { 277 /* 278 * Limit the maximum distance between completion events to 279 * half of the currently set TX queue size. 280 * 281 * The maximum number of queue entries a single IP packet can 282 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. 283 * 284 * The worst case max value is then given as below: 285 */ 286 uint64_t max = rl->param.tx_queue_size / 287 (2 * MLX5_SEND_WQE_MAX_WQEBBS); 288 289 /* 290 * Update the maximum completion factor value in case the 291 * tx_queue_size field changed. Ensure we don't overflow 292 * 16-bits. 
293 */ 294 if (max < 1) 295 max = 1; 296 else if (max > 65535) 297 max = 65535; 298 rl->param.tx_completion_fact_max = max; 299 300 /* 301 * Verify that the current TX completion factor is within the 302 * given limits: 303 */ 304 if (rl->param.tx_completion_fact < 1) 305 rl->param.tx_completion_fact = 1; 306 else if (rl->param.tx_completion_fact > max) 307 rl->param.tx_completion_fact = max; 308 } 309 310 static int 311 mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) 312 { 313 struct mlx5e_priv *priv = sq->priv; 314 struct mlx5_core_dev *mdev = priv->mdev; 315 316 void *in; 317 void *sqc; 318 int inlen; 319 int err; 320 321 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 322 in = mlx5_vzalloc(inlen); 323 if (in == NULL) 324 return (-ENOMEM); 325 326 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 327 328 MLX5_SET(modify_sq_in, in, sqn, sq->sqn); 329 MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); 330 MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); 331 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); 332 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); 333 334 err = mlx5_core_modify_sq(mdev, in, inlen); 335 336 kvfree(in); 337 338 return (err); 339 } 340 341 /* 342 * This function will search the configured rate limit table for the 343 * best match to avoid that a single socket based application can 344 * allocate all the available hardware rates. If the user selected 345 * rate deviates too much from the closes rate available in the rate 346 * limit table, unlimited rate will be selected. 
347 */ 348 static uint64_t 349 mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) 350 { 351 uint64_t distance = -1ULL; 352 uint64_t diff; 353 uint64_t retval = 0; /* unlimited */ 354 uint64_t x; 355 356 /* search for closest rate */ 357 for (x = 0; x != rl->param.tx_rates_def; x++) { 358 uint64_t rate = rl->rate_limit_table[x]; 359 if (rate == 0) 360 continue; 361 362 if (rate > user_rate) 363 diff = rate - user_rate; 364 else 365 diff = user_rate - rate; 366 367 /* check if distance is smaller than previous rate */ 368 if (diff < distance) { 369 distance = diff; 370 retval = rate; 371 } 372 } 373 374 /* range check for multiplication below */ 375 if (user_rate > rl->param.tx_limit_max) 376 user_rate = rl->param.tx_limit_max; 377 378 /* fallback to unlimited, if rate deviates too much */ 379 if (distance > howmany(user_rate * 380 rl->param.tx_allowed_deviation, 1000ULL)) 381 retval = 0; 382 383 return (retval); 384 } 385 386 /* 387 * This function sets the requested rate for a rate limit channel, in 388 * bits per second. The requested rate will be filtered through the 389 * find best rate function above. 
390 */ 391 static int 392 mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw, 393 struct mlx5e_rl_channel *channel, uint64_t rate) 394 { 395 struct mlx5e_rl_priv_data *rl = &rlw->priv->rl; 396 struct mlx5e_sq *sq; 397 uint64_t temp; 398 uint16_t index; 399 uint16_t burst; 400 int error; 401 402 if (rate != 0) { 403 MLX5E_RL_WORKER_UNLOCK(rlw); 404 405 MLX5E_RL_RLOCK(rl); 406 407 /* get current burst size in bytes */ 408 temp = rl->param.tx_burst_size * 409 MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu); 410 411 /* limit burst size to 64K currently */ 412 if (temp > 65535) 413 temp = 65535; 414 burst = temp; 415 416 /* find best rate */ 417 rate = mlx5e_rl_find_best_rate_locked(rl, rate); 418 419 MLX5E_RL_RUNLOCK(rl); 420 421 if (rate == 0) { 422 /* rate doesn't exist, fallback to unlimited */ 423 index = 0; 424 rate = 0; 425 atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 426 } else { 427 /* get a reference on the new rate */ 428 error = -mlx5_rl_add_rate(rlw->priv->mdev, 429 howmany(rate, 1000), burst, &index); 430 431 if (error != 0) { 432 /* adding rate failed, fallback to unlimited */ 433 index = 0; 434 rate = 0; 435 atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL); 436 } 437 } 438 MLX5E_RL_WORKER_LOCK(rlw); 439 } else { 440 index = 0; 441 burst = 0; /* default */ 442 } 443 444 /* atomically swap rates */ 445 temp = channel->last_rate; 446 channel->last_rate = rate; 447 rate = temp; 448 449 /* atomically swap burst size */ 450 temp = channel->last_burst; 451 channel->last_burst = burst; 452 burst = temp; 453 454 MLX5E_RL_WORKER_UNLOCK(rlw); 455 /* put reference on the old rate, if any */ 456 if (rate != 0) { 457 mlx5_rl_remove_rate(rlw->priv->mdev, 458 howmany(rate, 1000), burst); 459 } 460 461 /* set new rate, if SQ is running */ 462 sq = channel->sq; 463 if (sq != NULL && READ_ONCE(sq->running) != 0) { 464 error = mlx5e_rl_modify_sq(sq, index); 465 if (error != 0) 466 
atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 467 } else 468 error = 0; 469 MLX5E_RL_WORKER_LOCK(rlw); 470 471 return (-error); 472 } 473 474 static void 475 mlx5e_rl_worker(void *arg) 476 { 477 struct thread *td; 478 struct mlx5e_rl_worker *rlw = arg; 479 struct mlx5e_rl_channel *channel; 480 struct mlx5e_priv *priv; 481 unsigned ix; 482 uint64_t x; 483 int error; 484 485 /* set thread priority */ 486 td = curthread; 487 488 thread_lock(td); 489 sched_prio(td, PI_SWI(SWI_NET)); 490 thread_unlock(td); 491 492 priv = rlw->priv; 493 494 /* compute completion vector */ 495 ix = (rlw - priv->rl.workers) % 496 priv->mdev->priv.eq_table.num_comp_vectors; 497 498 /* TODO bind to CPU */ 499 500 /* open all the SQs */ 501 MLX5E_RL_WORKER_LOCK(rlw); 502 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 503 struct mlx5e_rl_channel *channel = rlw->channels + x; 504 505 #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS) 506 if (channel->state == MLX5E_RL_ST_FREE) 507 continue; 508 #endif 509 MLX5E_RL_WORKER_UNLOCK(rlw); 510 511 MLX5E_RL_RLOCK(&priv->rl); 512 error = mlx5e_rl_open_channel(rlw, ix, 513 &priv->rl.chan_param, &channel->sq); 514 MLX5E_RL_RUNLOCK(&priv->rl); 515 516 MLX5E_RL_WORKER_LOCK(rlw); 517 if (error != 0) { 518 if_printf(priv->ifp, 519 "mlx5e_rl_open_channel failed: %d\n", error); 520 break; 521 } 522 mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate); 523 } 524 while (1) { 525 if (STAILQ_FIRST(&rlw->process_head) == NULL) { 526 /* check if we are tearing down */ 527 if (rlw->worker_done != 0) 528 break; 529 cv_wait(&rlw->cv, &rlw->mtx); 530 } 531 /* check if we are tearing down */ 532 if (rlw->worker_done != 0) 533 break; 534 channel = STAILQ_FIRST(&rlw->process_head); 535 if (channel != NULL) { 536 STAILQ_REMOVE_HEAD(&rlw->process_head, entry); 537 538 switch (channel->state) { 539 case MLX5E_RL_ST_MODIFY: 540 channel->state = MLX5E_RL_ST_USED; 541 MLX5E_RL_WORKER_UNLOCK(rlw); 542 543 /* create channel by demand 
*/ 544 if (channel->sq == NULL) { 545 MLX5E_RL_RLOCK(&priv->rl); 546 error = mlx5e_rl_open_channel(rlw, ix, 547 &priv->rl.chan_param, &channel->sq); 548 MLX5E_RL_RUNLOCK(&priv->rl); 549 550 if (error != 0) { 551 if_printf(priv->ifp, 552 "mlx5e_rl_open_channel failed: %d\n", error); 553 } else { 554 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL); 555 } 556 } else { 557 mlx5e_resume_sq(channel->sq); 558 } 559 560 MLX5E_RL_WORKER_LOCK(rlw); 561 /* convert from bytes/s to bits/s and set new rate */ 562 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 563 channel->new_rate * 8ULL); 564 if (error != 0) { 565 if_printf(priv->ifp, 566 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 567 error); 568 } 569 break; 570 571 case MLX5E_RL_ST_DESTROY: 572 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 573 if (error != 0) { 574 if_printf(priv->ifp, 575 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 576 error); 577 } 578 if (channel->sq != NULL) { 579 /* 580 * Make sure all packets are 581 * transmitted before SQ is 582 * returned to free list: 583 */ 584 MLX5E_RL_WORKER_UNLOCK(rlw); 585 mlx5e_drain_sq(channel->sq); 586 MLX5E_RL_WORKER_LOCK(rlw); 587 } 588 /* put the channel back into the free list */ 589 STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry); 590 channel->state = MLX5E_RL_ST_FREE; 591 atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL); 592 break; 593 default: 594 /* NOP */ 595 break; 596 } 597 } 598 } 599 600 /* close all the SQs */ 601 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 602 struct mlx5e_rl_channel *channel = rlw->channels + x; 603 604 /* update the initial rate */ 605 channel->init_rate = channel->last_rate; 606 607 /* make sure we free up the rate resource */ 608 mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 609 610 if (channel->sq != NULL) { 611 MLX5E_RL_WORKER_UNLOCK(rlw); 612 mlx5e_rl_close_channel(&channel->sq); 613 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL); 
614 MLX5E_RL_WORKER_LOCK(rlw); 615 } 616 } 617 618 rlw->worker_done = 0; 619 cv_broadcast(&rlw->cv); 620 MLX5E_RL_WORKER_UNLOCK(rlw); 621 622 kthread_exit(); 623 } 624 625 static int 626 mlx5e_rl_open_tis(struct mlx5e_priv *priv) 627 { 628 struct mlx5_core_dev *mdev = priv->mdev; 629 u32 in[MLX5_ST_SZ_DW(create_tis_in)]; 630 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 631 632 memset(in, 0, sizeof(in)); 633 634 MLX5_SET(tisc, tisc, prio, 0); 635 MLX5_SET(tisc, tisc, transport_domain, priv->tdn); 636 637 return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn)); 638 } 639 640 static void 641 mlx5e_rl_close_tis(struct mlx5e_priv *priv) 642 { 643 mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn); 644 } 645 646 static void 647 mlx5e_rl_set_default_params(struct mlx5e_rl_params *param, 648 struct mlx5_core_dev *mdev) 649 { 650 /* ratelimit workers */ 651 param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors; 652 param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS; 653 654 /* range check */ 655 if (param->tx_worker_threads_def == 0 || 656 param->tx_worker_threads_def > param->tx_worker_threads_max) 657 param->tx_worker_threads_def = param->tx_worker_threads_max; 658 659 /* ratelimit channels */ 660 param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS / 661 param->tx_worker_threads_def; 662 param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS; 663 664 /* range check */ 665 if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER) 666 param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER; 667 668 /* set default burst size */ 669 param->tx_burst_size = 4; /* MTUs */ 670 671 /* 672 * Set maximum burst size 673 * 674 * The burst size is multiplied by the MTU and clamped to the 675 * range 0 ... 65535 bytes inclusivly before fed into the 676 * firmware. 677 * 678 * NOTE: If the burst size or MTU is changed only ratelimit 679 * connections made after the change will use the new burst 680 * size. 
681 */ 682 param->tx_burst_size_max = 255; 683 684 /* get firmware rate limits in 1000bit/s and convert them to bit/s */ 685 param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL; 686 param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL; 687 688 /* ratelimit table size */ 689 param->tx_rates_max = mdev->priv.rl_table.max_size; 690 691 /* range check */ 692 if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES) 693 param->tx_rates_max = MLX5E_RL_MAX_TX_RATES; 694 695 /* set default number of rates */ 696 param->tx_rates_def = param->tx_rates_max; 697 698 /* set maximum allowed rate deviation */ 699 if (param->tx_limit_max != 0) { 700 /* 701 * Make sure the deviation multiplication doesn't 702 * overflow unsigned 64-bit: 703 */ 704 param->tx_allowed_deviation_max = -1ULL / 705 param->tx_limit_max; 706 } 707 /* set default rate deviation */ 708 param->tx_allowed_deviation = 50; /* 5.0% */ 709 710 /* channel parameters */ 711 param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 712 param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT; 713 param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT; 714 param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT; 715 param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT; 716 } 717 718 static const char *mlx5e_rl_params_desc[] = { 719 MLX5E_RL_PARAMS(MLX5E_STATS_DESC) 720 }; 721 722 static const char *mlx5e_rl_table_params_desc[] = { 723 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC) 724 }; 725 726 static const char *mlx5e_rl_stats_desc[] = { 727 MLX5E_RL_STATS(MLX5E_STATS_DESC) 728 }; 729 730 int 731 mlx5e_rl_init(struct mlx5e_priv *priv) 732 { 733 struct mlx5e_rl_priv_data *rl = &priv->rl; 734 struct sysctl_oid *node; 735 struct sysctl_oid *stats; 736 char buf[64]; 737 uint64_t i; 738 uint64_t j; 739 int error; 740 741 /* check if there is support for packet pacing */ 742 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 743 return (0); 744 745 rl->priv = priv; 746 747 
sysctl_ctx_init(&rl->ctx); 748 749 sx_init(&rl->rl_sxlock, "ratelimit-sxlock"); 750 751 /* allocate shared UAR for SQs */ 752 error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar); 753 if (error) 754 goto done; 755 756 /* open own TIS domain for ratelimit SQs */ 757 error = mlx5e_rl_open_tis(priv); 758 if (error) 759 goto err_uar; 760 761 /* setup default value for parameters */ 762 mlx5e_rl_set_default_params(&rl->param, priv->mdev); 763 764 /* update the completion factor */ 765 mlx5e_rl_sync_tx_completion_fact(rl); 766 767 /* create root node */ 768 node = SYSCTL_ADD_NODE(&rl->ctx, 769 SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, 770 "rate_limit", CTLFLAG_RW, NULL, "Rate limiting support"); 771 772 if (node != NULL) { 773 /* create SYSCTLs */ 774 for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) { 775 mlx5e_rl_sysctl_add_u64_oid(rl, 776 MLX5E_RL_PARAMS_INDEX(arg[i]), 777 node, mlx5e_rl_params_desc[2 * i], 778 mlx5e_rl_params_desc[2 * i + 1]); 779 } 780 781 stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node), 782 OID_AUTO, "stats", CTLFLAG_RD, NULL, 783 "Rate limiting statistics"); 784 if (stats != NULL) { 785 /* create SYSCTLs */ 786 for (i = 0; i != MLX5E_RL_STATS_NUM; i++) { 787 mlx5e_rl_sysctl_add_stats_u64_oid(rl, i, 788 stats, mlx5e_rl_stats_desc[2 * i], 789 mlx5e_rl_stats_desc[2 * i + 1]); 790 } 791 } 792 } 793 794 /* allocate workers array */ 795 rl->workers = malloc(sizeof(rl->workers[0]) * 796 rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO); 797 798 /* allocate rate limit array */ 799 rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) * 800 rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO); 801 802 if (node != NULL) { 803 /* create more SYSCTls */ 804 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 805 "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD | 806 CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table, 807 "A", "Show table of all configured TX rates"); 808 809 /* try to fetch rate table from kernel environment */ 
810 for (i = 0; i != rl->param.tx_rates_def; i++) { 811 /* compute path for tunable */ 812 snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d", 813 device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i); 814 if (TUNABLE_QUAD_FETCH(buf, &j)) 815 mlx5e_rl_tx_limit_add(rl, j); 816 } 817 818 /* setup rate table sysctls */ 819 for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) { 820 mlx5e_rl_sysctl_add_u64_oid(rl, 821 MLX5E_RL_PARAMS_INDEX(table_arg[i]), 822 node, mlx5e_rl_table_params_desc[2 * i], 823 mlx5e_rl_table_params_desc[2 * i + 1]); 824 } 825 } 826 827 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 828 struct mlx5e_rl_worker *rlw = rl->workers + j; 829 830 rlw->priv = priv; 831 832 cv_init(&rlw->cv, "mlx5-worker-cv"); 833 mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF); 834 STAILQ_INIT(&rlw->index_list_head); 835 STAILQ_INIT(&rlw->process_head); 836 837 rlw->channels = malloc(sizeof(rlw->channels[0]) * 838 rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO); 839 840 MLX5E_RL_WORKER_LOCK(rlw); 841 for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) { 842 struct mlx5e_rl_channel *channel = rlw->channels + i; 843 channel->worker = rlw; 844 channel->tag.m_snd_tag.ifp = priv->ifp; 845 channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT; 846 STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry); 847 } 848 MLX5E_RL_WORKER_UNLOCK(rlw); 849 } 850 851 PRIV_LOCK(priv); 852 error = mlx5e_rl_open_workers(priv); 853 PRIV_UNLOCK(priv); 854 855 if (error != 0) { 856 if_printf(priv->ifp, 857 "mlx5e_rl_open_workers failed: %d\n", error); 858 } 859 860 return (0); 861 862 err_uar: 863 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 864 done: 865 sysctl_ctx_free(&rl->ctx); 866 sx_destroy(&rl->rl_sxlock); 867 return (error); 868 } 869 870 static int 871 mlx5e_rl_open_workers(struct mlx5e_priv *priv) 872 { 873 struct mlx5e_rl_priv_data *rl = &priv->rl; 874 struct thread *rl_thread = NULL; 875 struct proc *rl_proc = NULL; 876 uint64_t j; 
877 int error; 878 879 if (priv->gone || rl->opened) 880 return (-EINVAL); 881 882 MLX5E_RL_WLOCK(rl); 883 /* compute channel parameters once */ 884 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 885 MLX5E_RL_WUNLOCK(rl); 886 887 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 888 struct mlx5e_rl_worker *rlw = rl->workers + j; 889 890 /* start worker thread */ 891 error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread, 892 RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j); 893 if (error != 0) { 894 if_printf(rl->priv->ifp, 895 "kproc_kthread_add failed: %d\n", error); 896 rlw->worker_done = 1; 897 } 898 } 899 900 rl->opened = 1; 901 902 return (0); 903 } 904 905 static void 906 mlx5e_rl_close_workers(struct mlx5e_priv *priv) 907 { 908 struct mlx5e_rl_priv_data *rl = &priv->rl; 909 uint64_t y; 910 911 if (rl->opened == 0) 912 return; 913 914 /* tear down worker threads simultaneously */ 915 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 916 struct mlx5e_rl_worker *rlw = rl->workers + y; 917 918 /* tear down worker before freeing SQs */ 919 MLX5E_RL_WORKER_LOCK(rlw); 920 if (rlw->worker_done == 0) { 921 rlw->worker_done = 1; 922 cv_broadcast(&rlw->cv); 923 } else { 924 /* XXX thread not started */ 925 rlw->worker_done = 0; 926 } 927 MLX5E_RL_WORKER_UNLOCK(rlw); 928 } 929 930 /* wait for worker threads to exit */ 931 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 932 struct mlx5e_rl_worker *rlw = rl->workers + y; 933 934 /* tear down worker before freeing SQs */ 935 MLX5E_RL_WORKER_LOCK(rlw); 936 while (rlw->worker_done != 0) 937 cv_wait(&rlw->cv, &rlw->mtx); 938 MLX5E_RL_WORKER_UNLOCK(rlw); 939 } 940 941 rl->opened = 0; 942 } 943 944 static void 945 mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl) 946 { 947 unsigned x; 948 949 MLX5E_RL_WLOCK(rl); 950 for (x = 0; x != rl->param.tx_rates_def; x++) 951 rl->rate_limit_table[x] = 0; 952 MLX5E_RL_WUNLOCK(rl); 953 } 954 955 void 956 mlx5e_rl_cleanup(struct 
mlx5e_priv *priv) 957 { 958 struct mlx5e_rl_priv_data *rl = &priv->rl; 959 uint64_t y; 960 961 /* check if there is support for packet pacing */ 962 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 963 return; 964 965 /* TODO check if there is support for packet pacing */ 966 967 sysctl_ctx_free(&rl->ctx); 968 969 PRIV_LOCK(priv); 970 mlx5e_rl_close_workers(priv); 971 PRIV_UNLOCK(priv); 972 973 mlx5e_rl_reset_rates(rl); 974 975 /* free shared UAR for SQs */ 976 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 977 978 /* close TIS domain */ 979 mlx5e_rl_close_tis(priv); 980 981 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 982 struct mlx5e_rl_worker *rlw = rl->workers + y; 983 984 cv_destroy(&rlw->cv); 985 mtx_destroy(&rlw->mtx); 986 free(rlw->channels, M_MLX5EN); 987 } 988 free(rl->rate_limit_table, M_MLX5EN); 989 free(rl->workers, M_MLX5EN); 990 sx_destroy(&rl->rl_sxlock); 991 } 992 993 static void 994 mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw, 995 struct mlx5e_rl_channel *channel) 996 { 997 STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry); 998 cv_broadcast(&rlw->cv); 999 } 1000 1001 static void 1002 mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) 1003 { 1004 if (channel == NULL) 1005 return; 1006 1007 MLX5E_RL_WORKER_LOCK(rlw); 1008 switch (channel->state) { 1009 case MLX5E_RL_ST_MODIFY: 1010 channel->state = MLX5E_RL_ST_DESTROY; 1011 break; 1012 case MLX5E_RL_ST_USED: 1013 channel->state = MLX5E_RL_ST_DESTROY; 1014 mlx5e_rlw_queue_channel_locked(rlw, channel); 1015 break; 1016 default: 1017 break; 1018 } 1019 MLX5E_RL_WORKER_UNLOCK(rlw); 1020 } 1021 1022 static int 1023 mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) 1024 { 1025 1026 MLX5E_RL_WORKER_LOCK(rlw); 1027 channel->new_rate = rate; 1028 switch (channel->state) { 1029 case MLX5E_RL_ST_USED: 1030 channel->state = MLX5E_RL_ST_MODIFY; 1031 
mlx5e_rlw_queue_channel_locked(rlw, channel); 1032 break; 1033 default: 1034 break; 1035 } 1036 MLX5E_RL_WORKER_UNLOCK(rlw); 1037 1038 return (0); 1039 } 1040 1041 static int 1042 mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, 1043 union if_snd_tag_query_params *params) 1044 { 1045 int retval; 1046 1047 MLX5E_RL_WORKER_LOCK(rlw); 1048 switch (channel->state) { 1049 case MLX5E_RL_ST_USED: 1050 params->rate_limit.max_rate = channel->last_rate; 1051 params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); 1052 retval = 0; 1053 break; 1054 case MLX5E_RL_ST_MODIFY: 1055 params->rate_limit.max_rate = channel->last_rate; 1056 params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); 1057 retval = EBUSY; 1058 break; 1059 default: 1060 retval = EINVAL; 1061 break; 1062 } 1063 MLX5E_RL_WORKER_UNLOCK(rlw); 1064 1065 return (retval); 1066 } 1067 1068 static int 1069 mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw, 1070 struct mlx5e_rl_channel **pchannel) 1071 { 1072 struct mlx5e_rl_channel *channel; 1073 int retval = ENOMEM; 1074 1075 MLX5E_RL_WORKER_LOCK(rlw); 1076 /* Check for available channel in free list */ 1077 if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) { 1078 retval = 0; 1079 /* Remove head index from available list */ 1080 STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry); 1081 channel->state = MLX5E_RL_ST_USED; 1082 atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL); 1083 } else { 1084 atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL); 1085 } 1086 MLX5E_RL_WORKER_UNLOCK(rlw); 1087 1088 *pchannel = channel; 1089 #ifdef RATELIMIT_DEBUG 1090 if_printf(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel); 1091 #endif 1092 return (retval); 1093 } 1094 1095 int 1096 mlx5e_rl_snd_tag_alloc(struct ifnet *ifp, 1097 union if_snd_tag_alloc_params *params, 1098 struct m_snd_tag **ppmt) 1099 { 1100 struct mlx5e_rl_channel *channel; 1101 
struct mlx5e_rl_worker *rlw; 1102 struct mlx5e_priv *priv; 1103 int error; 1104 1105 priv = ifp->if_softc; 1106 1107 /* check if there is support for packet pacing or if device is going away */ 1108 if (!MLX5_CAP_GEN(priv->mdev, qos) || 1109 !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone || 1110 params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) 1111 return (EOPNOTSUPP); 1112 1113 /* compute worker thread this TCP connection belongs to */ 1114 rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) % 1115 priv->rl.param.tx_worker_threads_def); 1116 1117 error = mlx5e_find_available_tx_ring_index(rlw, &channel); 1118 if (error != 0) 1119 goto done; 1120 1121 error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate); 1122 if (error != 0) { 1123 mlx5e_rl_free(rlw, channel); 1124 goto done; 1125 } 1126 1127 /* store pointer to mbuf tag */ 1128 *ppmt = &channel->tag.m_snd_tag; 1129 done: 1130 return (error); 1131 } 1132 1133 1134 int 1135 mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) 1136 { 1137 struct mlx5e_rl_channel *channel = 1138 container_of(pmt, struct mlx5e_rl_channel, tag.m_snd_tag); 1139 1140 return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate)); 1141 } 1142 1143 int 1144 mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) 1145 { 1146 struct mlx5e_rl_channel *channel = 1147 container_of(pmt, struct mlx5e_rl_channel, tag.m_snd_tag); 1148 1149 return (mlx5e_rl_query(channel->worker, channel, params)); 1150 } 1151 1152 void 1153 mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt) 1154 { 1155 struct mlx5e_rl_channel *channel = 1156 container_of(pmt, struct mlx5e_rl_channel, tag.m_snd_tag); 1157 1158 mlx5e_rl_free(channel->worker, channel); 1159 } 1160 1161 static int 1162 mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS) 1163 { 1164 struct mlx5e_rl_priv_data *rl = arg1; 1165 struct mlx5e_priv *priv = rl->priv; 1166 struct sbuf sbuf; 1167 
unsigned x; 1168 int error; 1169 1170 error = sysctl_wire_old_buffer(req, 0); 1171 if (error != 0) 1172 return (error); 1173 1174 PRIV_LOCK(priv); 1175 1176 sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req); 1177 1178 sbuf_printf(&sbuf, 1179 "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n" 1180 "\t" "--------------------------------------------\n"); 1181 1182 MLX5E_RL_RLOCK(rl); 1183 for (x = 0; x != rl->param.tx_rates_def; x++) { 1184 if (rl->rate_limit_table[x] == 0) 1185 continue; 1186 1187 sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n", 1188 x, (unsigned)rl->param.tx_burst_size, 1189 (long long)rl->rate_limit_table[x]); 1190 } 1191 MLX5E_RL_RUNLOCK(rl); 1192 1193 error = sbuf_finish(&sbuf); 1194 sbuf_delete(&sbuf); 1195 1196 PRIV_UNLOCK(priv); 1197 1198 return (error); 1199 } 1200 1201 static int 1202 mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl) 1203 { 1204 uint64_t x; 1205 uint64_t y; 1206 1207 MLX5E_RL_WLOCK(rl); 1208 /* compute channel parameters once */ 1209 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 1210 MLX5E_RL_WUNLOCK(rl); 1211 1212 for (y = 0; y != rl->param.tx_worker_threads_def; y++) { 1213 struct mlx5e_rl_worker *rlw = rl->workers + y; 1214 1215 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1216 struct mlx5e_rl_channel *channel; 1217 struct mlx5e_sq *sq; 1218 1219 channel = rlw->channels + x; 1220 sq = channel->sq; 1221 1222 if (sq == NULL) 1223 continue; 1224 1225 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) { 1226 mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq, 1227 rl->param.tx_coalesce_usecs, 1228 rl->param.tx_coalesce_pkts, 1229 rl->param.tx_coalesce_mode); 1230 } else { 1231 mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq, 1232 rl->param.tx_coalesce_usecs, 1233 rl->param.tx_coalesce_pkts); 1234 } 1235 } 1236 } 1237 return (0); 1238 } 1239 1240 void 1241 mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl) 1242 { 1243 uint64_t 
x; 1244 uint64_t y; 1245 1246 for (y = 0; y != rl->param.tx_worker_threads_def; y++) { 1247 struct mlx5e_rl_worker *rlw = rl->workers + y; 1248 1249 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1250 struct mlx5e_rl_channel *channel; 1251 struct mlx5e_sq *sq; 1252 1253 channel = rlw->channels + x; 1254 sq = channel->sq; 1255 1256 if (sq == NULL) 1257 continue; 1258 1259 mtx_lock(&sq->lock); 1260 mlx5e_update_sq_inline(sq); 1261 mtx_unlock(&sq->lock); 1262 } 1263 } 1264 } 1265 1266 static int 1267 mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) 1268 { 1269 unsigned x; 1270 int error; 1271 1272 if (value < 1000 || 1273 mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) 1274 return (EINVAL); 1275 1276 MLX5E_RL_WLOCK(rl); 1277 error = ENOMEM; 1278 1279 /* check if rate already exists */ 1280 for (x = 0; x != rl->param.tx_rates_def; x++) { 1281 if (rl->rate_limit_table[x] != value) 1282 continue; 1283 error = EEXIST; 1284 break; 1285 } 1286 1287 /* check if there is a free rate entry */ 1288 if (x == rl->param.tx_rates_def) { 1289 for (x = 0; x != rl->param.tx_rates_def; x++) { 1290 if (rl->rate_limit_table[x] != 0) 1291 continue; 1292 rl->rate_limit_table[x] = value; 1293 error = 0; 1294 break; 1295 } 1296 } 1297 MLX5E_RL_WUNLOCK(rl); 1298 1299 return (error); 1300 } 1301 1302 static int 1303 mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) 1304 { 1305 unsigned x; 1306 int error; 1307 1308 if (value == 0) 1309 return (EINVAL); 1310 1311 MLX5E_RL_WLOCK(rl); 1312 1313 /* check if rate already exists */ 1314 for (x = 0; x != rl->param.tx_rates_def; x++) { 1315 if (rl->rate_limit_table[x] != value) 1316 continue; 1317 /* free up rate */ 1318 rl->rate_limit_table[x] = 0; 1319 break; 1320 } 1321 1322 /* check if there is a free rate entry */ 1323 if (x == rl->param.tx_rates_def) 1324 error = ENOENT; 1325 else 1326 error = 0; 1327 MLX5E_RL_WUNLOCK(rl); 1328 1329 return (error); 1330 } 1331 1332 static 
int
mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	unsigned mode_modify;
	unsigned was_opened;
	uint64_t value;
	uint64_t old;
	int error;

	/*
	 * Common sysctl handler for all 64-bit rate-limit parameters.
	 * "arg2" is the index of the parameter inside rl->param.arg[].
	 * Each case range-checks the new value, stores it and, when
	 * needed, refreshes or restarts the rate-limit workers.
	 */
	PRIV_LOCK(priv);

	/* read the current value under the read lock */
	MLX5E_RL_RLOCK(rl);
	value = rl->param.arg[arg2];
	MLX5E_RL_RUNLOCK(rl);

	if (req != NULL) {
		old = value;
		error = sysctl_handle_64(oidp, &value, 0, req);
		/* bail out on error, read-only access or unchanged value */
		if (error || req->newptr == NULL ||
		    value == rl->param.arg[arg2])
			goto done;
	} else {
		/* NOTE(review): "old" is not read afterwards in this view */
		old = 0;
		error = 0;
	}

	/* check if device is gone */
	if (priv->gone) {
		error = ENXIO;
		goto done;
	}
	was_opened = rl->opened;
	mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);

	/* offsetof-based index of the parameter being modified */
	switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
	case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
		/* clamp into [1, tx_worker_threads_max] */
		if (value > rl->param.tx_worker_threads_max)
			value = rl->param.tx_worker_threads_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
		/* clamp into [1, tx_channels_per_worker_max] */
		if (value > rl->param.tx_channels_per_worker_max)
			value = rl->param.tx_channels_per_worker_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
		/* clamp into [1, tx_rates_max] */
		if (value > rl->param.tx_rates_max)
			value = rl->param.tx_rates_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
		/* range check */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_period))
			value = MLX5E_FLD_MAX(cqc, cq_period);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* check to avoid down and up the network interface */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
		/* import TX coal pkts */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
			value = MLX5E_FLD_MAX(cqc, cq_max_count);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* check to avoid down and up the network interface */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
		/* network interface must be down */
		if (was_opened != 0 && mode_modify == 0)
			mlx5e_rl_close_workers(priv);

		/* import TX coalesce mode */
		if (value != 0)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;

		/* restart network interface, if any */
		if (was_opened != 0) {
			if (mode_modify == 0)
				mlx5e_rl_open_workers(priv);
			else
				error = mlx5e_rl_refresh_channel_params(rl);
		}
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* import TX queue size */
		if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
			value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
		else if (value > priv->params_ethtool.tx_queue_size_max)
			value = priv->params_ethtool.tx_queue_size_max;

		/* store actual TX queue size */
		value = 1ULL << order_base_2(value);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify TX completion factor */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify parameter */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
		/* writing a rate here inserts it into the rate table */
		error = mlx5e_rl_tx_limit_add(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
		/* writing a rate here removes it from the rate table */
		error = mlx5e_rl_tx_limit_clr(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
		/* range check */
		if (value > rl->param.tx_allowed_deviation_max)
			value = rl->param.tx_allowed_deviation_max;
		else if (value < rl->param.tx_allowed_deviation_min)
			value = rl->param.tx_allowed_deviation_min;

		/* readers of this field take the read lock */
		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
		/* range check */
		if (value > rl->param.tx_burst_size_max)
			value = rl->param.tx_burst_size_max;
		else if (value < rl->param.tx_burst_size_min)
			value = rl->param.tx_burst_size_min;

		/* readers of this field take the read lock */
		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	default:
		break;
	}
done:
	PRIV_UNLOCK(priv);
	return (error);
}

/*
 * Register a 64-bit parameter sysctl under "node".  The access flags are
 * derived from the name suffix: "_max"/"_min" are read-only, "_def" is a
 * read-only tunable only exposed with RATELIMIT_DEBUG, everything else
 * is a read-write tunable.  "x" is the rl->param.arg[] index passed to
 * the handler as arg2.
 */
static void
mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/*
	 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
	 * take care of loading default sysctl value from the kernel
	 * environment, if any:
	 */
	if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
		/* read-only SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    name, CTLTYPE_U64 | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
	} else {
		if (strstr(name, "_def") != 0) {
#ifdef RATELIMIT_DEBUG
			/* tunable read-only advanced SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RDTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
#endif
		} else {
			/* read-write SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RWTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
		}
	}
}

/*
 * Register a read-only 64-bit statistics counter sysctl under "node".
 * "x" is the rl->stats.arg[] index exported directly, without a handler.
 */
static void
mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/* read-only SYSCTLs */
	SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
	    CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
}

#endif