1 /*- 2 * Copyright (c) 2016 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 #include "en.h" 29 30 #ifdef RATELIMIT 31 32 static int mlx5e_rl_open_workers(struct mlx5e_priv *); 33 static void mlx5e_rl_close_workers(struct mlx5e_priv *); 34 static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); 35 static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, 36 struct sysctl_oid *, const char *name, const char *desc); 37 static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 38 struct sysctl_oid *node, const char *name, const char *desc); 39 static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); 40 static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); 41 42 static void 43 mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, 44 struct mlx5e_sq_param *param) 45 { 46 void *sqc = param->sqc; 47 void *wq = MLX5_ADDR_OF(sqc, sqc, wq); 48 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 49 50 MLX5_SET(wq, wq, log_wq_sz, log_sq_size); 51 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 52 MLX5_SET(wq, wq, pd, rl->priv->pdn); 53 54 param->wq.buf_numa_node = 0; 55 param->wq.db_numa_node = 0; 56 param->wq.linear = 1; 57 } 58 59 static void 60 mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, 61 struct mlx5e_cq_param *param) 62 { 63 void *cqc = param->cqc; 64 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 65 66 MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); 67 MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); 68 MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); 69 70 switch (rl->param.tx_coalesce_mode) { 71 case 0: 72 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 73 break; 74 default: 75 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) 76 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); 77 else 78 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 79 break; 80 } 81 } 82 83 static void 84 mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, 85 struct mlx5e_rl_channel_param *cparam) 86 { 87 memset(cparam, 0, sizeof(*cparam)); 88 89 mlx5e_rl_build_sq_param(rl, &cparam->sq); 90 mlx5e_rl_build_cq_param(rl, &cparam->cq); 91 } 92 93 static int 94 mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 95 struct mlx5e_sq_param *param, int ix) 96 { 97 struct mlx5_core_dev *mdev = priv->mdev; 98 void *sqc = param->sqc; 99 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); 100 int err; 101 102 /* Create DMA descriptor TAG */ 103 if ((err = -bus_dma_tag_create( 104 bus_get_dma_tag(mdev->pdev->dev.bsddev), 105 1, /* any alignment */ 106 0, /* no boundary */ 107 BUS_SPACE_MAXADDR, /* lowaddr */ 108 BUS_SPACE_MAXADDR, /* highaddr */ 109 NULL, NULL, /* filter, filterarg */ 110 MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ 111 MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ 112 MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 113 0, /* flags */ 114 NULL, NULL, /* lockfunc, lockfuncarg */ 115 &sq->dma_tag))) 116 goto done; 117 118 /* use shared UAR */ 119 sq->uar = priv->rl.sq_uar; 120 121 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, 122 &sq->wq_ctrl); 123 if (err) 124 goto err_free_dma_tag; 125 126 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 127 /* 128 * The sq->bf_buf_size variable is intentionally left zero so 129 * that the doorbell writes will occur at the same memory 130 * location. 131 */ 132 133 err = mlx5e_alloc_sq_db(sq); 134 if (err) 135 goto err_sq_wq_destroy; 136 137 sq->mkey_be = cpu_to_be32(priv->mr.key); 138 sq->ifp = priv->ifp; 139 sq->priv = priv; 140 sq->max_inline = priv->params.tx_max_inline; 141 sq->min_inline_mode = priv->params.tx_min_inline_mode; 142 sq->vlan_inline_cap = MLX5_CAP_ETH(mdev, wqe_vlan_insert); 143 144 return (0); 145 146 err_sq_wq_destroy: 147 mlx5_wq_destroy(&sq->wq_ctrl); 148 err_free_dma_tag: 149 bus_dma_tag_destroy(sq->dma_tag); 150 done: 151 return (err); 152 } 153 154 static void 155 mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) 156 { 157 158 mlx5e_free_sq_db(sq); 159 mlx5_wq_destroy(&sq->wq_ctrl); 160 } 161 162 static int 163 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 164 struct mlx5e_sq_param *param, int ix) 165 { 166 int err; 167 168 err = mlx5e_rl_create_sq(priv, sq, param, ix); 169 if (err) 170 return (err); 171 172 err = mlx5e_enable_sq(sq, param, priv->rl.tisn); 173 if (err) 174 goto err_destroy_sq; 175 176 err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); 177 if (err) 178 goto err_disable_sq; 179 180 return (0); 181 182 err_disable_sq: 183 mlx5e_disable_sq(sq); 184 err_destroy_sq: 185 mlx5e_rl_destroy_sq(sq); 186 187 return (err); 188 } 189 190 static void 191 mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) 192 { 193 mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); 194 mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); 195 196 callout_init_mtx(&sq->cev_callout, &sq->lock, 0); 197 198 sq->cev_factor = priv->rl.param.tx_completion_fact; 199 200 /* ensure the TX completion event factor is not zero */ 201 if (sq->cev_factor == 0) 202 sq->cev_factor = 1; 203 } 204 205 static int 206 mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, 207 struct mlx5e_rl_channel_param *cparam, 208 struct mlx5e_sq *volatile *ppsq) 209 { 210 struct mlx5e_priv *priv = rlw->priv; 211 struct mlx5e_sq *sq; 212 int err; 213 214 sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); 215 216 /* init mutexes */ 217 mlx5e_rl_chan_mtx_init(priv, sq); 218 219 /* open TX completion queue */ 220 err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq, 221 &mlx5e_tx_cq_comp, eq_ix); 222 if (err) 223 goto err_free; 224 225 err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); 226 if (err) 227 goto err_close_tx_cq; 228 229 /* store TX channel pointer */ 230 *ppsq = sq; 231 232 /* poll TX queue initially */ 233 sq->cq.mcq.comp(&sq->cq.mcq); 234 235 return (0); 236 237 err_close_tx_cq: 238 mlx5e_close_cq(&sq->cq); 239 240 err_free: 241 /* destroy mutexes */ 242 mtx_destroy(&sq->lock); 243 mtx_destroy(&sq->comp_lock); 244 free(sq, M_MLX5EN); 245 atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); 246 return (err); 247 } 248 249 static void 250 mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) 251 { 252 struct mlx5e_sq *sq = *ppsq; 253 254 /* check if channel is already closed */ 255 if (sq == NULL) 256 return; 257 /* ensure channel pointer is no longer used */ 258 *ppsq = NULL; 259 260 /* teardown and destroy SQ */ 261 mlx5e_drain_sq(sq); 262 mlx5e_disable_sq(sq); 263 mlx5e_rl_destroy_sq(sq); 264 265 /* close CQ */ 266 mlx5e_close_cq(&sq->cq); 267 268 /* destroy mutexes */ 269 mtx_destroy(&sq->lock); 270 mtx_destroy(&sq->comp_lock); 271 272 free(sq, M_MLX5EN); 273 } 274 275 static void 276 mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) 277 { 278 /* 279 * Limit the maximum distance between completion events to 280 * half of the currently set TX queue size. 281 * 282 * The maximum number of queue entries a single IP packet can 283 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. 284 * 285 * The worst case max value is then given as below: 286 */ 287 uint64_t max = rl->param.tx_queue_size / 288 (2 * MLX5_SEND_WQE_MAX_WQEBBS); 289 290 /* 291 * Update the maximum completion factor value in case the 292 * tx_queue_size field changed. Ensure we don't overflow 293 * 16-bits. 294 */ 295 if (max < 1) 296 max = 1; 297 else if (max > 65535) 298 max = 65535; 299 rl->param.tx_completion_fact_max = max; 300 301 /* 302 * Verify that the current TX completion factor is within the 303 * given limits: 304 */ 305 if (rl->param.tx_completion_fact < 1) 306 rl->param.tx_completion_fact = 1; 307 else if (rl->param.tx_completion_fact > max) 308 rl->param.tx_completion_fact = max; 309 } 310 311 static int 312 mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) 313 { 314 struct mlx5e_priv *priv = sq->priv; 315 struct mlx5_core_dev *mdev = priv->mdev; 316 317 void *in; 318 void *sqc; 319 int inlen; 320 int err; 321 322 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 323 in = mlx5_vzalloc(inlen); 324 if (in == NULL) 325 return (-ENOMEM); 326 327 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 328 329 MLX5_SET(modify_sq_in, in, sqn, sq->sqn); 330 MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); 331 MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); 332 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); 333 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); 334 335 err = mlx5_core_modify_sq(mdev, in, inlen); 336 337 kvfree(in); 338 339 return (err); 340 } 341 342 /* 343 * This function will search the configured rate limit table for the 344 * best match to avoid that a single socket based application can 345 * allocate all the available hardware rates. If the user selected 346 * rate deviates too much from the closes rate available in the rate 347 * limit table, unlimited rate will be selected. 348 */ 349 static uint64_t 350 mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) 351 { 352 uint64_t distance = -1ULL; 353 uint64_t diff; 354 uint64_t retval = 0; /* unlimited */ 355 uint64_t x; 356 357 /* search for closest rate */ 358 for (x = 0; x != rl->param.tx_rates_def; x++) { 359 uint64_t rate = rl->rate_limit_table[x]; 360 if (rate == 0) 361 continue; 362 363 if (rate > user_rate) 364 diff = rate - user_rate; 365 else 366 diff = user_rate - rate; 367 368 /* check if distance is smaller than previous rate */ 369 if (diff < distance) { 370 distance = diff; 371 retval = rate; 372 } 373 } 374 375 /* range check for multiplication below */ 376 if (user_rate > rl->param.tx_limit_max) 377 user_rate = rl->param.tx_limit_max; 378 379 /* fallback to unlimited, if rate deviates too much */ 380 if (distance > howmany(user_rate * 381 rl->param.tx_allowed_deviation, 1000ULL)) 382 retval = 0; 383 384 return (retval); 385 } 386 387 /* 388 * This function sets the requested rate for a rate limit channel, in 389 * bits per second. The requested rate will be filtered through the 390 * find best rate function above. 391 */ 392 static int 393 mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw, 394 struct mlx5e_rl_channel *channel, uint64_t rate) 395 { 396 struct mlx5e_rl_priv_data *rl = &rlw->priv->rl; 397 struct mlx5e_sq *sq; 398 uint64_t temp; 399 uint16_t index; 400 uint16_t burst; 401 int error; 402 403 if (rate != 0) { 404 MLX5E_RL_WORKER_UNLOCK(rlw); 405 406 MLX5E_RL_RLOCK(rl); 407 408 /* get current burst size in bytes */ 409 temp = rl->param.tx_burst_size * 410 MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu); 411 412 /* limit burst size to 64K currently */ 413 if (temp > 65535) 414 temp = 65535; 415 burst = temp; 416 417 /* find best rate */ 418 rate = mlx5e_rl_find_best_rate_locked(rl, rate); 419 420 MLX5E_RL_RUNLOCK(rl); 421 422 if (rate == 0) { 423 /* rate doesn't exist, fallback to unlimited */ 424 error = EINVAL; 425 index = 0; 426 rate = 0; 427 atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 428 } else { 429 /* get a reference on the new rate */ 430 error = -mlx5_rl_add_rate(rlw->priv->mdev, 431 howmany(rate, 1000), burst, &index); 432 433 if (error != 0) { 434 /* adding rate failed, fallback to unlimited */ 435 index = 0; 436 rate = 0; 437 atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL); 438 } 439 } 440 MLX5E_RL_WORKER_LOCK(rlw); 441 } else { 442 index = 0; 443 burst = 0; /* default */ 444 } 445 446 /* atomically swap rates */ 447 temp = channel->last_rate; 448 channel->last_rate = rate; 449 rate = temp; 450 451 /* atomically swap burst size */ 452 temp = channel->last_burst; 453 channel->last_burst = burst; 454 burst = temp; 455 456 MLX5E_RL_WORKER_UNLOCK(rlw); 457 /* put reference on the old rate, if any */ 458 if (rate != 0) { 459 mlx5_rl_remove_rate(rlw->priv->mdev, 460 howmany(rate, 1000), burst); 461 } 462 463 /* set new rate */ 464 sq = channel->sq; 465 if (sq != NULL) { 466 error = mlx5e_rl_modify_sq(sq, index); 467 if (error != 0) 468 atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 469 } else 470 error = 0; 471 MLX5E_RL_WORKER_LOCK(rlw); 472 473 return (-error); 474 } 475 476 static void 477 mlx5e_rl_worker(void *arg) 478 { 479 struct thread *td; 480 struct mlx5e_rl_worker *rlw = arg; 481 struct mlx5e_rl_channel *channel; 482 struct mlx5e_priv *priv; 483 unsigned ix; 484 uint64_t x; 485 int error; 486 487 /* set thread priority */ 488 td = curthread; 489 490 thread_lock(td); 491 sched_prio(td, PI_SWI(SWI_NET)); 492 thread_unlock(td); 493 494 priv = rlw->priv; 495 496 /* compute completion vector */ 497 ix = (rlw - priv->rl.workers) % 498 priv->mdev->priv.eq_table.num_comp_vectors; 499 500 /* TODO bind to CPU */ 501 502 /* open all the SQs */ 503 MLX5E_RL_WORKER_LOCK(rlw); 504 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 505 struct mlx5e_rl_channel *channel = rlw->channels + x; 506 507 #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS) 508 if (channel->state == MLX5E_RL_ST_FREE) 509 continue; 510 #endif 511 MLX5E_RL_WORKER_UNLOCK(rlw); 512 513 MLX5E_RL_RLOCK(&priv->rl); 514 error = mlx5e_rl_open_channel(rlw, ix, 515 &priv->rl.chan_param, &channel->sq); 516 MLX5E_RL_RUNLOCK(&priv->rl); 517 518 MLX5E_RL_WORKER_LOCK(rlw); 519 if (error != 0) { 520 if_printf(priv->ifp, 521 "mlx5e_rl_open_channel failed: %d\n", error); 522 break; 523 } 524 mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate); 525 } 526 while (1) { 527 if (STAILQ_FIRST(&rlw->process_head) == NULL) { 528 /* check if we are tearing down */ 529 if (rlw->worker_done != 0) 530 break; 531 cv_wait(&rlw->cv, &rlw->mtx); 532 } 533 /* check if we are tearing down */ 534 if (rlw->worker_done != 0) 535 break; 536 channel = STAILQ_FIRST(&rlw->process_head); 537 if (channel != NULL) { 538 STAILQ_REMOVE_HEAD(&rlw->process_head, entry); 539 540 switch (channel->state) { 541 case MLX5E_RL_ST_MODIFY: 542 channel->state = MLX5E_RL_ST_USED; 543 MLX5E_RL_WORKER_UNLOCK(rlw); 544 545 /* create channel by demand */ 546 if (channel->sq == NULL) { 547 MLX5E_RL_RLOCK(&priv->rl); 548 error = mlx5e_rl_open_channel(rlw, ix, 549 &priv->rl.chan_param, &channel->sq); 550 MLX5E_RL_RUNLOCK(&priv->rl); 551 552 if (error != 0) { 553 if_printf(priv->ifp, 554 "mlx5e_rl_open_channel failed: %d\n", error); 555 } else { 556 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL); 557 } 558 } else { 559 mlx5e_resume_sq(channel->sq); 560 } 561 562 MLX5E_RL_WORKER_LOCK(rlw); 563 /* convert from bytes/s to bits/s and set new rate */ 564 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 565 channel->new_rate * 8ULL); 566 if (error != 0) { 567 if_printf(priv->ifp, 568 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 569 error); 570 } 571 break; 572 573 case MLX5E_RL_ST_DESTROY: 574 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 575 if (error != 0) { 576 if_printf(priv->ifp, 577 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 578 error); 579 } 580 if (channel->sq != NULL) { 581 /* 582 * Make sure all packets are 583 * transmitted before SQ is 584 * returned to free list: 585 */ 586 MLX5E_RL_WORKER_UNLOCK(rlw); 587 mlx5e_drain_sq(channel->sq); 588 MLX5E_RL_WORKER_LOCK(rlw); 589 } 590 /* put the channel back into the free list */ 591 STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry); 592 channel->state = MLX5E_RL_ST_FREE; 593 atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL); 594 break; 595 default: 596 /* NOP */ 597 break; 598 } 599 } 600 } 601 602 /* close all the SQs */ 603 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 604 struct mlx5e_rl_channel *channel = rlw->channels + x; 605 606 /* update the initial rate */ 607 channel->init_rate = channel->last_rate; 608 609 /* make sure we free up the rate resource */ 610 mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 611 612 if (channel->sq != NULL) { 613 MLX5E_RL_WORKER_UNLOCK(rlw); 614 mlx5e_rl_close_channel(&channel->sq); 615 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL); 616 MLX5E_RL_WORKER_LOCK(rlw); 617 } 618 } 619 620 rlw->worker_done = 0; 621 cv_broadcast(&rlw->cv); 622 MLX5E_RL_WORKER_UNLOCK(rlw); 623 624 kthread_exit(); 625 } 626 627 static int 628 mlx5e_rl_open_tis(struct mlx5e_priv *priv) 629 { 630 struct mlx5_core_dev *mdev = priv->mdev; 631 u32 in[MLX5_ST_SZ_DW(create_tis_in)]; 632 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 633 634 memset(in, 0, sizeof(in)); 635 636 MLX5_SET(tisc, tisc, prio, 0); 637 MLX5_SET(tisc, tisc, transport_domain, priv->tdn); 638 639 return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn)); 640 } 641 642 static void 643 mlx5e_rl_close_tis(struct mlx5e_priv *priv) 644 { 645 mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn); 646 } 647 648 static void 649 mlx5e_rl_set_default_params(struct mlx5e_rl_params *param, 650 struct mlx5_core_dev *mdev) 651 { 652 /* ratelimit workers */ 653 param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors; 654 param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS; 655 656 /* range check */ 657 if (param->tx_worker_threads_def == 0 || 658 param->tx_worker_threads_def > param->tx_worker_threads_max) 659 param->tx_worker_threads_def = param->tx_worker_threads_max; 660 661 /* ratelimit channels */ 662 param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS / 663 param->tx_worker_threads_def; 664 param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS; 665 666 /* range check */ 667 if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER) 668 param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER; 669 670 /* set default burst size */ 671 param->tx_burst_size = 4; /* MTUs */ 672 673 /* 674 * Set maximum burst size 675 * 676 * The burst size is multiplied by the MTU and clamped to the 677 * range 0 ... 65535 bytes inclusivly before fed into the 678 * firmware. 679 * 680 * NOTE: If the burst size or MTU is changed only ratelimit 681 * connections made after the change will use the new burst 682 * size. 683 */ 684 param->tx_burst_size_max = 255; 685 686 /* get firmware rate limits in 1000bit/s and convert them to bit/s */ 687 param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL; 688 param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL; 689 690 /* ratelimit table size */ 691 param->tx_rates_max = mdev->priv.rl_table.max_size; 692 693 /* range check */ 694 if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES) 695 param->tx_rates_max = MLX5E_RL_MAX_TX_RATES; 696 697 /* set default number of rates */ 698 param->tx_rates_def = param->tx_rates_max; 699 700 /* set maximum allowed rate deviation */ 701 if (param->tx_limit_max != 0) { 702 /* 703 * Make sure the deviation multiplication doesn't 704 * overflow unsigned 64-bit: 705 */ 706 param->tx_allowed_deviation_max = -1ULL / 707 param->tx_limit_max; 708 } 709 /* set default rate deviation */ 710 param->tx_allowed_deviation = 50; /* 5.0% */ 711 712 /* channel parameters */ 713 param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 714 param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT; 715 param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT; 716 param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT; 717 param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT; 718 } 719 720 static const char *mlx5e_rl_params_desc[] = { 721 MLX5E_RL_PARAMS(MLX5E_STATS_DESC) 722 }; 723 724 static const char *mlx5e_rl_table_params_desc[] = { 725 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC) 726 }; 727 728 static const char *mlx5e_rl_stats_desc[] = { 729 MLX5E_RL_STATS(MLX5E_STATS_DESC) 730 }; 731 732 int 733 mlx5e_rl_init(struct mlx5e_priv *priv) 734 { 735 struct mlx5e_rl_priv_data *rl = &priv->rl; 736 struct sysctl_oid *node; 737 struct sysctl_oid *stats; 738 char buf[64]; 739 uint64_t i; 740 uint64_t j; 741 int error; 742 743 /* check if there is support for packet pacing */ 744 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 745 return (0); 746 747 rl->priv = priv; 748 749 sysctl_ctx_init(&rl->ctx); 750 751 sx_init(&rl->rl_sxlock, "ratelimit-sxlock"); 752 753 /* allocate shared UAR for SQs */ 754 error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar); 755 if (error) 756 goto done; 757 758 /* open own TIS domain for ratelimit SQs */ 759 error = mlx5e_rl_open_tis(priv); 760 if (error) 761 goto err_uar; 762 763 /* setup default value for parameters */ 764 mlx5e_rl_set_default_params(&rl->param, priv->mdev); 765 766 /* update the completion factor */ 767 mlx5e_rl_sync_tx_completion_fact(rl); 768 769 /* create root node */ 770 node = SYSCTL_ADD_NODE(&rl->ctx, 771 SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, 772 "rate_limit", CTLFLAG_RW, NULL, "Rate limiting support"); 773 774 if (node != NULL) { 775 /* create SYSCTLs */ 776 for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) { 777 mlx5e_rl_sysctl_add_u64_oid(rl, 778 MLX5E_RL_PARAMS_INDEX(arg[i]), 779 node, mlx5e_rl_params_desc[2 * i], 780 mlx5e_rl_params_desc[2 * i + 1]); 781 } 782 783 stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node), 784 OID_AUTO, "stats", CTLFLAG_RD, NULL, 785 "Rate limiting statistics"); 786 if (stats != NULL) { 787 /* create SYSCTLs */ 788 for (i = 0; i != MLX5E_RL_STATS_NUM; i++) { 789 mlx5e_rl_sysctl_add_stats_u64_oid(rl, i, 790 stats, mlx5e_rl_stats_desc[2 * i], 791 mlx5e_rl_stats_desc[2 * i + 1]); 792 } 793 } 794 } 795 796 /* allocate workers array */ 797 rl->workers = malloc(sizeof(rl->workers[0]) * 798 rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO); 799 800 /* allocate rate limit array */ 801 rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) * 802 rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO); 803 804 if (node != NULL) { 805 /* create more SYSCTls */ 806 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 807 "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD | 808 CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table, 809 "A", "Show table of all configured TX rates"); 810 811 /* try to fetch rate table from kernel environment */ 812 for (i = 0; i != rl->param.tx_rates_def; i++) { 813 /* compute path for tunable */ 814 snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d", 815 device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i); 816 if (TUNABLE_QUAD_FETCH(buf, &j)) 817 mlx5e_rl_tx_limit_add(rl, j); 818 } 819 820 /* setup rate table sysctls */ 821 for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) { 822 mlx5e_rl_sysctl_add_u64_oid(rl, 823 MLX5E_RL_PARAMS_INDEX(table_arg[i]), 824 node, mlx5e_rl_table_params_desc[2 * i], 825 mlx5e_rl_table_params_desc[2 * i + 1]); 826 } 827 } 828 829 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 830 struct mlx5e_rl_worker *rlw = rl->workers + j; 831 832 rlw->priv = priv; 833 834 cv_init(&rlw->cv, "mlx5-worker-cv"); 835 mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF); 836 STAILQ_INIT(&rlw->index_list_head); 837 STAILQ_INIT(&rlw->process_head); 838 839 rlw->channels = malloc(sizeof(rlw->channels[0]) * 840 rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO); 841 842 MLX5E_RL_WORKER_LOCK(rlw); 843 for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) { 844 struct mlx5e_rl_channel *channel = rlw->channels + i; 845 channel->worker = rlw; 846 channel->m_snd_tag.ifp = priv->ifp; 847 STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry); 848 } 849 MLX5E_RL_WORKER_UNLOCK(rlw); 850 } 851 852 PRIV_LOCK(priv); 853 error = mlx5e_rl_open_workers(priv); 854 PRIV_UNLOCK(priv); 855 856 if (error != 0) { 857 if_printf(priv->ifp, 858 "mlx5e_rl_open_workers failed: %d\n", error); 859 } 860 861 return (0); 862 863 err_uar: 864 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 865 done: 866 sysctl_ctx_free(&rl->ctx); 867 sx_destroy(&rl->rl_sxlock); 868 return (error); 869 } 870 871 static int 872 mlx5e_rl_open_workers(struct mlx5e_priv *priv) 873 { 874 struct mlx5e_rl_priv_data *rl = &priv->rl; 875 struct thread *rl_thread = NULL; 876 struct proc *rl_proc = NULL; 877 uint64_t j; 878 int error; 879 880 if (priv->gone || rl->opened) 881 return (-EINVAL); 882 883 MLX5E_RL_WLOCK(rl); 884 /* compute channel parameters once */ 885 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 886 MLX5E_RL_WUNLOCK(rl); 887 888 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 889 struct mlx5e_rl_worker *rlw = rl->workers + j; 890 891 /* start worker thread */ 892 error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread, 893 RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j); 894 if (error != 0) { 895 if_printf(rl->priv->ifp, 896 "kproc_kthread_add failed: %d\n", error); 897 rlw->worker_done = 1; 898 } 899 } 900 901 rl->opened = 1; 902 903 return (0); 904 } 905 906 static void 907 mlx5e_rl_close_workers(struct mlx5e_priv *priv) 908 { 909 struct mlx5e_rl_priv_data *rl = &priv->rl; 910 uint64_t y; 911 912 if (rl->opened == 0) 913 return; 914 915 /* tear down worker threads simultaneously */ 916 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 917 struct mlx5e_rl_worker *rlw = rl->workers + y; 918 919 /* tear down worker before freeing SQs */ 920 MLX5E_RL_WORKER_LOCK(rlw); 921 if (rlw->worker_done == 0) { 922 rlw->worker_done = 1; 923 cv_broadcast(&rlw->cv); 924 } else { 925 /* XXX thread not started */ 926 rlw->worker_done = 0; 927 } 928 MLX5E_RL_WORKER_UNLOCK(rlw); 929 } 930 931 /* wait for worker threads to exit */ 932 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 933 struct mlx5e_rl_worker *rlw = rl->workers + y; 934 935 /* tear down worker before freeing SQs */ 936 MLX5E_RL_WORKER_LOCK(rlw); 937 while (rlw->worker_done != 0) 938 cv_wait(&rlw->cv, &rlw->mtx); 939 MLX5E_RL_WORKER_UNLOCK(rlw); 940 } 941 942 rl->opened = 0; 943 } 944 945 static void 946 mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl) 947 { 948 unsigned x; 949 950 MLX5E_RL_WLOCK(rl); 951 for (x = 0; x != rl->param.tx_rates_def; x++) 952 rl->rate_limit_table[x] = 0; 953 MLX5E_RL_WUNLOCK(rl); 954 } 955 956 void 957 mlx5e_rl_cleanup(struct mlx5e_priv *priv) 958 { 959 struct mlx5e_rl_priv_data *rl = &priv->rl; 960 uint64_t y; 961 962 /* check if there is support for packet pacing */ 963 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 964 return; 965 966 /* TODO check if there is support for packet pacing */ 967 968 sysctl_ctx_free(&rl->ctx); 969 970 PRIV_LOCK(priv); 971 mlx5e_rl_close_workers(priv); 972 PRIV_UNLOCK(priv); 973 974 mlx5e_rl_reset_rates(rl); 975 976 /* free shared UAR for SQs */ 977 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 978 979 /* close TIS domain */ 980 mlx5e_rl_close_tis(priv); 981 982 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 983 struct mlx5e_rl_worker *rlw = rl->workers + y; 984 985 cv_destroy(&rlw->cv); 986 mtx_destroy(&rlw->mtx); 987 free(rlw->channels, M_MLX5EN); 988 } 989 free(rl->rate_limit_table, M_MLX5EN); 990 free(rl->workers, M_MLX5EN); 991 sx_destroy(&rl->rl_sxlock); 992 } 993 994 static void 995 mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw, 996 struct mlx5e_rl_channel *channel) 997 { 998 STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry); 999 cv_broadcast(&rlw->cv); 1000 } 1001 1002 static void 1003 mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) 1004 { 1005 if (channel == NULL) 1006 return; 1007 1008 MLX5E_RL_WORKER_LOCK(rlw); 1009 switch (channel->state) { 1010 case MLX5E_RL_ST_MODIFY: 1011 channel->state = MLX5E_RL_ST_DESTROY; 1012 break; 1013 case MLX5E_RL_ST_USED: 1014 channel->state = MLX5E_RL_ST_DESTROY; 1015 mlx5e_rlw_queue_channel_locked(rlw, channel); 1016 break; 1017 default: 1018 break; 1019 } 1020 MLX5E_RL_WORKER_UNLOCK(rlw); 1021 } 1022 1023 static int 1024 mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) 1025 { 1026 1027 MLX5E_RL_WORKER_LOCK(rlw); 1028 channel->new_rate = rate; 1029 switch (channel->state) { 1030 case MLX5E_RL_ST_USED: 1031 channel->state = MLX5E_RL_ST_MODIFY; 1032 mlx5e_rlw_queue_channel_locked(rlw, channel); 1033 break; 1034 default: 1035 break; 1036 } 1037 MLX5E_RL_WORKER_UNLOCK(rlw); 1038 1039 return (0); 1040 } 1041 1042 static int 1043 mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t *prate) 1044 { 1045 int retval; 1046 1047 MLX5E_RL_WORKER_LOCK(rlw); 1048 switch (channel->state) { 1049 case MLX5E_RL_ST_USED: 1050 *prate = channel->last_rate; 1051 retval = 0; 1052 break; 1053 case MLX5E_RL_ST_MODIFY: 1054 retval = EBUSY; 1055 break; 1056 default: 1057 retval = EINVAL; 1058 break; 1059 } 1060 MLX5E_RL_WORKER_UNLOCK(rlw); 1061 1062 return (retval); 1063 } 1064 1065 static int 1066 mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw, 1067 struct mlx5e_rl_channel **pchannel) 1068 { 1069 struct mlx5e_rl_channel *channel; 1070 int retval = ENOMEM; 1071 1072 MLX5E_RL_WORKER_LOCK(rlw); 1073 /* Check for available channel in free list */ 1074 if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) { 1075 retval = 0; 1076 /* Remove head index from available list */ 1077 STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry); 1078 channel->state = MLX5E_RL_ST_USED; 1079 atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL); 1080 } else { 1081 atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL); 1082 } 1083 MLX5E_RL_WORKER_UNLOCK(rlw); 1084 1085 *pchannel = channel; 1086 #ifdef RATELIMIT_DEBUG 1087 if_printf(rlw->priv->ifp, "Channel pointer for rate limit connection is %p\n", channel); 1088 #endif 1089 return (retval); 1090 } 1091 1092 int 1093 mlx5e_rl_snd_tag_alloc(struct ifnet *ifp, 1094 union if_snd_tag_alloc_params *params, 1095 struct m_snd_tag **ppmt) 1096 { 1097 struct mlx5e_rl_channel *channel; 1098 struct mlx5e_rl_worker *rlw; 1099 struct mlx5e_priv *priv; 1100 int error; 1101 1102 priv = ifp->if_softc; 1103 1104 /* check if there is support for packet pacing or if device is going away */ 1105 if (!MLX5_CAP_GEN(priv->mdev, qos) || 1106 !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone || 1107 params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) 1108 return (EOPNOTSUPP); 1109 1110 /* compute worker thread this TCP connection belongs to */ 1111 rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) % 1112 priv->rl.param.tx_worker_threads_def); 1113 1114 error = mlx5e_find_available_tx_ring_index(rlw, &channel); 1115 if (error != 0) 1116 goto done; 1117 1118 error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate); 1119 if (error != 0) { 1120 mlx5e_rl_free(rlw, channel); 1121 goto done; 1122 } 1123 1124 /* store pointer to mbuf tag */ 1125 *ppmt = &channel->m_snd_tag; 1126 done: 1127 return (error); 1128 } 1129 1130 1131 int 1132 mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) 1133 { 1134 struct mlx5e_rl_channel *channel = 1135 container_of(pmt, struct mlx5e_rl_channel, m_snd_tag); 1136 1137 return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate)); 1138 } 1139 1140 int 1141 mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) 1142 { 1143 struct mlx5e_rl_channel *channel = 1144 container_of(pmt, struct mlx5e_rl_channel, m_snd_tag); 1145 1146 return (mlx5e_rl_query(channel->worker, channel, ¶ms->rate_limit.max_rate)); 1147 } 1148 1149 void 1150 mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt) 1151 { 1152 struct mlx5e_rl_channel *channel = 1153 container_of(pmt, struct mlx5e_rl_channel, m_snd_tag); 1154 1155 mlx5e_rl_free(channel->worker, channel); 1156 } 1157 1158 static int 1159 mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS) 1160 { 1161 struct mlx5e_rl_priv_data *rl = arg1; 1162 struct mlx5e_priv *priv = rl->priv; 1163 struct sbuf sbuf; 1164 unsigned x; 1165 int error; 1166 1167 error = sysctl_wire_old_buffer(req, 0); 1168 if (error != 0) 1169 return (error); 1170 1171 PRIV_LOCK(priv); 1172 1173 sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req); 1174 1175 sbuf_printf(&sbuf, 1176 "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n" 1177 "\t" "--------------------------------------------\n"); 1178 1179 MLX5E_RL_RLOCK(rl); 1180 for (x = 0; x != rl->param.tx_rates_def; x++) { 1181 if (rl->rate_limit_table[x] == 0) 1182 continue; 1183 1184 sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n", 1185 x, (unsigned)rl->param.tx_burst_size, 1186 (long long)rl->rate_limit_table[x]); 1187 } 1188 MLX5E_RL_RUNLOCK(rl); 1189 1190 error = sbuf_finish(&sbuf); 1191 sbuf_delete(&sbuf); 1192 1193 PRIV_UNLOCK(priv); 1194 1195 return (error); 1196 } 1197 1198 static int 1199 mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl) 1200 { 1201 uint64_t x; 1202 uint64_t y; 1203 1204 MLX5E_RL_WLOCK(rl); 1205 /* compute channel parameters once */ 1206 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 1207 MLX5E_RL_WUNLOCK(rl); 1208 1209 for (y = 0; y != rl->param.tx_worker_threads_def; y++) { 1210 struct mlx5e_rl_worker *rlw = rl->workers + y; 1211 1212 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1213 struct mlx5e_rl_channel *channel; 1214 struct mlx5e_sq *sq; 1215 1216 channel = rlw->channels + x; 1217 sq = channel->sq; 1218 1219 if (sq == NULL) 1220 continue; 1221 1222 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) { 1223 mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq, 1224 rl->param.tx_coalesce_usecs, 1225 rl->param.tx_coalesce_pkts, 1226 rl->param.tx_coalesce_mode); 1227 } else { 1228 mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq, 1229 rl->param.tx_coalesce_usecs, 1230 rl->param.tx_coalesce_pkts); 1231 } 1232 } 1233 } 1234 return (0); 1235 } 1236 1237 static int 1238 mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) 1239 { 1240 unsigned x; 1241 int error; 1242 1243 if (value < 1000 || 1244 mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) 1245 return (EINVAL); 1246 1247 MLX5E_RL_WLOCK(rl); 1248 error = ENOMEM; 1249 1250 /* check if rate already exists */ 1251 for (x = 0; x != rl->param.tx_rates_def; x++) { 1252 if (rl->rate_limit_table[x] != value) 1253 continue; 1254 error = EEXIST; 1255 break; 1256 } 1257 1258 /* check if there is a free rate entry */ 1259 if (x == rl->param.tx_rates_def) { 1260 for (x = 0; x != rl->param.tx_rates_def; x++) { 1261 if (rl->rate_limit_table[x] != 0) 1262 continue; 1263 rl->rate_limit_table[x] = value; 1264 error = 0; 1265 break; 1266 } 1267 } 1268 MLX5E_RL_WUNLOCK(rl); 1269 1270 return (error); 1271 } 1272 1273 static int 1274 mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) 1275 { 1276 unsigned x; 1277 int error; 1278 1279 if (value == 0) 1280 return (EINVAL); 1281 1282 MLX5E_RL_WLOCK(rl); 1283 1284 /* check if rate already exists */ 1285 for (x = 0; x != rl->param.tx_rates_def; x++) { 1286 if (rl->rate_limit_table[x] != value) 1287 continue; 1288 /* free up rate */ 1289 rl->rate_limit_table[x] = 0; 1290 break; 1291 } 1292 1293 /* check if there is a free rate entry */ 1294 if (x == rl->param.tx_rates_def) 1295 error = ENOENT; 1296 else 1297 error = 0; 1298 MLX5E_RL_WUNLOCK(rl); 1299 1300 return (error); 1301 } 1302 1303 static int 1304 mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS) 1305 { 1306 struct mlx5e_rl_priv_data *rl = arg1; 1307 struct mlx5e_priv *priv = rl->priv; 1308 unsigned mode_modify; 1309 unsigned was_opened; 1310 uint64_t value; 1311 uint64_t old; 1312 int error; 1313 1314 PRIV_LOCK(priv); 1315 1316 MLX5E_RL_RLOCK(rl); 1317 value = rl->param.arg[arg2]; 1318 MLX5E_RL_RUNLOCK(rl); 1319 1320 if (req != NULL) { 1321 old = value; 1322 error = sysctl_handle_64(oidp, &value, 0, req); 1323 if (error || req->newptr == NULL || 1324 value == rl->param.arg[arg2]) 1325 goto done; 1326 } else { 1327 old = 0; 1328 error = 0; 1329 } 1330 1331 /* check if device is gone */ 1332 if (priv->gone) { 1333 error = ENXIO; 1334 goto done; 1335 } 1336 was_opened = rl->opened; 1337 mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); 1338 1339 switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) { 1340 case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def): 1341 if (value > rl->param.tx_worker_threads_max) 1342 value = rl->param.tx_worker_threads_max; 1343 else if (value < 1) 1344 value = 1; 1345 1346 /* store new value */ 1347 rl->param.arg[arg2] = value; 1348 break; 1349 1350 case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def): 1351 if (value > rl->param.tx_channels_per_worker_max) 1352 value = rl->param.tx_channels_per_worker_max; 1353 else if (value < 1) 1354 value = 1; 1355 1356 /* store new value */ 1357 rl->param.arg[arg2] = value; 1358 break; 1359 1360 case MLX5E_RL_PARAMS_INDEX(tx_rates_def): 1361 if (value > rl->param.tx_rates_max) 1362 value = rl->param.tx_rates_max; 1363 else if (value < 1) 1364 value = 1; 1365 1366 /* store new value */ 1367 rl->param.arg[arg2] = value; 1368 break; 1369 1370 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs): 1371 /* range check */ 1372 if (value < 1) 1373 value = 0; 1374 else if (value > MLX5E_FLD_MAX(cqc, cq_period)) 1375 value = MLX5E_FLD_MAX(cqc, cq_period); 1376 1377 /* store new value */ 1378 rl->param.arg[arg2] = value; 1379 1380 /* check to avoid down and up the network interface */ 1381 if (was_opened) 1382 error = mlx5e_rl_refresh_channel_params(rl); 1383 break; 1384 1385 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts): 1386 /* import TX coal pkts */ 1387 if (value < 1) 1388 value = 0; 1389 else if (value > MLX5E_FLD_MAX(cqc, cq_max_count)) 1390 value = MLX5E_FLD_MAX(cqc, cq_max_count); 1391 1392 /* store new value */ 1393 rl->param.arg[arg2] = value; 1394 1395 /* check to avoid down and up the network interface */ 1396 if (was_opened) 1397 error = mlx5e_rl_refresh_channel_params(rl); 1398 break; 1399 1400 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode): 1401 /* network interface must be down */ 1402 if (was_opened != 0 && mode_modify == 0) 1403 mlx5e_rl_close_workers(priv); 1404 1405 /* import TX coalesce mode */ 1406 if (value != 0) 1407 value = 1; 1408 1409 /* store new value */ 1410 rl->param.arg[arg2] = value; 1411 1412 /* restart network interface, if any */ 1413 if (was_opened != 0) { 1414 if (mode_modify == 0) 1415 mlx5e_rl_open_workers(priv); 1416 else 1417 error = mlx5e_rl_refresh_channel_params(rl); 1418 } 1419 break; 1420 1421 case MLX5E_RL_PARAMS_INDEX(tx_queue_size): 1422 /* network interface must be down */ 1423 if (was_opened) 1424 mlx5e_rl_close_workers(priv); 1425 1426 /* import TX queue size */ 1427 if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) 1428 value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 1429 else if (value > priv->params_ethtool.tx_queue_size_max) 1430 value = priv->params_ethtool.tx_queue_size_max; 1431 1432 /* store actual TX queue size */ 1433 value = 1ULL << order_base_2(value); 1434 1435 /* store new value */ 1436 rl->param.arg[arg2] = value; 1437 1438 /* verify TX completion factor */ 1439 mlx5e_rl_sync_tx_completion_fact(rl); 1440 1441 /* restart network interface, if any */ 1442 if (was_opened) 1443 mlx5e_rl_open_workers(priv); 1444 break; 1445 1446 case MLX5E_RL_PARAMS_INDEX(tx_completion_fact): 1447 /* network interface must be down */ 1448 if (was_opened) 1449 mlx5e_rl_close_workers(priv); 1450 1451 /* store new value */ 1452 rl->param.arg[arg2] = value; 1453 1454 /* verify parameter */ 1455 mlx5e_rl_sync_tx_completion_fact(rl); 1456 1457 /* restart network interface, if any */ 1458 if (was_opened) 1459 mlx5e_rl_open_workers(priv); 1460 break; 1461 1462 case MLX5E_RL_PARAMS_INDEX(tx_limit_add): 1463 error = mlx5e_rl_tx_limit_add(rl, value); 1464 break; 1465 1466 case MLX5E_RL_PARAMS_INDEX(tx_limit_clr): 1467 error = mlx5e_rl_tx_limit_clr(rl, value); 1468 break; 1469 1470 case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation): 1471 /* range check */ 1472 if (value > rl->param.tx_allowed_deviation_max) 1473 value = rl->param.tx_allowed_deviation_max; 1474 else if (value < rl->param.tx_allowed_deviation_min) 1475 value = rl->param.tx_allowed_deviation_min; 1476 1477 MLX5E_RL_WLOCK(rl); 1478 rl->param.arg[arg2] = value; 1479 MLX5E_RL_WUNLOCK(rl); 1480 break; 1481 1482 case MLX5E_RL_PARAMS_INDEX(tx_burst_size): 1483 /* range check */ 1484 if (value > rl->param.tx_burst_size_max) 1485 value = rl->param.tx_burst_size_max; 1486 else if (value < rl->param.tx_burst_size_min) 1487 value = rl->param.tx_burst_size_min; 1488 1489 MLX5E_RL_WLOCK(rl); 1490 rl->param.arg[arg2] = value; 1491 MLX5E_RL_WUNLOCK(rl); 1492 break; 1493 1494 default: 1495 break; 1496 } 1497 done: 1498 PRIV_UNLOCK(priv); 1499 return (error); 1500 } 1501 1502 static void 1503 mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1504 struct sysctl_oid *node, const char *name, const char *desc) 1505 { 1506 /* 1507 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will 1508 * take care of loading default sysctl value from the kernel 1509 * environment, if any: 1510 */ 1511 if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) { 1512 /* read-only SYSCTLs */ 1513 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1514 name, CTLTYPE_U64 | CTLFLAG_RD | 1515 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1516 } else { 1517 if (strstr(name, "_def") != 0) { 1518 #ifdef RATELIMIT_DEBUG 1519 /* tunable read-only advanced SYSCTLs */ 1520 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1521 name, CTLTYPE_U64 | CTLFLAG_RDTUN | 1522 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1523 #endif 1524 } else { 1525 /* read-write SYSCTLs */ 1526 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1527 name, CTLTYPE_U64 | CTLFLAG_RWTUN | 1528 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1529 } 1530 } 1531 } 1532 1533 static void 1534 mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1535 struct sysctl_oid *node, const char *name, const char *desc) 1536 { 1537 /* read-only SYSCTLs */ 1538 SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, 1539 CTLFLAG_RD, &rl->stats.arg[x], 0, desc); 1540 } 1541 1542 #endif 1543