/*-
 * Copyright (c) 2016-2020 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#ifdef RATELIMIT

static int mlx5e_rl_open_workers(struct mlx5e_priv *);
static void mlx5e_rl_close_workers(struct mlx5e_priv *);
static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS);
static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x,
    struct sysctl_oid *, const char *name, const char *desc);
static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc);
static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value);
static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value);
static if_snd_tag_modify_t mlx5e_rl_snd_tag_modify;
static if_snd_tag_query_t mlx5e_rl_snd_tag_query;
static if_snd_tag_free_t mlx5e_rl_snd_tag_free;

static const struct if_snd_tag_sw mlx5e_rl_snd_tag_sw = {
	.snd_tag_modify = mlx5e_rl_snd_tag_modify,
	.snd_tag_query = mlx5e_rl_snd_tag_query,
	.snd_tag_free = mlx5e_rl_snd_tag_free,
	.type = IF_SND_TAG_TYPE_RATE_LIMIT
};

static void
mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(wq, wq, log_wq_sz, log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, rl->priv->pdn);

	param->wq.linear = 1;
}
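/*
 * Note on the CQ parameters built below: a tx_coalesce_mode of zero
 * selects EQE-based moderation, while any non-zero mode prefers
 * CQE-based moderation when the firmware advertises the
 * cq_period_start_from_cqe capability, falling back to EQE-based
 * moderation otherwise.
 */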
static void
mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;
	uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size);

	MLX5_SET(cqc, cqc, log_cq_size, log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs);
	MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts);
	MLX5_SET(cqc, cqc, uar_page, rl->priv->mdev->priv.uar->index);

	switch (rl->param.tx_coalesce_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}
}

static void
mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl,
    struct mlx5e_rl_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_rl_build_sq_param(rl, &cparam->sq);
	mlx5e_rl_build_cq_param(rl, &cparam->cq);
}

static int
mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	sq->mkey_be = cpu_to_be32(priv->mr.key);
	sq->ifp = priv->ifp;
	sq->priv = priv;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	mlx5e_update_sq_inline(sq);

	return (0);

err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_rl_destroy_sq(struct mlx5e_sq *sq)
{

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	bus_dma_tag_destroy(sq->dma_tag);
}

static int
mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq,
    struct mlx5e_sq_param *param, int ix)
{
	int err;

	err = mlx5e_rl_create_sq(priv, sq, param, ix);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param, &priv->channel[ix].bfreg, priv->rl.tisn);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	WRITE_ONCE(sq->running, 1);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_rl_destroy_sq(sq);

	return (err);
}

static void
mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
	mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF);
	mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF);

	callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

	sq->cev_factor = priv->rl.param.tx_completion_fact;

	/* ensure the TX completion event factor is not zero */
	if (sq->cev_factor == 0)
		sq->cev_factor = 1;
}
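/*
 * Note on the channel open order below: the completion queue is
 * created before the send queue which depends on it, and the CQ's
 * completion handler is invoked once after setup to poll the
 * initial state of the TX queue.
 */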
static int
mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix,
    struct mlx5e_rl_channel_param *cparam,
    struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_priv *priv = rlw->priv;
	struct mlx5e_sq *sq;
	int err;

	sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO);

	/* init mutexes */
	mlx5e_rl_chan_mtx_init(priv, sq);

	/* open TX completion queue */
	err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq,
	    &mlx5e_tx_cq_comp, eq_ix);
	if (err)
		goto err_free;

	err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix);
	if (err)
		goto err_close_tx_cq;

	/* store TX channel pointer */
	*ppsq = sq;

	/* poll TX queue initially */
	sq->cq.mcq.comp(&sq->cq.mcq, NULL);

	return (0);

err_close_tx_cq:
	mlx5e_close_cq(&sq->cq);

err_free:
	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);
	free(sq, M_MLX5EN);
	atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL);
	return (err);
}

static void
mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq)
{
	struct mlx5e_sq *sq = *ppsq;

	/* check if channel is already closed */
	if (sq == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*ppsq = NULL;

	/* teardown and destroy SQ */
	mlx5e_drain_sq(sq);
	mlx5e_disable_sq(sq);
	mlx5e_rl_destroy_sq(sq);

	/* close CQ */
	mlx5e_close_cq(&sq->cq);

	/* destroy mutexes */
	mtx_destroy(&sq->lock);
	mtx_destroy(&sq->comp_lock);

	free(sq, M_MLX5EN);
}

static void
mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl)
{
	/*
	 * Limit the maximum distance between completion events to
	 * half of the currently set TX queue size.
	 *
	 * The maximum number of queue entries a single IP packet can
	 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS.
	 *
	 * The worst case max value is then given as below:
	 */
	uint64_t max = rl->param.tx_queue_size /
	    (2 * MLX5_SEND_WQE_MAX_WQEBBS);

	/*
	 * Update the maximum completion factor value in case the
	 * tx_queue_size field changed. Ensure we don't overflow
	 * 16-bits.
	 */
	if (max < 1)
		max = 1;
	else if (max > 65535)
		max = 65535;
	rl->param.tx_completion_fact_max = max;

	/*
	 * Verify that the current TX completion factor is within the
	 * given limits:
	 */
	if (rl->param.tx_completion_fact < 1)
		rl->param.tx_completion_fact = 1;
	else if (rl->param.tx_completion_fact > max)
		rl->param.tx_completion_fact = max;
}
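/*
 * Worked example for the limit computed above: with a TX queue size
 * of 1024 entries, and assuming MLX5_SEND_WQE_MAX_WQEBBS is 16, the
 * worst-case maximum is 1024 / (2 * 16) = 32, i.e. at most 32
 * packets may be sent between two TX completion events.
 */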
static int
mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index)
{
	struct mlx5e_priv *priv = sq->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY);
	MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY);
	MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);

	err = mlx5_core_modify_sq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

/*
 * This function searches the configured rate limit table for the
 * best match, to prevent a single socket based application from
 * allocating all the available hardware rates. If the user selected
 * rate deviates too much from the closest rate available in the rate
 * limit table, the unlimited rate will be selected.
 */
static uint64_t
mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate)
{
	uint64_t distance = -1ULL;
	uint64_t diff;
	uint64_t retval = 0;		/* unlimited */
	uint64_t x;

	/* search for closest rate */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		uint64_t rate = rl->rate_limit_table[x];
		if (rate == 0)
			continue;

		if (rate > user_rate)
			diff = rate - user_rate;
		else
			diff = user_rate - rate;

		/* check if the distance is smaller than the previous one */
		if (diff < distance) {
			distance = diff;
			retval = rate;
		}
	}

	/* range check for multiplication below */
	if (user_rate > rl->param.tx_limit_max)
		user_rate = rl->param.tx_limit_max;

	/* fallback to unlimited, if rate deviates too much */
	if (distance > howmany(user_rate *
	    rl->param.tx_allowed_deviation, 1000ULL))
		retval = 0;

	return (retval);
}
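/*
 * Example for the deviation check above: with the default
 * tx_allowed_deviation of 50, i.e. 5.0%, a requested rate of
 * 100 Mbit/s may deviate by up to howmany(100000000 * 50, 1000) =
 * 5000000 bit/s from the closest table entry; if no configured rate
 * is within 5 Mbit/s of the request, the unlimited rate (0) is
 * returned instead.
 */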
/*
 * This function sets the requested rate for a rate limit channel, in
 * bits per second. The requested rate will be filtered through the
 * find best rate function above.
 */
static int
mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel, uint64_t rate)
{
	struct mlx5e_rl_priv_data *rl = &rlw->priv->rl;
	struct mlx5e_sq *sq;
	uint64_t temp;
	uint16_t index;
	uint16_t burst;
	int error;

	if (rate != 0) {
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(rl);

		/* get current burst size in bytes */
		temp = rl->param.tx_burst_size *
		    MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu);

		/* limit burst size to 64K currently */
		if (temp > 65535)
			temp = 65535;
		burst = temp;

		/* find best rate */
		rate = mlx5e_rl_find_best_rate_locked(rl, rate);

		MLX5E_RL_RUNLOCK(rl);

		if (rate == 0) {
			/* rate doesn't exist, fallback to unlimited */
			index = 0;
			rate = 0;
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
		} else {
			/* get a reference on the new rate */
			error = -mlx5_rl_add_rate(rlw->priv->mdev,
			    howmany(rate, 1000), burst, &index);

			if (error != 0) {
				/* adding rate failed, fallback to unlimited */
				index = 0;
				rate = 0;
				atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL);
			}
		}
		MLX5E_RL_WORKER_LOCK(rlw);
	} else {
		index = 0;
		burst = 0;	/* default */
	}

	/* atomically swap rates */
	temp = channel->last_rate;
	channel->last_rate = rate;
	rate = temp;

	/* atomically swap burst size */
	temp = channel->last_burst;
	channel->last_burst = burst;
	burst = temp;

	MLX5E_RL_WORKER_UNLOCK(rlw);
	/* put reference on the old rate, if any */
	if (rate != 0) {
		mlx5_rl_remove_rate(rlw->priv->mdev,
		    howmany(rate, 1000), burst);
	}

	/* set new rate, if SQ is running */
	sq = channel->sq;
	if (sq != NULL && READ_ONCE(sq->running) != 0) {
		error = mlx5e_rl_modify_sq(sq, index);
		if (error != 0)
			atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL);
	} else
		error = 0;
	MLX5E_RL_WORKER_LOCK(rlw);

	return (-error);
}

static void
mlx5e_rl_worker(void *arg)
{
	struct thread *td;
	struct mlx5e_rl_worker *rlw = arg;
	struct mlx5e_rl_channel *channel;
	struct mlx5e_priv *priv;
	unsigned ix;
	uint64_t x;
	int error;

	/* set thread priority */
	td = curthread;

	thread_lock(td);
	sched_prio(td, PI_SWI(SWI_NET));
	thread_unlock(td);

	priv = rlw->priv;

	/* compute completion vector */
	ix = (rlw - priv->rl.workers) %
	    priv->mdev->priv.eq_table.num_comp_vectors;

	/* TODO bind to CPU */

	/* open all the SQs */
	MLX5E_RL_WORKER_LOCK(rlw);
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

#if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS)
		if (channel->state == MLX5E_RL_ST_FREE)
			continue;
#endif
		MLX5E_RL_WORKER_UNLOCK(rlw);

		MLX5E_RL_RLOCK(&priv->rl);
		error = mlx5e_rl_open_channel(rlw, ix,
		    &priv->rl.chan_param, &channel->sq);
		MLX5E_RL_RUNLOCK(&priv->rl);

		MLX5E_RL_WORKER_LOCK(rlw);
		if (error != 0) {
			mlx5_en_err(priv->ifp,
			    "mlx5e_rl_open_channel failed: %d\n", error);
			break;
		}
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate);
	}
	while (1) {
		if (STAILQ_FIRST(&rlw->process_head) == NULL) {
			/* check if we are tearing down */
			if (rlw->worker_done != 0)
				break;
			cv_wait(&rlw->cv, &rlw->mtx);
		}
		/* check if we are tearing down */
		if (rlw->worker_done != 0)
			break;
		channel = STAILQ_FIRST(&rlw->process_head);
		if (channel != NULL) {
			STAILQ_REMOVE_HEAD(&rlw->process_head, entry);

			switch (channel->state) {
			case MLX5E_RL_ST_MODIFY:
				channel->state = MLX5E_RL_ST_USED;
				MLX5E_RL_WORKER_UNLOCK(rlw);

				/* create channel on demand */
				if (channel->sq == NULL) {
					MLX5E_RL_RLOCK(&priv->rl);
					error = mlx5e_rl_open_channel(rlw, ix,
					    &priv->rl.chan_param, &channel->sq);
					MLX5E_RL_RUNLOCK(&priv->rl);

					if (error != 0) {
						mlx5_en_err(priv->ifp,
						    "mlx5e_rl_open_channel failed: %d\n", error);
					} else {
						atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL);
					}
				} else {
					mlx5e_resume_sq(channel->sq);
				}

				MLX5E_RL_WORKER_LOCK(rlw);
				/* convert from bytes/s to bits/s and set new rate */
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel,
				    channel->new_rate * 8ULL);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				break;

			case MLX5E_RL_ST_DESTROY:
				error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);
				if (error != 0) {
					mlx5_en_err(priv->ifp,
					    "mlx5e_rlw_channel_set_rate_locked failed: %d\n",
					    error);
				}
				if (channel->sq != NULL) {
					/*
					 * Make sure all packets are
					 * transmitted before SQ is
					 * returned to free list:
					 */
					MLX5E_RL_WORKER_UNLOCK(rlw);
					mlx5e_drain_sq(channel->sq);
					MLX5E_RL_WORKER_LOCK(rlw);
				}
				/* put the channel back into the free list */
				STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry);
				channel->state = MLX5E_RL_ST_FREE;
				atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL);
				break;
			default:
				/* NOP */
				break;
			}
		}
	}

	/* close all the SQs */
	for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) {
		struct mlx5e_rl_channel *channel = rlw->channels + x;

		/* update the initial rate */
		channel->init_rate = channel->last_rate;

		/* make sure we free up the rate resource */
		mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0);

		if (channel->sq != NULL) {
			MLX5E_RL_WORKER_UNLOCK(rlw);
			mlx5e_rl_close_channel(&channel->sq);
			atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL);
			MLX5E_RL_WORKER_LOCK(rlw);
		}
	}

	rlw->worker_done = 0;
	cv_broadcast(&rlw->cv);
	MLX5E_RL_WORKER_UNLOCK(rlw);

	kthread_exit();
}
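/*
 * Channel state machine summary, as implemented by the worker loop
 * above and the allocation, modify and free helpers further down:
 *
 *	FREE --(snd_tag_alloc)--> USED
 *	USED --(modify request)--> MODIFY --(worker)--> USED
 *	USED/MODIFY --(free request)--> DESTROY --(worker)--> FREE
 *
 * State changes are made under the per-worker lock, and the worker
 * thread processes channels queued on "process_head".
 */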
static int
mlx5e_rl_open_tis(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, 0);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn));
}

static void
mlx5e_rl_close_tis(struct mlx5e_priv *priv)
{
	mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn, 0);
}

static void
mlx5e_rl_set_default_params(struct mlx5e_rl_params *param,
    struct mlx5_core_dev *mdev)
{
	/* ratelimit workers */
	param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors;
	param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS;

	/* range check */
	if (param->tx_worker_threads_def == 0 ||
	    param->tx_worker_threads_def > param->tx_worker_threads_max)
		param->tx_worker_threads_def = param->tx_worker_threads_max;

	/* ratelimit channels */
	param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS /
	    param->tx_worker_threads_def;
	param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS;

	/* range check */
	if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER)
		param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER;

	/* set default burst size */
	param->tx_burst_size = 4;	/* MTUs */

	/*
	 * Set maximum burst size
	 *
	 * The burst size is multiplied by the MTU and clamped to the
	 * range 0 ... 65535 bytes inclusively before being fed into
	 * the firmware.
	 *
	 * NOTE: If the burst size or MTU is changed, only ratelimit
	 * connections made after the change will use the new burst
	 * size.
	 */
	param->tx_burst_size_max = 255;

	/* get firmware rate limits in 1000bit/s and convert them to bit/s */
	param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL;
	param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL;

	/* ratelimit table size */
	param->tx_rates_max = mdev->priv.rl_table.max_size;

	/* range check */
	if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES)
		param->tx_rates_max = MLX5E_RL_MAX_TX_RATES;

	/* set default number of rates */
	param->tx_rates_def = param->tx_rates_max;

	/* set maximum allowed rate deviation */
	if (param->tx_limit_max != 0) {
		/*
		 * Make sure the deviation multiplication doesn't
		 * overflow unsigned 64-bit:
		 */
		param->tx_allowed_deviation_max = -1ULL /
		    param->tx_limit_max;
	}
	/* set default rate deviation */
	param->tx_allowed_deviation = 50;	/* 5.0% */

	/* channel parameters */
	param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
	param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT;
	param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT;
	param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT;
	param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT;
}
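/*
 * Example for the burst size parameters above: with the default
 * tx_burst_size of 4 MTUs and a 1500 byte MTU, the burst size
 * passed to the firmware by mlx5e_rlw_channel_set_rate_locked() is
 * roughly 4 * 1500 = 6000 bytes; the product is always clamped to
 * 65535 bytes before use.
 */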
static const char *mlx5e_rl_params_desc[] = {
	MLX5E_RL_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_table_params_desc[] = {
	MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC)
};

static const char *mlx5e_rl_stats_desc[] = {
	MLX5E_RL_STATS(MLX5E_STATS_DESC)
};

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct sysctl_oid *node;
	struct sysctl_oid *stats;
	char buf[64];
	uint64_t i;
	uint64_t j;
	int error;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return (0);

	rl->priv = priv;

	sysctl_ctx_init(&rl->ctx);

	sx_init(&rl->rl_sxlock, "ratelimit-sxlock");

	/* open own TIS domain for ratelimit SQs */
	error = mlx5e_rl_open_tis(priv);
	if (error)
		goto done;

	/* setup default value for parameters */
	mlx5e_rl_set_default_params(&rl->param, priv->mdev);

	/* update the completion factor */
	mlx5e_rl_sync_tx_completion_fact(rl);

	/* create root node */
	node = SYSCTL_ADD_NODE(&rl->ctx,
	    SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO,
	    "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support");

	if (node != NULL) {
		/* create SYSCTLs */
		for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(arg[i]),
			    node, mlx5e_rl_params_desc[2 * i],
			    mlx5e_rl_params_desc[2 * i + 1]);
		}

		stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node),
		    OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
		    "Rate limiting statistics");
		if (stats != NULL) {
			/* create SYSCTLs */
			for (i = 0; i != MLX5E_RL_STATS_NUM; i++) {
				mlx5e_rl_sysctl_add_stats_u64_oid(rl, i,
				    stats, mlx5e_rl_stats_desc[2 * i],
				    mlx5e_rl_stats_desc[2 * i + 1]);
			}
		}
	}

	/* allocate workers array */
	rl->workers = malloc(sizeof(rl->workers[0]) *
	    rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO);

	/* allocate rate limit array */
	rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) *
	    rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO);

	if (node != NULL) {
		/* create more SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table,
		    "A", "Show table of all configured TX rates");

		/* try to fetch rate table from kernel environment */
		for (i = 0; i != rl->param.tx_rates_def; i++) {
			/* compute path for tunable */
			snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d",
			    device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i);
			if (TUNABLE_QUAD_FETCH(buf, &j))
				mlx5e_rl_tx_limit_add(rl, j);
		}

		/* setup rate table sysctls */
		for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) {
			mlx5e_rl_sysctl_add_u64_oid(rl,
			    MLX5E_RL_PARAMS_INDEX(table_arg[i]),
			    node, mlx5e_rl_table_params_desc[2 * i],
			    mlx5e_rl_table_params_desc[2 * i + 1]);
		}
	}

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		rlw->priv = priv;

		cv_init(&rlw->cv, "mlx5-worker-cv");
		mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF);
		STAILQ_INIT(&rlw->index_list_head);
		STAILQ_INIT(&rlw->process_head);

		rlw->channels = malloc(sizeof(rlw->channels[0]) *
		    rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO);

		MLX5E_RL_WORKER_LOCK(rlw);
		for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) {
			struct mlx5e_rl_channel *channel = rlw->channels + i;
			channel->worker = rlw;
			STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry);
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	PRIV_LOCK(priv);
	error = mlx5e_rl_open_workers(priv);
	PRIV_UNLOCK(priv);

	if (error != 0) {
		mlx5_en_err(priv->ifp,
		    "mlx5e_rl_open_workers failed: %d\n", error);
	}

	return (0);

done:
	sysctl_ctx_free(&rl->ctx);
	sx_destroy(&rl->rl_sxlock);
	return (error);
}
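/*
 * Example (hypothetical unit number): given the tunable path built
 * in mlx5e_rl_init() above, a rate table entry can be preloaded for
 * device mce0 from /boot/loader.conf or the kernel environment:
 *
 *	dev.mce.0.rate_limit.tx_rate_add_0="100000000"
 *
 * which would add a 100 Mbit/s entry to the rate table at boot.
 */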
static int
mlx5e_rl_open_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	struct thread *rl_thread = NULL;
	struct proc *rl_proc = NULL;
	uint64_t j;
	int error;

	if (priv->gone || rl->opened)
		return (-EINVAL);

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (j = 0; j < rl->param.tx_worker_threads_def; j++) {
		struct mlx5e_rl_worker *rlw = rl->workers + j;

		/* start worker thread */
		error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread,
		    RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j);
		if (error != 0) {
			mlx5_en_err(rl->priv->ifp,
			    "kproc_kthread_add failed: %d\n", error);
			rlw->worker_done = 1;
		}
	}

	rl->opened = 1;

	return (0);
}

static void
mlx5e_rl_close_workers(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	if (rl->opened == 0)
		return;

	/* tear down worker threads simultaneously */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* tear down worker before freeing SQs */
		MLX5E_RL_WORKER_LOCK(rlw);
		if (rlw->worker_done == 0) {
			rlw->worker_done = 1;
			cv_broadcast(&rlw->cv);
		} else {
			/* XXX thread not started */
			rlw->worker_done = 0;
		}
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	/* wait for worker threads to exit */
	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		/* wait for worker thread to clear the done flag */
		MLX5E_RL_WORKER_LOCK(rlw);
		while (rlw->worker_done != 0)
			cv_wait(&rlw->cv, &rlw->mtx);
		MLX5E_RL_WORKER_UNLOCK(rlw);
	}

	rl->opened = 0;
}

static void
mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl)
{
	unsigned x;

	MLX5E_RL_WLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++)
		rl->rate_limit_table[x] = 0;
	MLX5E_RL_WUNLOCK(rl);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_rl_priv_data *rl = &priv->rl;
	uint64_t y;

	/* check if there is support for packet pacing */
	if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing))
		return;

	sysctl_ctx_free(&rl->ctx);

	PRIV_LOCK(priv);
	mlx5e_rl_close_workers(priv);
	PRIV_UNLOCK(priv);

	mlx5e_rl_reset_rates(rl);

	/* close TIS domain */
	mlx5e_rl_close_tis(priv);

	for (y = 0; y < rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		cv_destroy(&rlw->cv);
		mtx_destroy(&rlw->mtx);
		free(rlw->channels, M_MLX5EN);
	}
	free(rl->rate_limit_table, M_MLX5EN);
	free(rl->workers, M_MLX5EN);
	sx_destroy(&rl->rl_sxlock);
}

static void
mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel *channel)
{
	STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry);
	cv_broadcast(&rlw->cv);
}

static void
mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel)
{
	if (channel == NULL)
		return;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_MODIFY:
		channel->state = MLX5E_RL_ST_DESTROY;
		break;
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_DESTROY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);
}

static int
mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate)
{

	MLX5E_RL_WORKER_LOCK(rlw);
	channel->new_rate = rate;
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		channel->state = MLX5E_RL_ST_MODIFY;
		mlx5e_rlw_queue_channel_locked(rlw, channel);
		break;
	default:
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (0);
}

static int
mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel,
    union if_snd_tag_query_params *params)
{
	int retval;

	MLX5E_RL_WORKER_LOCK(rlw);
	switch (channel->state) {
	case MLX5E_RL_ST_USED:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = 0;
		break;
	case MLX5E_RL_ST_MODIFY:
		params->rate_limit.max_rate = channel->last_rate;
		params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq);
		retval = EBUSY;
		break;
	default:
		retval = EINVAL;
		break;
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	return (retval);
}
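/*
 * The allocation path below hands out channels from the per-worker
 * free list; when the list is empty, ENOMEM is returned and the
 * tx_available_resource_failure counter is bumped. This bounds the
 * number of rate limited connections per worker to
 * tx_channels_per_worker_def.
 */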
static int
mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw,
    struct mlx5e_rl_channel **pchannel)
{
	struct mlx5e_rl_channel *channel;
	int retval = ENOMEM;

	MLX5E_RL_WORKER_LOCK(rlw);
	/* Check for available channel in free list */
	if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) {
		retval = 0;
		/* Remove head index from available list */
		STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry);
		channel->state = MLX5E_RL_ST_USED;
		atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL);
	} else {
		atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL);
	}
	MLX5E_RL_WORKER_UNLOCK(rlw);

	*pchannel = channel;
#ifdef RATELIMIT_DEBUG
	mlx5_en_info(rlw->priv->ifp,
	    "Channel pointer for rate limit connection is %p\n", channel);
#endif
	return (retval);
}

int
mlx5e_rl_snd_tag_alloc(struct ifnet *ifp,
    union if_snd_tag_alloc_params *params,
    struct m_snd_tag **ppmt)
{
	struct mlx5e_rl_channel *channel;
	struct mlx5e_rl_worker *rlw;
	struct mlx5e_priv *priv;
	int error;

	priv = ifp->if_softc;

	/* check if there is support for packet pacing or if device is going away */
	if (!MLX5_CAP_GEN(priv->mdev, qos) ||
	    !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone ||
	    params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT)
		return (EOPNOTSUPP);

	/* compute worker thread this TCP connection belongs to */
	rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) %
	    priv->rl.param.tx_worker_threads_def);

	error = mlx5e_find_available_tx_ring_index(rlw, &channel);
	if (error != 0)
		goto done;

	error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate);
	if (error != 0) {
		mlx5e_rl_free(rlw, channel);
		goto done;
	}

	/* store pointer to mbuf tag */
	MPASS(channel->tag.refcount == 0);
	m_snd_tag_init(&channel->tag, ifp, &mlx5e_rl_snd_tag_sw);
	*ppmt = &channel->tag;
done:
	return (error);
}
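/*
 * A minimal userspace sketch of how the allocation path above is
 * typically reached, assuming a kernel built with "options
 * RATELIMIT" and an application pacing a connected TCP socket:
 *
 *	uint32_t rate = 12500000;	\* bytes/s, i.e. 100 Mbit/s *\
 *	setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
 *	    &rate, sizeof(rate));
 *
 * The TCP stack then requests a rate limit send tag from the
 * network interface, which ends up in mlx5e_rl_snd_tag_alloc().
 */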
static int
mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate));
}

static int
mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	return (mlx5e_rl_query(channel->worker, channel, params));
}

static void
mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt)
{
	struct mlx5e_rl_channel *channel =
	    container_of(pmt, struct mlx5e_rl_channel, tag);

	mlx5e_rl_free(channel->worker, channel);
}

static int
mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	struct sbuf sbuf;
	unsigned x;
	int error;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);

	PRIV_LOCK(priv);

	sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req);

	sbuf_printf(&sbuf,
	    "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n"
	    "\t" "--------------------------------------------\n");

	MLX5E_RL_RLOCK(rl);
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] == 0)
			continue;

		sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n",
		    x, (unsigned)rl->param.tx_burst_size,
		    (long long)rl->rate_limit_table[x]);
	}
	MLX5E_RL_RUNLOCK(rl);

	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);

	PRIV_UNLOCK(priv);

	return (error);
}

static int
mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	MLX5E_RL_WLOCK(rl);
	/* compute channel parameters once */
	mlx5e_rl_build_channel_param(rl, &rl->chan_param);
	MLX5E_RL_WUNLOCK(rl);

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) {
				mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts,
				    rl->param.tx_coalesce_mode);
			} else {
				mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq,
				    rl->param.tx_coalesce_usecs,
				    rl->param.tx_coalesce_pkts);
			}
		}
	}
	return (0);
}

void
mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl)
{
	uint64_t x;
	uint64_t y;

	for (y = 0; y != rl->param.tx_worker_threads_def; y++) {
		struct mlx5e_rl_worker *rlw = rl->workers + y;

		for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) {
			struct mlx5e_rl_channel *channel;
			struct mlx5e_sq *sq;

			channel = rlw->channels + x;
			sq = channel->sq;

			if (sq == NULL)
				continue;

			mtx_lock(&sq->lock);
			mlx5e_update_sq_inline(sq);
			mtx_unlock(&sq->lock);
		}
	}
}

static int
mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value < 1000 ||
	    mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);
	error = ENOMEM;

	/* check if rate already exists */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		error = EEXIST;
		break;
	}

	/* check if there is a free rate entry */
	if (x == rl->param.tx_rates_def) {
		for (x = 0; x != rl->param.tx_rates_def; x++) {
			if (rl->rate_limit_table[x] != 0)
				continue;
			rl->rate_limit_table[x] = value;
			error = 0;
			break;
		}
	}
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}

static int
mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value)
{
	unsigned x;
	int error;

	if (value == 0)
		return (EINVAL);

	MLX5E_RL_WLOCK(rl);

	/* search for the rate to be cleared */
	for (x = 0; x != rl->param.tx_rates_def; x++) {
		if (rl->rate_limit_table[x] != value)
			continue;
		/* free up rate */
		rl->rate_limit_table[x] = 0;
		break;
	}

	/* check if the rate was found and cleared */
	if (x == rl->param.tx_rates_def)
		error = ENOENT;
	else
		error = 0;
	MLX5E_RL_WUNLOCK(rl);

	return (error);
}
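/*
 * Example (hypothetical unit number): rate table entries can be
 * managed at runtime through the sysctls backed by the two
 * functions above, e.g. for device mce0:
 *
 *	sysctl dev.mce.0.rate_limit.tx_limit_add=100000000
 *	sysctl dev.mce.0.rate_limit.tx_rate_show
 *	sysctl dev.mce.0.rate_limit.tx_limit_clr=100000000
 *
 * Values are given in bit/s, must be at least 1000 and must fall
 * within the range supported by the firmware rate table.
 */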
static int
mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	struct mlx5e_rl_priv_data *rl = arg1;
	struct mlx5e_priv *priv = rl->priv;
	unsigned mode_modify;
	unsigned was_opened;
	uint64_t value;
	uint64_t old;
	int error;

	PRIV_LOCK(priv);

	MLX5E_RL_RLOCK(rl);
	value = rl->param.arg[arg2];
	MLX5E_RL_RUNLOCK(rl);

	if (req != NULL) {
		old = value;
		error = sysctl_handle_64(oidp, &value, 0, req);
		if (error || req->newptr == NULL ||
		    value == rl->param.arg[arg2])
			goto done;
	} else {
		old = 0;
		error = 0;
	}

	/* check if device is gone */
	if (priv->gone) {
		error = ENXIO;
		goto done;
	}
	was_opened = rl->opened;
	mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify);

	switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) {
	case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def):
		if (value > rl->param.tx_worker_threads_max)
			value = rl->param.tx_worker_threads_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def):
		if (value > rl->param.tx_channels_per_worker_max)
			value = rl->param.tx_channels_per_worker_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_rates_def):
		if (value > rl->param.tx_rates_max)
			value = rl->param.tx_rates_max;
		else if (value < 1)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs):
		/* range check */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_period))
			value = MLX5E_FLD_MAX(cqc, cq_period);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh channel parameters to avoid bringing the interface down and up */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts):
		/* import TX coalesce packet count */
		if (value < 1)
			value = 0;
		else if (value > MLX5E_FLD_MAX(cqc, cq_max_count))
			value = MLX5E_FLD_MAX(cqc, cq_max_count);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* refresh channel parameters to avoid bringing the interface down and up */
		if (was_opened)
			error = mlx5e_rl_refresh_channel_params(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode):
		/* network interface must be down */
		if (was_opened != 0 && mode_modify == 0)
			mlx5e_rl_close_workers(priv);

		/* import TX coalesce mode */
		if (value != 0)
			value = 1;

		/* store new value */
		rl->param.arg[arg2] = value;

		/* restart network interface, if any */
		if (was_opened != 0) {
			if (mode_modify == 0)
				mlx5e_rl_open_workers(priv);
			else
				error = mlx5e_rl_refresh_channel_params(rl);
		}
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_queue_size):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* import TX queue size */
		if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE))
			value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE);
		else if (value > priv->params_ethtool.tx_queue_size_max)
			value = priv->params_ethtool.tx_queue_size_max;

		/* store actual TX queue size */
		value = 1ULL << order_base_2(value);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify TX completion factor */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_completion_fact):
		/* network interface must be down */
		if (was_opened)
			mlx5e_rl_close_workers(priv);

		/* store new value */
		rl->param.arg[arg2] = value;

		/* verify parameter */
		mlx5e_rl_sync_tx_completion_fact(rl);

		/* restart network interface, if any */
		if (was_opened)
			mlx5e_rl_open_workers(priv);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_add):
		error = mlx5e_rl_tx_limit_add(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_limit_clr):
		error = mlx5e_rl_tx_limit_clr(rl, value);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation):
		/* range check */
		if (value > rl->param.tx_allowed_deviation_max)
			value = rl->param.tx_allowed_deviation_max;
		else if (value < rl->param.tx_allowed_deviation_min)
			value = rl->param.tx_allowed_deviation_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	case MLX5E_RL_PARAMS_INDEX(tx_burst_size):
		/* range check */
		if (value > rl->param.tx_burst_size_max)
			value = rl->param.tx_burst_size_max;
		else if (value < rl->param.tx_burst_size_min)
			value = rl->param.tx_burst_size_min;

		MLX5E_RL_WLOCK(rl);
		rl->param.arg[arg2] = value;
		MLX5E_RL_WUNLOCK(rl);
		break;

	default:
		break;
	}
done:
	PRIV_UNLOCK(priv);
	return (error);
}

static void
mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/*
	 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will
	 * take care of loading default sysctl value from the kernel
	 * environment, if any:
	 */
	if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) {
		/* read-only SYSCTLs */
		SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
		    name, CTLTYPE_U64 | CTLFLAG_RD |
		    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
	} else {
		if (strstr(name, "_def") != 0) {
#ifdef RATELIMIT_DEBUG
			/* tunable read-only advanced SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RDTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
#endif
		} else {
			/* read-write SYSCTLs */
			SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO,
			    name, CTLTYPE_U64 | CTLFLAG_RWTUN |
			    CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc);
		}
	}
}

static void
mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x,
    struct sysctl_oid *node, const char *name, const char *desc)
{
	/* read-only SYSCTLs */
	SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name,
	    CTLFLAG_RD, &rl->stats.arg[x], 0, desc);
}

#else

int
mlx5e_rl_init(struct mlx5e_priv *priv)
{

	return (0);
}

void
mlx5e_rl_cleanup(struct mlx5e_priv *priv)
{
	/* NOP */
}

#endif /* RATELIMIT */