1 /*- 2 * Copyright (c) 2016 Mellanox Technologies. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 * 25 * $FreeBSD$ 26 */ 27 28 #include "en.h" 29 30 #ifdef RATELIMIT 31 32 static int mlx5e_rl_open_workers(struct mlx5e_priv *); 33 static void mlx5e_rl_close_workers(struct mlx5e_priv *); 34 static int mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS); 35 static void mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *, unsigned x, 36 struct sysctl_oid *, const char *name, const char *desc); 37 static void mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 38 struct sysctl_oid *node, const char *name, const char *desc); 39 static int mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *, uint64_t value); 40 static int mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *, uint64_t value); 41 42 static void 43 mlx5e_rl_build_sq_param(struct mlx5e_rl_priv_data *rl, 44 struct mlx5e_sq_param *param) 45 { 46 void *sqc = param->sqc; 47 void *wq = MLX5_ADDR_OF(sqc, sqc, wq); 48 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 49 50 MLX5_SET(wq, wq, log_wq_sz, log_sq_size); 51 MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); 52 MLX5_SET(wq, wq, pd, rl->priv->pdn); 53 54 param->wq.buf_numa_node = 0; 55 param->wq.db_numa_node = 0; 56 param->wq.linear = 1; 57 } 58 59 static void 60 mlx5e_rl_build_cq_param(struct mlx5e_rl_priv_data *rl, 61 struct mlx5e_cq_param *param) 62 { 63 void *cqc = param->cqc; 64 uint8_t log_sq_size = order_base_2(rl->param.tx_queue_size); 65 66 MLX5_SET(cqc, cqc, log_cq_size, log_sq_size); 67 MLX5_SET(cqc, cqc, cq_period, rl->param.tx_coalesce_usecs); 68 MLX5_SET(cqc, cqc, cq_max_count, rl->param.tx_coalesce_pkts); 69 70 switch (rl->param.tx_coalesce_mode) { 71 case 0: 72 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 73 break; 74 default: 75 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_start_from_cqe)) 76 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); 77 else 78 MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); 79 break; 80 } 81 } 82 83 static void 84 mlx5e_rl_build_channel_param(struct mlx5e_rl_priv_data *rl, 85 struct mlx5e_rl_channel_param *cparam) 86 { 87 memset(cparam, 0, sizeof(*cparam)); 88 89 mlx5e_rl_build_sq_param(rl, &cparam->sq); 90 mlx5e_rl_build_cq_param(rl, &cparam->cq); 91 } 92 93 static int 94 mlx5e_rl_create_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 95 struct mlx5e_sq_param *param, int ix) 96 { 97 struct mlx5_core_dev *mdev = priv->mdev; 98 void *sqc = param->sqc; 99 void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); 100 int err; 101 102 /* Create DMA descriptor TAG */ 103 if ((err = -bus_dma_tag_create( 104 bus_get_dma_tag(mdev->pdev->dev.bsddev), 105 1, /* any alignment */ 106 0, /* no boundary */ 107 BUS_SPACE_MAXADDR, /* lowaddr */ 108 BUS_SPACE_MAXADDR, /* highaddr */ 109 NULL, NULL, /* filter, filterarg */ 110 MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ 111 MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ 112 MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 113 0, /* flags */ 114 NULL, NULL, /* lockfunc, lockfuncarg */ 115 &sq->dma_tag))) 116 goto done; 117 118 /* use shared UAR */ 119 sq->uar = priv->rl.sq_uar; 120 121 err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, 122 &sq->wq_ctrl); 123 if (err) 124 goto err_free_dma_tag; 125 126 sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; 127 /* 128 * The sq->bf_buf_size variable is intentionally left zero so 129 * that the doorbell writes will occur at the same memory 130 * location. 131 */ 132 133 err = mlx5e_alloc_sq_db(sq); 134 if (err) 135 goto err_sq_wq_destroy; 136 137 sq->mkey_be = cpu_to_be32(priv->mr.key); 138 sq->ifp = priv->ifp; 139 sq->priv = priv; 140 141 mlx5e_update_sq_inline(sq); 142 143 return (0); 144 145 err_sq_wq_destroy: 146 mlx5_wq_destroy(&sq->wq_ctrl); 147 err_free_dma_tag: 148 bus_dma_tag_destroy(sq->dma_tag); 149 done: 150 return (err); 151 } 152 153 static void 154 mlx5e_rl_destroy_sq(struct mlx5e_sq *sq) 155 { 156 157 mlx5e_free_sq_db(sq); 158 mlx5_wq_destroy(&sq->wq_ctrl); 159 bus_dma_tag_destroy(sq->dma_tag); 160 } 161 162 static int 163 mlx5e_rl_open_sq(struct mlx5e_priv *priv, struct mlx5e_sq *sq, 164 struct mlx5e_sq_param *param, int ix) 165 { 166 int err; 167 168 err = mlx5e_rl_create_sq(priv, sq, param, ix); 169 if (err) 170 return (err); 171 172 err = mlx5e_enable_sq(sq, param, priv->rl.tisn); 173 if (err) 174 goto err_destroy_sq; 175 176 err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); 177 if (err) 178 goto err_disable_sq; 179 180 WRITE_ONCE(sq->running, 1); 181 182 return (0); 183 184 err_disable_sq: 185 mlx5e_disable_sq(sq); 186 err_destroy_sq: 187 mlx5e_rl_destroy_sq(sq); 188 189 return (err); 190 } 191 192 static void 193 mlx5e_rl_chan_mtx_init(struct mlx5e_priv *priv, struct mlx5e_sq *sq) 194 { 195 mtx_init(&sq->lock, "mlx5tx-rl", NULL, MTX_DEF); 196 mtx_init(&sq->comp_lock, "mlx5comp-rl", NULL, MTX_DEF); 197 198 callout_init_mtx(&sq->cev_callout, &sq->lock, 0); 199 200 sq->cev_factor = priv->rl.param.tx_completion_fact; 201 202 /* ensure the TX completion event factor is not zero */ 203 if (sq->cev_factor == 0) 204 sq->cev_factor = 1; 205 } 206 207 static int 208 mlx5e_rl_open_channel(struct mlx5e_rl_worker *rlw, int eq_ix, 209 struct mlx5e_rl_channel_param *cparam, 210 struct mlx5e_sq *volatile *ppsq) 211 { 212 struct mlx5e_priv *priv = rlw->priv; 213 struct mlx5e_sq *sq; 214 int err; 215 216 sq = malloc(sizeof(*sq), M_MLX5EN, M_WAITOK | M_ZERO); 217 218 /* init mutexes */ 219 mlx5e_rl_chan_mtx_init(priv, sq); 220 221 /* open TX completion queue */ 222 err = mlx5e_open_cq(priv, &cparam->cq, &sq->cq, 223 &mlx5e_tx_cq_comp, eq_ix); 224 if (err) 225 goto err_free; 226 227 err = mlx5e_rl_open_sq(priv, sq, &cparam->sq, eq_ix); 228 if (err) 229 goto err_close_tx_cq; 230 231 /* store TX channel pointer */ 232 *ppsq = sq; 233 234 /* poll TX queue initially */ 235 sq->cq.mcq.comp(&sq->cq.mcq, NULL); 236 237 return (0); 238 239 err_close_tx_cq: 240 mlx5e_close_cq(&sq->cq); 241 242 err_free: 243 /* destroy mutexes */ 244 mtx_destroy(&sq->lock); 245 mtx_destroy(&sq->comp_lock); 246 free(sq, M_MLX5EN); 247 atomic_add_64(&priv->rl.stats.tx_allocate_resource_failure, 1ULL); 248 return (err); 249 } 250 251 static void 252 mlx5e_rl_close_channel(struct mlx5e_sq *volatile *ppsq) 253 { 254 struct mlx5e_sq *sq = *ppsq; 255 256 /* check if channel is already closed */ 257 if (sq == NULL) 258 return; 259 /* ensure channel pointer is no longer used */ 260 *ppsq = NULL; 261 262 /* teardown and destroy SQ */ 263 mlx5e_drain_sq(sq); 264 mlx5e_disable_sq(sq); 265 mlx5e_rl_destroy_sq(sq); 266 267 /* close CQ */ 268 mlx5e_close_cq(&sq->cq); 269 270 /* destroy mutexes */ 271 mtx_destroy(&sq->lock); 272 mtx_destroy(&sq->comp_lock); 273 274 free(sq, M_MLX5EN); 275 } 276 277 static void 278 mlx5e_rl_sync_tx_completion_fact(struct mlx5e_rl_priv_data *rl) 279 { 280 /* 281 * Limit the maximum distance between completion events to 282 * half of the currently set TX queue size. 283 * 284 * The maximum number of queue entries a single IP packet can 285 * consume is given by MLX5_SEND_WQE_MAX_WQEBBS. 286 * 287 * The worst case max value is then given as below: 288 */ 289 uint64_t max = rl->param.tx_queue_size / 290 (2 * MLX5_SEND_WQE_MAX_WQEBBS); 291 292 /* 293 * Update the maximum completion factor value in case the 294 * tx_queue_size field changed. Ensure we don't overflow 295 * 16-bits. 296 */ 297 if (max < 1) 298 max = 1; 299 else if (max > 65535) 300 max = 65535; 301 rl->param.tx_completion_fact_max = max; 302 303 /* 304 * Verify that the current TX completion factor is within the 305 * given limits: 306 */ 307 if (rl->param.tx_completion_fact < 1) 308 rl->param.tx_completion_fact = 1; 309 else if (rl->param.tx_completion_fact > max) 310 rl->param.tx_completion_fact = max; 311 } 312 313 static int 314 mlx5e_rl_modify_sq(struct mlx5e_sq *sq, uint16_t rl_index) 315 { 316 struct mlx5e_priv *priv = sq->priv; 317 struct mlx5_core_dev *mdev = priv->mdev; 318 319 void *in; 320 void *sqc; 321 int inlen; 322 int err; 323 324 inlen = MLX5_ST_SZ_BYTES(modify_sq_in); 325 in = mlx5_vzalloc(inlen); 326 if (in == NULL) 327 return (-ENOMEM); 328 329 sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); 330 331 MLX5_SET(modify_sq_in, in, sqn, sq->sqn); 332 MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RDY); 333 MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); 334 MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); 335 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); 336 337 err = mlx5_core_modify_sq(mdev, in, inlen); 338 339 kvfree(in); 340 341 return (err); 342 } 343 344 /* 345 * This function will search the configured rate limit table for the 346 * best match to avoid that a single socket based application can 347 * allocate all the available hardware rates. If the user selected 348 * rate deviates too much from the closes rate available in the rate 349 * limit table, unlimited rate will be selected. 350 */ 351 static uint64_t 352 mlx5e_rl_find_best_rate_locked(struct mlx5e_rl_priv_data *rl, uint64_t user_rate) 353 { 354 uint64_t distance = -1ULL; 355 uint64_t diff; 356 uint64_t retval = 0; /* unlimited */ 357 uint64_t x; 358 359 /* search for closest rate */ 360 for (x = 0; x != rl->param.tx_rates_def; x++) { 361 uint64_t rate = rl->rate_limit_table[x]; 362 if (rate == 0) 363 continue; 364 365 if (rate > user_rate) 366 diff = rate - user_rate; 367 else 368 diff = user_rate - rate; 369 370 /* check if distance is smaller than previous rate */ 371 if (diff < distance) { 372 distance = diff; 373 retval = rate; 374 } 375 } 376 377 /* range check for multiplication below */ 378 if (user_rate > rl->param.tx_limit_max) 379 user_rate = rl->param.tx_limit_max; 380 381 /* fallback to unlimited, if rate deviates too much */ 382 if (distance > howmany(user_rate * 383 rl->param.tx_allowed_deviation, 1000ULL)) 384 retval = 0; 385 386 return (retval); 387 } 388 389 /* 390 * This function sets the requested rate for a rate limit channel, in 391 * bits per second. The requested rate will be filtered through the 392 * find best rate function above. 393 */ 394 static int 395 mlx5e_rlw_channel_set_rate_locked(struct mlx5e_rl_worker *rlw, 396 struct mlx5e_rl_channel *channel, uint64_t rate) 397 { 398 struct mlx5e_rl_priv_data *rl = &rlw->priv->rl; 399 struct mlx5e_sq *sq; 400 uint64_t temp; 401 uint16_t index; 402 uint16_t burst; 403 int error; 404 405 if (rate != 0) { 406 MLX5E_RL_WORKER_UNLOCK(rlw); 407 408 MLX5E_RL_RLOCK(rl); 409 410 /* get current burst size in bytes */ 411 temp = rl->param.tx_burst_size * 412 MLX5E_SW2HW_MTU(rlw->priv->ifp->if_mtu); 413 414 /* limit burst size to 64K currently */ 415 if (temp > 65535) 416 temp = 65535; 417 burst = temp; 418 419 /* find best rate */ 420 rate = mlx5e_rl_find_best_rate_locked(rl, rate); 421 422 MLX5E_RL_RUNLOCK(rl); 423 424 if (rate == 0) { 425 /* rate doesn't exist, fallback to unlimited */ 426 index = 0; 427 rate = 0; 428 atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 429 } else { 430 /* get a reference on the new rate */ 431 error = -mlx5_rl_add_rate(rlw->priv->mdev, 432 howmany(rate, 1000), burst, &index); 433 434 if (error != 0) { 435 /* adding rate failed, fallback to unlimited */ 436 index = 0; 437 rate = 0; 438 atomic_add_64(&rlw->priv->rl.stats.tx_add_new_rate_failure, 1ULL); 439 } 440 } 441 MLX5E_RL_WORKER_LOCK(rlw); 442 } else { 443 index = 0; 444 burst = 0; /* default */ 445 } 446 447 /* atomically swap rates */ 448 temp = channel->last_rate; 449 channel->last_rate = rate; 450 rate = temp; 451 452 /* atomically swap burst size */ 453 temp = channel->last_burst; 454 channel->last_burst = burst; 455 burst = temp; 456 457 MLX5E_RL_WORKER_UNLOCK(rlw); 458 /* put reference on the old rate, if any */ 459 if (rate != 0) { 460 mlx5_rl_remove_rate(rlw->priv->mdev, 461 howmany(rate, 1000), burst); 462 } 463 464 /* set new rate, if SQ is running */ 465 sq = channel->sq; 466 if (sq != NULL && READ_ONCE(sq->running) != 0) { 467 error = mlx5e_rl_modify_sq(sq, index); 468 if (error != 0) 469 atomic_add_64(&rlw->priv->rl.stats.tx_modify_rate_failure, 1ULL); 470 } else 471 error = 0; 472 MLX5E_RL_WORKER_LOCK(rlw); 473 474 return (-error); 475 } 476 477 static void 478 mlx5e_rl_worker(void *arg) 479 { 480 struct thread *td; 481 struct mlx5e_rl_worker *rlw = arg; 482 struct mlx5e_rl_channel *channel; 483 struct mlx5e_priv *priv; 484 unsigned ix; 485 uint64_t x; 486 int error; 487 488 /* set thread priority */ 489 td = curthread; 490 491 thread_lock(td); 492 sched_prio(td, PI_SWI(SWI_NET)); 493 thread_unlock(td); 494 495 priv = rlw->priv; 496 497 /* compute completion vector */ 498 ix = (rlw - priv->rl.workers) % 499 priv->mdev->priv.eq_table.num_comp_vectors; 500 501 /* TODO bind to CPU */ 502 503 /* open all the SQs */ 504 MLX5E_RL_WORKER_LOCK(rlw); 505 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 506 struct mlx5e_rl_channel *channel = rlw->channels + x; 507 508 #if !defined(HAVE_RL_PRE_ALLOCATE_CHANNELS) 509 if (channel->state == MLX5E_RL_ST_FREE) 510 continue; 511 #endif 512 MLX5E_RL_WORKER_UNLOCK(rlw); 513 514 MLX5E_RL_RLOCK(&priv->rl); 515 error = mlx5e_rl_open_channel(rlw, ix, 516 &priv->rl.chan_param, &channel->sq); 517 MLX5E_RL_RUNLOCK(&priv->rl); 518 519 MLX5E_RL_WORKER_LOCK(rlw); 520 if (error != 0) { 521 mlx5_en_err(priv->ifp, 522 "mlx5e_rl_open_channel failed: %d\n", error); 523 break; 524 } 525 mlx5e_rlw_channel_set_rate_locked(rlw, channel, channel->init_rate); 526 } 527 while (1) { 528 if (STAILQ_FIRST(&rlw->process_head) == NULL) { 529 /* check if we are tearing down */ 530 if (rlw->worker_done != 0) 531 break; 532 cv_wait(&rlw->cv, &rlw->mtx); 533 } 534 /* check if we are tearing down */ 535 if (rlw->worker_done != 0) 536 break; 537 channel = STAILQ_FIRST(&rlw->process_head); 538 if (channel != NULL) { 539 STAILQ_REMOVE_HEAD(&rlw->process_head, entry); 540 541 switch (channel->state) { 542 case MLX5E_RL_ST_MODIFY: 543 channel->state = MLX5E_RL_ST_USED; 544 MLX5E_RL_WORKER_UNLOCK(rlw); 545 546 /* create channel by demand */ 547 if (channel->sq == NULL) { 548 MLX5E_RL_RLOCK(&priv->rl); 549 error = mlx5e_rl_open_channel(rlw, ix, 550 &priv->rl.chan_param, &channel->sq); 551 MLX5E_RL_RUNLOCK(&priv->rl); 552 553 if (error != 0) { 554 mlx5_en_err(priv->ifp, 555 "mlx5e_rl_open_channel failed: %d\n", error); 556 } else { 557 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, 1ULL); 558 } 559 } else { 560 mlx5e_resume_sq(channel->sq); 561 } 562 563 MLX5E_RL_WORKER_LOCK(rlw); 564 /* convert from bytes/s to bits/s and set new rate */ 565 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 566 channel->new_rate * 8ULL); 567 if (error != 0) { 568 mlx5_en_err(priv->ifp, 569 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 570 error); 571 } 572 break; 573 574 case MLX5E_RL_ST_DESTROY: 575 error = mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 576 if (error != 0) { 577 mlx5_en_err(priv->ifp, 578 "mlx5e_rlw_channel_set_rate_locked failed: %d\n", 579 error); 580 } 581 if (channel->sq != NULL) { 582 /* 583 * Make sure all packets are 584 * transmitted before SQ is 585 * returned to free list: 586 */ 587 MLX5E_RL_WORKER_UNLOCK(rlw); 588 mlx5e_drain_sq(channel->sq); 589 MLX5E_RL_WORKER_LOCK(rlw); 590 } 591 /* put the channel back into the free list */ 592 STAILQ_INSERT_HEAD(&rlw->index_list_head, channel, entry); 593 channel->state = MLX5E_RL_ST_FREE; 594 atomic_add_64(&priv->rl.stats.tx_active_connections, -1ULL); 595 break; 596 default: 597 /* NOP */ 598 break; 599 } 600 } 601 } 602 603 /* close all the SQs */ 604 for (x = 0; x < priv->rl.param.tx_channels_per_worker_def; x++) { 605 struct mlx5e_rl_channel *channel = rlw->channels + x; 606 607 /* update the initial rate */ 608 channel->init_rate = channel->last_rate; 609 610 /* make sure we free up the rate resource */ 611 mlx5e_rlw_channel_set_rate_locked(rlw, channel, 0); 612 613 if (channel->sq != NULL) { 614 MLX5E_RL_WORKER_UNLOCK(rlw); 615 mlx5e_rl_close_channel(&channel->sq); 616 atomic_add_64(&rlw->priv->rl.stats.tx_open_queues, -1ULL); 617 MLX5E_RL_WORKER_LOCK(rlw); 618 } 619 } 620 621 rlw->worker_done = 0; 622 cv_broadcast(&rlw->cv); 623 MLX5E_RL_WORKER_UNLOCK(rlw); 624 625 kthread_exit(); 626 } 627 628 static int 629 mlx5e_rl_open_tis(struct mlx5e_priv *priv) 630 { 631 struct mlx5_core_dev *mdev = priv->mdev; 632 u32 in[MLX5_ST_SZ_DW(create_tis_in)]; 633 void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); 634 635 memset(in, 0, sizeof(in)); 636 637 MLX5_SET(tisc, tisc, prio, 0); 638 MLX5_SET(tisc, tisc, transport_domain, priv->tdn); 639 640 return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->rl.tisn)); 641 } 642 643 static void 644 mlx5e_rl_close_tis(struct mlx5e_priv *priv) 645 { 646 mlx5_core_destroy_tis(priv->mdev, priv->rl.tisn); 647 } 648 649 static void 650 mlx5e_rl_set_default_params(struct mlx5e_rl_params *param, 651 struct mlx5_core_dev *mdev) 652 { 653 /* ratelimit workers */ 654 param->tx_worker_threads_def = mdev->priv.eq_table.num_comp_vectors; 655 param->tx_worker_threads_max = MLX5E_RL_MAX_WORKERS; 656 657 /* range check */ 658 if (param->tx_worker_threads_def == 0 || 659 param->tx_worker_threads_def > param->tx_worker_threads_max) 660 param->tx_worker_threads_def = param->tx_worker_threads_max; 661 662 /* ratelimit channels */ 663 param->tx_channels_per_worker_def = MLX5E_RL_MAX_SQS / 664 param->tx_worker_threads_def; 665 param->tx_channels_per_worker_max = MLX5E_RL_MAX_SQS; 666 667 /* range check */ 668 if (param->tx_channels_per_worker_def > MLX5E_RL_DEF_SQ_PER_WORKER) 669 param->tx_channels_per_worker_def = MLX5E_RL_DEF_SQ_PER_WORKER; 670 671 /* set default burst size */ 672 param->tx_burst_size = 4; /* MTUs */ 673 674 /* 675 * Set maximum burst size 676 * 677 * The burst size is multiplied by the MTU and clamped to the 678 * range 0 ... 65535 bytes inclusivly before fed into the 679 * firmware. 680 * 681 * NOTE: If the burst size or MTU is changed only ratelimit 682 * connections made after the change will use the new burst 683 * size. 684 */ 685 param->tx_burst_size_max = 255; 686 687 /* get firmware rate limits in 1000bit/s and convert them to bit/s */ 688 param->tx_limit_min = mdev->priv.rl_table.min_rate * 1000ULL; 689 param->tx_limit_max = mdev->priv.rl_table.max_rate * 1000ULL; 690 691 /* ratelimit table size */ 692 param->tx_rates_max = mdev->priv.rl_table.max_size; 693 694 /* range check */ 695 if (param->tx_rates_max > MLX5E_RL_MAX_TX_RATES) 696 param->tx_rates_max = MLX5E_RL_MAX_TX_RATES; 697 698 /* set default number of rates */ 699 param->tx_rates_def = param->tx_rates_max; 700 701 /* set maximum allowed rate deviation */ 702 if (param->tx_limit_max != 0) { 703 /* 704 * Make sure the deviation multiplication doesn't 705 * overflow unsigned 64-bit: 706 */ 707 param->tx_allowed_deviation_max = -1ULL / 708 param->tx_limit_max; 709 } 710 /* set default rate deviation */ 711 param->tx_allowed_deviation = 50; /* 5.0% */ 712 713 /* channel parameters */ 714 param->tx_queue_size = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 715 param->tx_coalesce_usecs = MLX5E_RL_TX_COAL_USEC_DEFAULT; 716 param->tx_coalesce_pkts = MLX5E_RL_TX_COAL_PKTS_DEFAULT; 717 param->tx_coalesce_mode = MLX5E_RL_TX_COAL_MODE_DEFAULT; 718 param->tx_completion_fact = MLX5E_RL_TX_COMP_FACT_DEFAULT; 719 } 720 721 static const char *mlx5e_rl_params_desc[] = { 722 MLX5E_RL_PARAMS(MLX5E_STATS_DESC) 723 }; 724 725 static const char *mlx5e_rl_table_params_desc[] = { 726 MLX5E_RL_TABLE_PARAMS(MLX5E_STATS_DESC) 727 }; 728 729 static const char *mlx5e_rl_stats_desc[] = { 730 MLX5E_RL_STATS(MLX5E_STATS_DESC) 731 }; 732 733 int 734 mlx5e_rl_init(struct mlx5e_priv *priv) 735 { 736 struct mlx5e_rl_priv_data *rl = &priv->rl; 737 struct sysctl_oid *node; 738 struct sysctl_oid *stats; 739 char buf[64]; 740 uint64_t i; 741 uint64_t j; 742 int error; 743 744 /* check if there is support for packet pacing */ 745 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 746 return (0); 747 748 rl->priv = priv; 749 750 sysctl_ctx_init(&rl->ctx); 751 752 sx_init(&rl->rl_sxlock, "ratelimit-sxlock"); 753 754 /* allocate shared UAR for SQs */ 755 error = mlx5_alloc_map_uar(priv->mdev, &rl->sq_uar); 756 if (error) 757 goto done; 758 759 /* open own TIS domain for ratelimit SQs */ 760 error = mlx5e_rl_open_tis(priv); 761 if (error) 762 goto err_uar; 763 764 /* setup default value for parameters */ 765 mlx5e_rl_set_default_params(&rl->param, priv->mdev); 766 767 /* update the completion factor */ 768 mlx5e_rl_sync_tx_completion_fact(rl); 769 770 /* create root node */ 771 node = SYSCTL_ADD_NODE(&rl->ctx, 772 SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, 773 "rate_limit", CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Rate limiting support"); 774 775 if (node != NULL) { 776 /* create SYSCTLs */ 777 for (i = 0; i != MLX5E_RL_PARAMS_NUM; i++) { 778 mlx5e_rl_sysctl_add_u64_oid(rl, 779 MLX5E_RL_PARAMS_INDEX(arg[i]), 780 node, mlx5e_rl_params_desc[2 * i], 781 mlx5e_rl_params_desc[2 * i + 1]); 782 } 783 784 stats = SYSCTL_ADD_NODE(&rl->ctx, SYSCTL_CHILDREN(node), 785 OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 786 "Rate limiting statistics"); 787 if (stats != NULL) { 788 /* create SYSCTLs */ 789 for (i = 0; i != MLX5E_RL_STATS_NUM; i++) { 790 mlx5e_rl_sysctl_add_stats_u64_oid(rl, i, 791 stats, mlx5e_rl_stats_desc[2 * i], 792 mlx5e_rl_stats_desc[2 * i + 1]); 793 } 794 } 795 } 796 797 /* allocate workers array */ 798 rl->workers = malloc(sizeof(rl->workers[0]) * 799 rl->param.tx_worker_threads_def, M_MLX5EN, M_WAITOK | M_ZERO); 800 801 /* allocate rate limit array */ 802 rl->rate_limit_table = malloc(sizeof(rl->rate_limit_table[0]) * 803 rl->param.tx_rates_def, M_MLX5EN, M_WAITOK | M_ZERO); 804 805 if (node != NULL) { 806 /* create more SYSCTls */ 807 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 808 "tx_rate_show", CTLTYPE_STRING | CTLFLAG_RD | 809 CTLFLAG_MPSAFE, rl, 0, &mlx5e_rl_sysctl_show_rate_table, 810 "A", "Show table of all configured TX rates"); 811 812 /* try to fetch rate table from kernel environment */ 813 for (i = 0; i != rl->param.tx_rates_def; i++) { 814 /* compute path for tunable */ 815 snprintf(buf, sizeof(buf), "dev.mce.%d.rate_limit.tx_rate_add_%d", 816 device_get_unit(priv->mdev->pdev->dev.bsddev), (int)i); 817 if (TUNABLE_QUAD_FETCH(buf, &j)) 818 mlx5e_rl_tx_limit_add(rl, j); 819 } 820 821 /* setup rate table sysctls */ 822 for (i = 0; i != MLX5E_RL_TABLE_PARAMS_NUM; i++) { 823 mlx5e_rl_sysctl_add_u64_oid(rl, 824 MLX5E_RL_PARAMS_INDEX(table_arg[i]), 825 node, mlx5e_rl_table_params_desc[2 * i], 826 mlx5e_rl_table_params_desc[2 * i + 1]); 827 } 828 } 829 830 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 831 struct mlx5e_rl_worker *rlw = rl->workers + j; 832 833 rlw->priv = priv; 834 835 cv_init(&rlw->cv, "mlx5-worker-cv"); 836 mtx_init(&rlw->mtx, "mlx5-worker-mtx", NULL, MTX_DEF); 837 STAILQ_INIT(&rlw->index_list_head); 838 STAILQ_INIT(&rlw->process_head); 839 840 rlw->channels = malloc(sizeof(rlw->channels[0]) * 841 rl->param.tx_channels_per_worker_def, M_MLX5EN, M_WAITOK | M_ZERO); 842 843 MLX5E_RL_WORKER_LOCK(rlw); 844 for (i = 0; i < rl->param.tx_channels_per_worker_def; i++) { 845 struct mlx5e_rl_channel *channel = rlw->channels + i; 846 channel->worker = rlw; 847 channel->tag.type = IF_SND_TAG_TYPE_RATE_LIMIT; 848 STAILQ_INSERT_TAIL(&rlw->index_list_head, channel, entry); 849 } 850 MLX5E_RL_WORKER_UNLOCK(rlw); 851 } 852 853 PRIV_LOCK(priv); 854 error = mlx5e_rl_open_workers(priv); 855 PRIV_UNLOCK(priv); 856 857 if (error != 0) { 858 mlx5_en_err(priv->ifp, 859 "mlx5e_rl_open_workers failed: %d\n", error); 860 } 861 862 return (0); 863 864 err_uar: 865 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 866 done: 867 sysctl_ctx_free(&rl->ctx); 868 sx_destroy(&rl->rl_sxlock); 869 return (error); 870 } 871 872 static int 873 mlx5e_rl_open_workers(struct mlx5e_priv *priv) 874 { 875 struct mlx5e_rl_priv_data *rl = &priv->rl; 876 struct thread *rl_thread = NULL; 877 struct proc *rl_proc = NULL; 878 uint64_t j; 879 int error; 880 881 if (priv->gone || rl->opened) 882 return (-EINVAL); 883 884 MLX5E_RL_WLOCK(rl); 885 /* compute channel parameters once */ 886 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 887 MLX5E_RL_WUNLOCK(rl); 888 889 for (j = 0; j < rl->param.tx_worker_threads_def; j++) { 890 struct mlx5e_rl_worker *rlw = rl->workers + j; 891 892 /* start worker thread */ 893 error = kproc_kthread_add(mlx5e_rl_worker, rlw, &rl_proc, &rl_thread, 894 RFHIGHPID, 0, "mlx5-ratelimit", "mlx5-rl-worker-thread-%d", (int)j); 895 if (error != 0) { 896 mlx5_en_err(rl->priv->ifp, 897 "kproc_kthread_add failed: %d\n", error); 898 rlw->worker_done = 1; 899 } 900 } 901 902 rl->opened = 1; 903 904 return (0); 905 } 906 907 static void 908 mlx5e_rl_close_workers(struct mlx5e_priv *priv) 909 { 910 struct mlx5e_rl_priv_data *rl = &priv->rl; 911 uint64_t y; 912 913 if (rl->opened == 0) 914 return; 915 916 /* tear down worker threads simultaneously */ 917 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 918 struct mlx5e_rl_worker *rlw = rl->workers + y; 919 920 /* tear down worker before freeing SQs */ 921 MLX5E_RL_WORKER_LOCK(rlw); 922 if (rlw->worker_done == 0) { 923 rlw->worker_done = 1; 924 cv_broadcast(&rlw->cv); 925 } else { 926 /* XXX thread not started */ 927 rlw->worker_done = 0; 928 } 929 MLX5E_RL_WORKER_UNLOCK(rlw); 930 } 931 932 /* wait for worker threads to exit */ 933 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 934 struct mlx5e_rl_worker *rlw = rl->workers + y; 935 936 /* tear down worker before freeing SQs */ 937 MLX5E_RL_WORKER_LOCK(rlw); 938 while (rlw->worker_done != 0) 939 cv_wait(&rlw->cv, &rlw->mtx); 940 MLX5E_RL_WORKER_UNLOCK(rlw); 941 } 942 943 rl->opened = 0; 944 } 945 946 static void 947 mlx5e_rl_reset_rates(struct mlx5e_rl_priv_data *rl) 948 { 949 unsigned x; 950 951 MLX5E_RL_WLOCK(rl); 952 for (x = 0; x != rl->param.tx_rates_def; x++) 953 rl->rate_limit_table[x] = 0; 954 MLX5E_RL_WUNLOCK(rl); 955 } 956 957 void 958 mlx5e_rl_cleanup(struct mlx5e_priv *priv) 959 { 960 struct mlx5e_rl_priv_data *rl = &priv->rl; 961 uint64_t y; 962 963 /* check if there is support for packet pacing */ 964 if (!MLX5_CAP_GEN(priv->mdev, qos) || !MLX5_CAP_QOS(priv->mdev, packet_pacing)) 965 return; 966 967 /* TODO check if there is support for packet pacing */ 968 969 sysctl_ctx_free(&rl->ctx); 970 971 PRIV_LOCK(priv); 972 mlx5e_rl_close_workers(priv); 973 PRIV_UNLOCK(priv); 974 975 mlx5e_rl_reset_rates(rl); 976 977 /* free shared UAR for SQs */ 978 mlx5_unmap_free_uar(priv->mdev, &rl->sq_uar); 979 980 /* close TIS domain */ 981 mlx5e_rl_close_tis(priv); 982 983 for (y = 0; y < rl->param.tx_worker_threads_def; y++) { 984 struct mlx5e_rl_worker *rlw = rl->workers + y; 985 986 cv_destroy(&rlw->cv); 987 mtx_destroy(&rlw->mtx); 988 free(rlw->channels, M_MLX5EN); 989 } 990 free(rl->rate_limit_table, M_MLX5EN); 991 free(rl->workers, M_MLX5EN); 992 sx_destroy(&rl->rl_sxlock); 993 } 994 995 static void 996 mlx5e_rlw_queue_channel_locked(struct mlx5e_rl_worker *rlw, 997 struct mlx5e_rl_channel *channel) 998 { 999 STAILQ_INSERT_TAIL(&rlw->process_head, channel, entry); 1000 cv_broadcast(&rlw->cv); 1001 } 1002 1003 static void 1004 mlx5e_rl_free(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel) 1005 { 1006 if (channel == NULL) 1007 return; 1008 1009 MLX5E_RL_WORKER_LOCK(rlw); 1010 switch (channel->state) { 1011 case MLX5E_RL_ST_MODIFY: 1012 channel->state = MLX5E_RL_ST_DESTROY; 1013 break; 1014 case MLX5E_RL_ST_USED: 1015 channel->state = MLX5E_RL_ST_DESTROY; 1016 mlx5e_rlw_queue_channel_locked(rlw, channel); 1017 break; 1018 default: 1019 break; 1020 } 1021 MLX5E_RL_WORKER_UNLOCK(rlw); 1022 } 1023 1024 static int 1025 mlx5e_rl_modify(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, uint64_t rate) 1026 { 1027 1028 MLX5E_RL_WORKER_LOCK(rlw); 1029 channel->new_rate = rate; 1030 switch (channel->state) { 1031 case MLX5E_RL_ST_USED: 1032 channel->state = MLX5E_RL_ST_MODIFY; 1033 mlx5e_rlw_queue_channel_locked(rlw, channel); 1034 break; 1035 default: 1036 break; 1037 } 1038 MLX5E_RL_WORKER_UNLOCK(rlw); 1039 1040 return (0); 1041 } 1042 1043 static int 1044 mlx5e_rl_query(struct mlx5e_rl_worker *rlw, struct mlx5e_rl_channel *channel, 1045 union if_snd_tag_query_params *params) 1046 { 1047 int retval; 1048 1049 MLX5E_RL_WORKER_LOCK(rlw); 1050 switch (channel->state) { 1051 case MLX5E_RL_ST_USED: 1052 params->rate_limit.max_rate = channel->last_rate; 1053 params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); 1054 retval = 0; 1055 break; 1056 case MLX5E_RL_ST_MODIFY: 1057 params->rate_limit.max_rate = channel->last_rate; 1058 params->rate_limit.queue_level = mlx5e_sq_queue_level(channel->sq); 1059 retval = EBUSY; 1060 break; 1061 default: 1062 retval = EINVAL; 1063 break; 1064 } 1065 MLX5E_RL_WORKER_UNLOCK(rlw); 1066 1067 return (retval); 1068 } 1069 1070 static int 1071 mlx5e_find_available_tx_ring_index(struct mlx5e_rl_worker *rlw, 1072 struct mlx5e_rl_channel **pchannel) 1073 { 1074 struct mlx5e_rl_channel *channel; 1075 int retval = ENOMEM; 1076 1077 MLX5E_RL_WORKER_LOCK(rlw); 1078 /* Check for available channel in free list */ 1079 if ((channel = STAILQ_FIRST(&rlw->index_list_head)) != NULL) { 1080 retval = 0; 1081 /* Remove head index from available list */ 1082 STAILQ_REMOVE_HEAD(&rlw->index_list_head, entry); 1083 channel->state = MLX5E_RL_ST_USED; 1084 atomic_add_64(&rlw->priv->rl.stats.tx_active_connections, 1ULL); 1085 } else { 1086 atomic_add_64(&rlw->priv->rl.stats.tx_available_resource_failure, 1ULL); 1087 } 1088 MLX5E_RL_WORKER_UNLOCK(rlw); 1089 1090 *pchannel = channel; 1091 #ifdef RATELIMIT_DEBUG 1092 mlx5_en_info(rlw->priv->ifp, 1093 "Channel pointer for rate limit connection is %p\n", channel); 1094 #endif 1095 return (retval); 1096 } 1097 1098 int 1099 mlx5e_rl_snd_tag_alloc(struct ifnet *ifp, 1100 union if_snd_tag_alloc_params *params, 1101 struct m_snd_tag **ppmt) 1102 { 1103 struct mlx5e_rl_channel *channel; 1104 struct mlx5e_rl_worker *rlw; 1105 struct mlx5e_priv *priv; 1106 int error; 1107 1108 priv = ifp->if_softc; 1109 1110 /* check if there is support for packet pacing or if device is going away */ 1111 if (!MLX5_CAP_GEN(priv->mdev, qos) || 1112 !MLX5_CAP_QOS(priv->mdev, packet_pacing) || priv->gone || 1113 params->rate_limit.hdr.type != IF_SND_TAG_TYPE_RATE_LIMIT) 1114 return (EOPNOTSUPP); 1115 1116 /* compute worker thread this TCP connection belongs to */ 1117 rlw = priv->rl.workers + ((params->rate_limit.hdr.flowid % 128) % 1118 priv->rl.param.tx_worker_threads_def); 1119 1120 error = mlx5e_find_available_tx_ring_index(rlw, &channel); 1121 if (error != 0) 1122 goto done; 1123 1124 error = mlx5e_rl_modify(rlw, channel, params->rate_limit.max_rate); 1125 if (error != 0) { 1126 mlx5e_rl_free(rlw, channel); 1127 goto done; 1128 } 1129 1130 /* store pointer to mbuf tag */ 1131 MPASS(channel->tag.refcount == 0); 1132 m_snd_tag_init(&channel->tag, ifp, IF_SND_TAG_TYPE_RATE_LIMIT); 1133 *ppmt = &channel->tag; 1134 done: 1135 return (error); 1136 } 1137 1138 1139 int 1140 mlx5e_rl_snd_tag_modify(struct m_snd_tag *pmt, union if_snd_tag_modify_params *params) 1141 { 1142 struct mlx5e_rl_channel *channel = 1143 container_of(pmt, struct mlx5e_rl_channel, tag); 1144 1145 return (mlx5e_rl_modify(channel->worker, channel, params->rate_limit.max_rate)); 1146 } 1147 1148 int 1149 mlx5e_rl_snd_tag_query(struct m_snd_tag *pmt, union if_snd_tag_query_params *params) 1150 { 1151 struct mlx5e_rl_channel *channel = 1152 container_of(pmt, struct mlx5e_rl_channel, tag); 1153 1154 return (mlx5e_rl_query(channel->worker, channel, params)); 1155 } 1156 1157 void 1158 mlx5e_rl_snd_tag_free(struct m_snd_tag *pmt) 1159 { 1160 struct mlx5e_rl_channel *channel = 1161 container_of(pmt, struct mlx5e_rl_channel, tag); 1162 1163 mlx5e_rl_free(channel->worker, channel); 1164 } 1165 1166 static int 1167 mlx5e_rl_sysctl_show_rate_table(SYSCTL_HANDLER_ARGS) 1168 { 1169 struct mlx5e_rl_priv_data *rl = arg1; 1170 struct mlx5e_priv *priv = rl->priv; 1171 struct sbuf sbuf; 1172 unsigned x; 1173 int error; 1174 1175 error = sysctl_wire_old_buffer(req, 0); 1176 if (error != 0) 1177 return (error); 1178 1179 PRIV_LOCK(priv); 1180 1181 sbuf_new_for_sysctl(&sbuf, NULL, 128 * rl->param.tx_rates_def, req); 1182 1183 sbuf_printf(&sbuf, 1184 "\n\n" "\t" "ENTRY" "\t" "BURST" "\t" "RATE [bit/s]\n" 1185 "\t" "--------------------------------------------\n"); 1186 1187 MLX5E_RL_RLOCK(rl); 1188 for (x = 0; x != rl->param.tx_rates_def; x++) { 1189 if (rl->rate_limit_table[x] == 0) 1190 continue; 1191 1192 sbuf_printf(&sbuf, "\t" "%3u" "\t" "%3u" "\t" "%lld\n", 1193 x, (unsigned)rl->param.tx_burst_size, 1194 (long long)rl->rate_limit_table[x]); 1195 } 1196 MLX5E_RL_RUNLOCK(rl); 1197 1198 error = sbuf_finish(&sbuf); 1199 sbuf_delete(&sbuf); 1200 1201 PRIV_UNLOCK(priv); 1202 1203 return (error); 1204 } 1205 1206 static int 1207 mlx5e_rl_refresh_channel_params(struct mlx5e_rl_priv_data *rl) 1208 { 1209 uint64_t x; 1210 uint64_t y; 1211 1212 MLX5E_RL_WLOCK(rl); 1213 /* compute channel parameters once */ 1214 mlx5e_rl_build_channel_param(rl, &rl->chan_param); 1215 MLX5E_RL_WUNLOCK(rl); 1216 1217 for (y = 0; y != rl->param.tx_worker_threads_def; y++) { 1218 struct mlx5e_rl_worker *rlw = rl->workers + y; 1219 1220 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1221 struct mlx5e_rl_channel *channel; 1222 struct mlx5e_sq *sq; 1223 1224 channel = rlw->channels + x; 1225 sq = channel->sq; 1226 1227 if (sq == NULL) 1228 continue; 1229 1230 if (MLX5_CAP_GEN(rl->priv->mdev, cq_period_mode_modify)) { 1231 mlx5_core_modify_cq_moderation_mode(rl->priv->mdev, &sq->cq.mcq, 1232 rl->param.tx_coalesce_usecs, 1233 rl->param.tx_coalesce_pkts, 1234 rl->param.tx_coalesce_mode); 1235 } else { 1236 mlx5_core_modify_cq_moderation(rl->priv->mdev, &sq->cq.mcq, 1237 rl->param.tx_coalesce_usecs, 1238 rl->param.tx_coalesce_pkts); 1239 } 1240 } 1241 } 1242 return (0); 1243 } 1244 1245 void 1246 mlx5e_rl_refresh_sq_inline(struct mlx5e_rl_priv_data *rl) 1247 { 1248 uint64_t x; 1249 uint64_t y; 1250 1251 for (y = 0; y != rl->param.tx_worker_threads_def; y++) { 1252 struct mlx5e_rl_worker *rlw = rl->workers + y; 1253 1254 for (x = 0; x != rl->param.tx_channels_per_worker_def; x++) { 1255 struct mlx5e_rl_channel *channel; 1256 struct mlx5e_sq *sq; 1257 1258 channel = rlw->channels + x; 1259 sq = channel->sq; 1260 1261 if (sq == NULL) 1262 continue; 1263 1264 mtx_lock(&sq->lock); 1265 mlx5e_update_sq_inline(sq); 1266 mtx_unlock(&sq->lock); 1267 } 1268 } 1269 } 1270 1271 static int 1272 mlx5e_rl_tx_limit_add(struct mlx5e_rl_priv_data *rl, uint64_t value) 1273 { 1274 unsigned x; 1275 int error; 1276 1277 if (value < 1000 || 1278 mlx5_rl_is_in_range(rl->priv->mdev, howmany(value, 1000), 0) == 0) 1279 return (EINVAL); 1280 1281 MLX5E_RL_WLOCK(rl); 1282 error = ENOMEM; 1283 1284 /* check if rate already exists */ 1285 for (x = 0; x != rl->param.tx_rates_def; x++) { 1286 if (rl->rate_limit_table[x] != value) 1287 continue; 1288 error = EEXIST; 1289 break; 1290 } 1291 1292 /* check if there is a free rate entry */ 1293 if (x == rl->param.tx_rates_def) { 1294 for (x = 0; x != rl->param.tx_rates_def; x++) { 1295 if (rl->rate_limit_table[x] != 0) 1296 continue; 1297 rl->rate_limit_table[x] = value; 1298 error = 0; 1299 break; 1300 } 1301 } 1302 MLX5E_RL_WUNLOCK(rl); 1303 1304 return (error); 1305 } 1306 1307 static int 1308 mlx5e_rl_tx_limit_clr(struct mlx5e_rl_priv_data *rl, uint64_t value) 1309 { 1310 unsigned x; 1311 int error; 1312 1313 if (value == 0) 1314 return (EINVAL); 1315 1316 MLX5E_RL_WLOCK(rl); 1317 1318 /* check if rate already exists */ 1319 for (x = 0; x != rl->param.tx_rates_def; x++) { 1320 if (rl->rate_limit_table[x] != value) 1321 continue; 1322 /* free up rate */ 1323 rl->rate_limit_table[x] = 0; 1324 break; 1325 } 1326 1327 /* check if there is a free rate entry */ 1328 if (x == rl->param.tx_rates_def) 1329 error = ENOENT; 1330 else 1331 error = 0; 1332 MLX5E_RL_WUNLOCK(rl); 1333 1334 return (error); 1335 } 1336 1337 static int 1338 mlx5e_rl_sysctl_handler(SYSCTL_HANDLER_ARGS) 1339 { 1340 struct mlx5e_rl_priv_data *rl = arg1; 1341 struct mlx5e_priv *priv = rl->priv; 1342 unsigned mode_modify; 1343 unsigned was_opened; 1344 uint64_t value; 1345 uint64_t old; 1346 int error; 1347 1348 PRIV_LOCK(priv); 1349 1350 MLX5E_RL_RLOCK(rl); 1351 value = rl->param.arg[arg2]; 1352 MLX5E_RL_RUNLOCK(rl); 1353 1354 if (req != NULL) { 1355 old = value; 1356 error = sysctl_handle_64(oidp, &value, 0, req); 1357 if (error || req->newptr == NULL || 1358 value == rl->param.arg[arg2]) 1359 goto done; 1360 } else { 1361 old = 0; 1362 error = 0; 1363 } 1364 1365 /* check if device is gone */ 1366 if (priv->gone) { 1367 error = ENXIO; 1368 goto done; 1369 } 1370 was_opened = rl->opened; 1371 mode_modify = MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify); 1372 1373 switch (MLX5E_RL_PARAMS_INDEX(arg[arg2])) { 1374 case MLX5E_RL_PARAMS_INDEX(tx_worker_threads_def): 1375 if (value > rl->param.tx_worker_threads_max) 1376 value = rl->param.tx_worker_threads_max; 1377 else if (value < 1) 1378 value = 1; 1379 1380 /* store new value */ 1381 rl->param.arg[arg2] = value; 1382 break; 1383 1384 case MLX5E_RL_PARAMS_INDEX(tx_channels_per_worker_def): 1385 if (value > rl->param.tx_channels_per_worker_max) 1386 value = rl->param.tx_channels_per_worker_max; 1387 else if (value < 1) 1388 value = 1; 1389 1390 /* store new value */ 1391 rl->param.arg[arg2] = value; 1392 break; 1393 1394 case MLX5E_RL_PARAMS_INDEX(tx_rates_def): 1395 if (value > rl->param.tx_rates_max) 1396 value = rl->param.tx_rates_max; 1397 else if (value < 1) 1398 value = 1; 1399 1400 /* store new value */ 1401 rl->param.arg[arg2] = value; 1402 break; 1403 1404 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_usecs): 1405 /* range check */ 1406 if (value < 1) 1407 value = 0; 1408 else if (value > MLX5E_FLD_MAX(cqc, cq_period)) 1409 value = MLX5E_FLD_MAX(cqc, cq_period); 1410 1411 /* store new value */ 1412 rl->param.arg[arg2] = value; 1413 1414 /* check to avoid down and up the network interface */ 1415 if (was_opened) 1416 error = mlx5e_rl_refresh_channel_params(rl); 1417 break; 1418 1419 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_pkts): 1420 /* import TX coal pkts */ 1421 if (value < 1) 1422 value = 0; 1423 else if (value > MLX5E_FLD_MAX(cqc, cq_max_count)) 1424 value = MLX5E_FLD_MAX(cqc, cq_max_count); 1425 1426 /* store new value */ 1427 rl->param.arg[arg2] = value; 1428 1429 /* check to avoid down and up the network interface */ 1430 if (was_opened) 1431 error = mlx5e_rl_refresh_channel_params(rl); 1432 break; 1433 1434 case MLX5E_RL_PARAMS_INDEX(tx_coalesce_mode): 1435 /* network interface must be down */ 1436 if (was_opened != 0 && mode_modify == 0) 1437 mlx5e_rl_close_workers(priv); 1438 1439 /* import TX coalesce mode */ 1440 if (value != 0) 1441 value = 1; 1442 1443 /* store new value */ 1444 rl->param.arg[arg2] = value; 1445 1446 /* restart network interface, if any */ 1447 if (was_opened != 0) { 1448 if (mode_modify == 0) 1449 mlx5e_rl_open_workers(priv); 1450 else 1451 error = mlx5e_rl_refresh_channel_params(rl); 1452 } 1453 break; 1454 1455 case MLX5E_RL_PARAMS_INDEX(tx_queue_size): 1456 /* network interface must be down */ 1457 if (was_opened) 1458 mlx5e_rl_close_workers(priv); 1459 1460 /* import TX queue size */ 1461 if (value < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) 1462 value = (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE); 1463 else if (value > priv->params_ethtool.tx_queue_size_max) 1464 value = priv->params_ethtool.tx_queue_size_max; 1465 1466 /* store actual TX queue size */ 1467 value = 1ULL << order_base_2(value); 1468 1469 /* store new value */ 1470 rl->param.arg[arg2] = value; 1471 1472 /* verify TX completion factor */ 1473 mlx5e_rl_sync_tx_completion_fact(rl); 1474 1475 /* restart network interface, if any */ 1476 if (was_opened) 1477 mlx5e_rl_open_workers(priv); 1478 break; 1479 1480 case MLX5E_RL_PARAMS_INDEX(tx_completion_fact): 1481 /* network interface must be down */ 1482 if (was_opened) 1483 mlx5e_rl_close_workers(priv); 1484 1485 /* store new value */ 1486 rl->param.arg[arg2] = value; 1487 1488 /* verify parameter */ 1489 mlx5e_rl_sync_tx_completion_fact(rl); 1490 1491 /* restart network interface, if any */ 1492 if (was_opened) 1493 mlx5e_rl_open_workers(priv); 1494 break; 1495 1496 case MLX5E_RL_PARAMS_INDEX(tx_limit_add): 1497 error = mlx5e_rl_tx_limit_add(rl, value); 1498 break; 1499 1500 case MLX5E_RL_PARAMS_INDEX(tx_limit_clr): 1501 error = mlx5e_rl_tx_limit_clr(rl, value); 1502 break; 1503 1504 case MLX5E_RL_PARAMS_INDEX(tx_allowed_deviation): 1505 /* range check */ 1506 if (value > rl->param.tx_allowed_deviation_max) 1507 value = rl->param.tx_allowed_deviation_max; 1508 else if (value < rl->param.tx_allowed_deviation_min) 1509 value = rl->param.tx_allowed_deviation_min; 1510 1511 MLX5E_RL_WLOCK(rl); 1512 rl->param.arg[arg2] = value; 1513 MLX5E_RL_WUNLOCK(rl); 1514 break; 1515 1516 case MLX5E_RL_PARAMS_INDEX(tx_burst_size): 1517 /* range check */ 1518 if (value > rl->param.tx_burst_size_max) 1519 value = rl->param.tx_burst_size_max; 1520 else if (value < rl->param.tx_burst_size_min) 1521 value = rl->param.tx_burst_size_min; 1522 1523 MLX5E_RL_WLOCK(rl); 1524 rl->param.arg[arg2] = value; 1525 MLX5E_RL_WUNLOCK(rl); 1526 break; 1527 1528 default: 1529 break; 1530 } 1531 done: 1532 PRIV_UNLOCK(priv); 1533 return (error); 1534 } 1535 1536 static void 1537 mlx5e_rl_sysctl_add_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1538 struct sysctl_oid *node, const char *name, const char *desc) 1539 { 1540 /* 1541 * NOTE: In FreeBSD-11 and newer the CTLFLAG_RWTUN flag will 1542 * take care of loading default sysctl value from the kernel 1543 * environment, if any: 1544 */ 1545 if (strstr(name, "_max") != 0 || strstr(name, "_min") != 0) { 1546 /* read-only SYSCTLs */ 1547 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1548 name, CTLTYPE_U64 | CTLFLAG_RD | 1549 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1550 } else { 1551 if (strstr(name, "_def") != 0) { 1552 #ifdef RATELIMIT_DEBUG 1553 /* tunable read-only advanced SYSCTLs */ 1554 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1555 name, CTLTYPE_U64 | CTLFLAG_RDTUN | 1556 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1557 #endif 1558 } else { 1559 /* read-write SYSCTLs */ 1560 SYSCTL_ADD_PROC(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, 1561 name, CTLTYPE_U64 | CTLFLAG_RWTUN | 1562 CTLFLAG_MPSAFE, rl, x, &mlx5e_rl_sysctl_handler, "QU", desc); 1563 } 1564 } 1565 } 1566 1567 static void 1568 mlx5e_rl_sysctl_add_stats_u64_oid(struct mlx5e_rl_priv_data *rl, unsigned x, 1569 struct sysctl_oid *node, const char *name, const char *desc) 1570 { 1571 /* read-only SYSCTLs */ 1572 SYSCTL_ADD_U64(&rl->ctx, SYSCTL_CHILDREN(node), OID_AUTO, name, 1573 CTLFLAG_RD, &rl->stats.arg[x], 0, desc); 1574 } 1575 1576 #else 1577 1578 int 1579 mlx5e_rl_init(struct mlx5e_priv *priv) 1580 { 1581 1582 return (0); 1583 } 1584 1585 void 1586 mlx5e_rl_cleanup(struct mlx5e_priv *priv) 1587 { 1588 /* NOP */ 1589 } 1590 1591 #endif /* RATELIMIT */ 1592