1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 #include "opt_rss.h" 33 34 #include "ena_rss.h" 35 #include "ena_sysctl.h" 36 37 static void ena_sysctl_add_wd(struct ena_adapter *); 38 static void ena_sysctl_add_stats(struct ena_adapter *); 39 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 40 static void ena_sysctl_add_tuneables(struct ena_adapter *); 41 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 42 #ifndef RSS 43 static void ena_sysctl_add_rss(struct ena_adapter *); 44 #endif 45 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 46 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 47 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 48 static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); 49 #ifndef RSS 50 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 51 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 52 #endif 53 54 /* Limit max ENI sample rate to be an hour. */ 55 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 56 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 57 58 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 59 "ENA driver parameters"); 60 61 /* 62 * Logging level for changing verbosity of the output 63 */ 64 int ena_log_level = ENA_INFO; 65 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, 66 "Logging level indicating verbosity of the logs"); 67 68 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 69 ENA_DRV_MODULE_VERSION, "ENA driver version"); 70 71 /* 72 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 73 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 74 * of time and lead to the OS instability as it needs to look for the contiguous 75 * pages. 76 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 77 * the network performance is the priority, the 9k mbufs can be used. 78 */ 79 int ena_enable_9k_mbufs = 0; 80 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 81 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 82 83 /* 84 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 85 * false. This option may be important for platforms, which often handle packet 86 * headers on Tx with total header size greater than 96B, as it may 87 * reduce the latency. 88 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 89 * packet drops. 90 */ 91 bool ena_force_large_llq_header = false; 92 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 93 &ena_force_large_llq_header, 0, 94 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 95 96 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 97 98 void 99 ena_sysctl_add_nodes(struct ena_adapter *adapter) 100 { 101 ena_sysctl_add_wd(adapter); 102 ena_sysctl_add_stats(adapter); 103 ena_sysctl_add_eni_metrics(adapter); 104 ena_sysctl_add_tuneables(adapter); 105 #ifndef RSS 106 ena_sysctl_add_rss(adapter); 107 #endif 108 } 109 110 static void 111 ena_sysctl_add_wd(struct ena_adapter *adapter) 112 { 113 device_t dev; 114 115 struct sysctl_ctx_list *ctx; 116 struct sysctl_oid *tree; 117 struct sysctl_oid_list *child; 118 119 dev = adapter->pdev; 120 121 ctx = device_get_sysctl_ctx(dev); 122 tree = device_get_sysctl_tree(dev); 123 child = SYSCTL_CHILDREN(tree); 124 125 /* Sysctl calls for Watchdog service */ 126 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN, 127 &adapter->wd_active, 0, "Watchdog is active"); 128 129 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 130 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 131 "Timeout for Keep Alive messages"); 132 133 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 134 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 135 "Timeout for TX completion"); 136 137 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 138 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 139 "Number of TX queues to check per run"); 140 141 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 142 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 143 "Max number of timeouted packets"); 144 } 145 146 static void 147 ena_sysctl_add_stats(struct ena_adapter *adapter) 148 { 149 device_t dev; 150 151 struct ena_ring *tx_ring; 152 struct ena_ring *rx_ring; 153 154 struct ena_hw_stats *hw_stats; 155 struct ena_stats_dev *dev_stats; 156 struct ena_stats_tx *tx_stats; 157 struct ena_stats_rx *rx_stats; 158 struct ena_com_stats_admin *admin_stats; 159 160 struct sysctl_ctx_list *ctx; 161 struct sysctl_oid *tree; 162 struct sysctl_oid_list *child; 163 164 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 165 struct sysctl_oid *admin_node; 166 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 167 struct sysctl_oid_list *admin_list; 168 169 #define QUEUE_NAME_LEN 32 170 char namebuf[QUEUE_NAME_LEN]; 171 int i; 172 173 dev = adapter->pdev; 174 175 ctx = device_get_sysctl_ctx(dev); 176 tree = device_get_sysctl_tree(dev); 177 child = SYSCTL_CHILDREN(tree); 178 179 tx_ring = adapter->tx_ring; 180 rx_ring = adapter->rx_ring; 181 182 hw_stats = &adapter->hw_stats; 183 dev_stats = &adapter->dev_stats; 184 admin_stats = &adapter->ena_dev->admin_queue.stats; 185 186 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, 187 &dev_stats->wd_expired, "Watchdog expiry count"); 188 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, 189 &dev_stats->interface_up, "Network interface up count"); 190 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 191 CTLFLAG_RD, &dev_stats->interface_down, 192 "Network interface down count"); 193 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 194 CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses"); 195 196 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 197 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 198 199 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 200 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 201 queue_list = SYSCTL_CHILDREN(queue_node); 202 203 adapter->que[i].oid = queue_node; 204 205 #ifdef RSS 206 /* Common stats */ 207 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, 208 &adapter->que[i].cpu, 0, "CPU affinity"); 209 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD, 210 &adapter->que[i].domain, 0, "NUMA domain"); 211 #endif 212 213 /* TX specific stats */ 214 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", 215 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 216 tx_list = SYSCTL_CHILDREN(tx_node); 217 218 tx_stats = &tx_ring->tx_stats; 219 220 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", 221 CTLFLAG_RD, &tx_stats->cnt, "Packets sent"); 222 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", 223 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); 224 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 225 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, 226 "TX buffer preparation failures"); 227 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 228 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, 229 "DMA mapping failures"); 230 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", 231 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); 232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 233 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, 234 "TX completions missed"); 235 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", 236 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); 237 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses", 238 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count"); 239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 240 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err, 241 "Mbuf collapse failures"); 242 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups", 243 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); 244 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops", 245 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); 246 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 247 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy, 248 "Header copies for llq transaction"); 249 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 250 "unmask_interrupt_num", CTLFLAG_RD, 251 &tx_stats->unmask_interrupt_num, 252 "Unmasked interrupt count"); 253 254 /* RX specific stats */ 255 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", 256 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 257 rx_list = SYSCTL_CHILDREN(rx_node); 258 259 rx_stats = &rx_ring->rx_stats; 260 261 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", 262 CTLFLAG_RD, &rx_stats->cnt, "Packets received"); 263 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", 264 CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); 265 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", 266 CTLFLAG_RD, &rx_stats->refil_partial, 267 "Partial refilled mbufs"); 268 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad", 269 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum"); 270 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 271 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, 272 "Failed mbuf allocs"); 273 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 274 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail, 275 "Failed jumbo mbuf allocs"); 276 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 277 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, 278 "DMA mapping errors"); 279 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", 280 CTLFLAG_RD, &rx_stats->bad_desc_num, 281 "Bad descriptor count"); 282 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", 283 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); 284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring", 285 CTLFLAG_RD, &rx_stats->empty_rx_ring, 286 "RX descriptors depletion count"); 287 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good", 288 CTLFLAG_RD, &rx_stats->csum_good, 289 "Valid RX checksum calculations"); 290 } 291 292 /* Stats read from device */ 293 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 294 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 295 hw_list = SYSCTL_CHILDREN(hw_node); 296 297 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 298 &hw_stats->rx_packets, "Packets received"); 299 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 300 &hw_stats->tx_packets, "Packets transmitted"); 301 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 302 &hw_stats->rx_bytes, "Bytes received"); 303 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 304 &hw_stats->tx_bytes, "Bytes transmitted"); 305 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 306 &hw_stats->rx_drops, "Receive packet drops"); 307 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 308 &hw_stats->tx_drops, "Transmit packet drops"); 309 310 /* ENA Admin queue stats */ 311 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 312 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 313 admin_list = SYSCTL_CHILDREN(admin_node); 314 315 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 316 &admin_stats->aborted_cmd, 0, "Aborted commands"); 317 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 318 &admin_stats->submitted_cmd, 0, "Submitted commands"); 319 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 320 &admin_stats->completed_cmd, 0, "Completed commands"); 321 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 322 &admin_stats->out_of_space, 0, "Queue out of space"); 323 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 324 &admin_stats->no_completion, 0, "Commands not completed"); 325 } 326 327 static void 328 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 329 { 330 device_t dev; 331 struct ena_admin_eni_stats *eni_metrics; 332 333 struct sysctl_ctx_list *ctx; 334 struct sysctl_oid *tree; 335 struct sysctl_oid_list *child; 336 337 struct sysctl_oid *eni_node; 338 struct sysctl_oid_list *eni_list; 339 340 dev = adapter->pdev; 341 342 ctx = device_get_sysctl_ctx(dev); 343 tree = device_get_sysctl_tree(dev); 344 child = SYSCTL_CHILDREN(tree); 345 346 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 347 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 348 eni_list = SYSCTL_CHILDREN(eni_node); 349 350 eni_metrics = &adapter->eni_metrics; 351 352 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 353 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 354 "Inbound BW allowance exceeded"); 355 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 356 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 357 "Outbound BW allowance exceeded"); 358 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 359 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 360 "PPS allowance exceeded"); 361 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 362 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 363 "Connection tracking allowance exceeded"); 364 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 365 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 366 "Linklocal packet rate allowance exceeded"); 367 368 /* 369 * Tuneable, which determines how often ENI metrics will be read. 370 * 0 means it's turned off. Maximum allowed value is limited by: 371 * ENI_METRICS_MAX_SAMPLE_INTERVAL. 372 */ 373 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", 374 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 375 ena_sysctl_eni_metrics_interval, "SU", 376 "Interval in seconds for updating ENI emetrics. 0 turns off the update."); 377 } 378 379 static void 380 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 381 { 382 device_t dev; 383 384 struct sysctl_ctx_list *ctx; 385 struct sysctl_oid *tree; 386 struct sysctl_oid_list *child; 387 388 dev = adapter->pdev; 389 390 ctx = device_get_sysctl_ctx(dev); 391 tree = device_get_sysctl_tree(dev); 392 child = SYSCTL_CHILDREN(tree); 393 394 /* Tuneable number of buffers in the buf-ring (drbr) */ 395 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 396 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 397 ena_sysctl_buf_ring_size, "I", 398 "Size of the Tx buffer ring (drbr)."); 399 400 /* Tuneable number of the Rx ring size */ 401 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 402 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 403 ena_sysctl_rx_queue_size, "I", 404 "Size of the Rx ring. The size should be a power of 2."); 405 406 /* Tuneable number of IO queues */ 407 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 408 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 409 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 410 } 411 412 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 413 #ifndef RSS 414 static void 415 ena_sysctl_add_rss(struct ena_adapter *adapter) 416 { 417 device_t dev; 418 419 struct sysctl_ctx_list *ctx; 420 struct sysctl_oid *tree; 421 struct sysctl_oid_list *child; 422 423 dev = adapter->pdev; 424 425 ctx = device_get_sysctl_ctx(dev); 426 tree = device_get_sysctl_tree(dev); 427 child = SYSCTL_CHILDREN(tree); 428 429 /* RSS options */ 430 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 431 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 432 child = SYSCTL_CHILDREN(tree); 433 434 /* RSS hash key */ 435 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 436 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 437 ena_sysctl_rss_key, "A", "RSS key."); 438 439 /* Tuneable RSS indirection table */ 440 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 441 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 442 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 443 444 /* RSS indirection table size */ 445 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 446 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 447 "RSS indirection table size."); 448 } 449 #endif /* RSS */ 450 451 452 /* 453 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 454 * 455 * Whether the nodes are registered or unregistered depends on a delta between 456 * the `old` and `new` parameters, representing the number of queues. 457 * 458 * This function is used to hide sysctl attributes for queue nodes which aren't 459 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 460 * 461 * NOTE: 462 * All unregistered nodes must be registered again at detach, i.e. by a call to 463 * this function. 464 */ 465 void 466 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 467 { 468 struct sysctl_oid *oid; 469 int min, max, i; 470 471 min = MIN(old, new); 472 max = MIN(MAX(old, new), adapter->max_num_io_queues); 473 474 for (i = min; i < max; ++i) { 475 oid = adapter->que[i].oid; 476 477 sysctl_wlock(); 478 if (old > new) 479 sysctl_unregister_oid(oid); 480 else 481 sysctl_register_oid(oid); 482 sysctl_wunlock(); 483 } 484 } 485 486 static int 487 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 488 { 489 struct ena_adapter *adapter = arg1; 490 uint32_t val; 491 int error; 492 493 ENA_LOCK_LOCK(); 494 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 495 error = EINVAL; 496 goto unlock; 497 } 498 499 val = 0; 500 error = sysctl_wire_old_buffer(req, sizeof(val)); 501 if (error == 0) { 502 val = adapter->buf_ring_size; 503 error = sysctl_handle_32(oidp, &val, 0, req); 504 } 505 if (error != 0 || req->newptr == NULL) 506 goto unlock; 507 508 if (!powerof2(val) || val == 0) { 509 ena_log(adapter->pdev, ERR, 510 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 511 val); 512 error = EINVAL; 513 goto unlock; 514 } 515 516 if (val != adapter->buf_ring_size) { 517 ena_log(adapter->pdev, INFO, 518 "Requested new Tx buffer ring size: %d. Old size: %d\n", 519 val, adapter->buf_ring_size); 520 521 error = ena_update_buf_ring_size(adapter, val); 522 } else { 523 ena_log(adapter->pdev, ERR, 524 "New Tx buffer ring size is the same as already used: %u\n", 525 adapter->buf_ring_size); 526 } 527 528 unlock: 529 ENA_LOCK_UNLOCK(); 530 531 return (error); 532 } 533 534 static int 535 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 536 { 537 struct ena_adapter *adapter = arg1; 538 uint32_t val; 539 int error; 540 541 ENA_LOCK_LOCK(); 542 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 543 error = EINVAL; 544 goto unlock; 545 } 546 547 val = 0; 548 error = sysctl_wire_old_buffer(req, sizeof(val)); 549 if (error == 0) { 550 val = adapter->requested_rx_ring_size; 551 error = sysctl_handle_32(oidp, &val, 0, req); 552 } 553 if (error != 0 || req->newptr == NULL) 554 goto unlock; 555 556 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 557 ena_log(adapter->pdev, ERR, 558 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 559 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 560 error = EINVAL; 561 goto unlock; 562 } 563 564 /* Check if the parameter is power of 2 */ 565 if (!powerof2(val)) { 566 ena_log(adapter->pdev, ERR, 567 "Requested new Rx queue size (%u) is not a power of 2\n", 568 val); 569 error = EINVAL; 570 goto unlock; 571 } 572 573 if (val != adapter->requested_rx_ring_size) { 574 ena_log(adapter->pdev, INFO, 575 "Requested new Rx queue size: %u. Old size: %u\n", val, 576 adapter->requested_rx_ring_size); 577 578 error = ena_update_queue_size(adapter, 579 adapter->requested_tx_ring_size, val); 580 } else { 581 ena_log(adapter->pdev, ERR, 582 "New Rx queue size is the same as already used: %u\n", 583 adapter->requested_rx_ring_size); 584 } 585 586 unlock: 587 ENA_LOCK_UNLOCK(); 588 589 return (error); 590 } 591 592 /* 593 * Change number of effectively used IO queues adapter->num_io_queues 594 */ 595 static int 596 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 597 { 598 struct ena_adapter *adapter = arg1; 599 uint32_t old_num_queues, tmp = 0; 600 int error; 601 602 ENA_LOCK_LOCK(); 603 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 604 error = EINVAL; 605 goto unlock; 606 } 607 608 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 609 if (error == 0) { 610 tmp = adapter->num_io_queues; 611 error = sysctl_handle_int(oidp, &tmp, 0, req); 612 } 613 if (error != 0 || req->newptr == NULL) 614 goto unlock; 615 616 if (tmp == 0) { 617 ena_log(adapter->pdev, ERR, 618 "Requested number of IO queues is zero\n"); 619 error = EINVAL; 620 goto unlock; 621 } 622 623 /* 624 * The adapter::max_num_io_queues is the HW capability. The system 625 * resources availability may potentially be a tighter limit. Therefore 626 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 627 * always holds true, while the `adapter::msix_vecs` is variable across 628 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 629 */ 630 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 631 ena_log(adapter->pdev, ERR, 632 "Requested number of IO queues is higher than maximum allowed (%u)\n", 633 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 634 error = EINVAL; 635 goto unlock; 636 } 637 if (tmp == adapter->num_io_queues) { 638 ena_log(adapter->pdev, ERR, 639 "Requested number of IO queues is equal to current value " 640 "(%u)\n", 641 adapter->num_io_queues); 642 } else { 643 ena_log(adapter->pdev, INFO, 644 "Requested new number of IO queues: %u, current value: " 645 "%u\n", 646 tmp, adapter->num_io_queues); 647 648 old_num_queues = adapter->num_io_queues; 649 error = ena_update_io_queue_nb(adapter, tmp); 650 if (error != 0) 651 return (error); 652 653 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 654 } 655 656 unlock: 657 ENA_LOCK_UNLOCK(); 658 659 return (error); 660 } 661 662 static int 663 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) 664 { 665 struct ena_adapter *adapter = arg1; 666 uint16_t interval; 667 int error; 668 669 ENA_LOCK_LOCK(); 670 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 671 error = EINVAL; 672 goto unlock; 673 } 674 675 error = sysctl_wire_old_buffer(req, sizeof(interval)); 676 if (error == 0) { 677 interval = adapter->eni_metrics_sample_interval; 678 error = sysctl_handle_16(oidp, &interval, 0, req); 679 } 680 if (error != 0 || req->newptr == NULL) 681 goto unlock; 682 683 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { 684 ena_log(adapter->pdev, ERR, 685 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", 686 ENI_METRICS_MAX_SAMPLE_INTERVAL); 687 error = EINVAL; 688 goto unlock; 689 } 690 691 if (interval == 0) { 692 ena_log(adapter->pdev, INFO, 693 "ENI metrics update is now turned off\n"); 694 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 695 } else { 696 ena_log(adapter->pdev, INFO, 697 "ENI metrics update interval is set to: %" PRIu16 698 " seconds\n", 699 interval); 700 } 701 702 adapter->eni_metrics_sample_interval = interval; 703 704 unlock: 705 ENA_LOCK_UNLOCK(); 706 707 return (0); 708 } 709 710 #ifndef RSS 711 /* 712 * Change the Receive Side Scaling hash key. 713 */ 714 static int 715 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 716 { 717 struct ena_adapter *adapter = arg1; 718 struct ena_com_dev *ena_dev = adapter->ena_dev; 719 enum ena_admin_hash_functions ena_func; 720 char msg[ENA_HASH_KEY_MSG_SIZE]; 721 char elem[3] = { 0 }; 722 char *endp; 723 u8 rss_key[ENA_HASH_KEY_SIZE]; 724 int error, i; 725 726 ENA_LOCK_LOCK(); 727 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 728 error = EINVAL; 729 goto unlock; 730 } 731 732 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 733 error = ENOTSUP; 734 goto unlock; 735 } 736 737 error = sysctl_wire_old_buffer(req, sizeof(msg)); 738 if (error != 0) 739 goto unlock; 740 741 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 742 if (error != 0) { 743 device_printf(adapter->pdev, "Cannot get hash function\n"); 744 goto unlock; 745 } 746 747 if (ena_func != ENA_ADMIN_TOEPLITZ) { 748 error = EINVAL; 749 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 750 goto unlock; 751 } 752 753 error = ena_rss_get_hash_key(ena_dev, rss_key); 754 if (error != 0) { 755 device_printf(adapter->pdev, "Cannot get hash key\n"); 756 goto unlock; 757 } 758 759 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 760 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 761 762 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 763 if (error != 0 || req->newptr == NULL) 764 goto unlock; 765 766 if (strlen(msg) != sizeof(msg) - 1) { 767 error = EINVAL; 768 device_printf(adapter->pdev, "Invalid key size\n"); 769 goto unlock; 770 } 771 772 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 773 strncpy(elem, &msg[i * 2], 2); 774 rss_key[i] = strtol(elem, &endp, 16); 775 776 /* Both hex nibbles in the string must be valid to continue. */ 777 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 778 error = EINVAL; 779 device_printf(adapter->pdev, 780 "Invalid key hex value: '%c'\n", *endp); 781 goto unlock; 782 } 783 } 784 785 error = ena_rss_set_hash(ena_dev, rss_key); 786 if (error != 0) 787 device_printf(adapter->pdev, "Cannot fill hash key\n"); 788 789 unlock: 790 ENA_LOCK_UNLOCK(); 791 792 return (error); 793 } 794 795 /* 796 * Change the Receive Side Scaling indirection table. 797 * 798 * The sysctl entry string consists of one or more `x:y` keypairs, where 799 * x stands for the table index and y for its new value. 800 * Table indices that don't need to be updated can be omitted from the string 801 * and will retain their existing values. If an index is entered more than once, 802 * the last value is used. 803 * 804 * Example: 805 * To update two selected indices in the RSS indirection table, e.g. setting 806 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 807 * used: 808 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 809 */ 810 static int 811 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 812 { 813 int num_queues, error; 814 struct ena_adapter *adapter = arg1; 815 struct ena_indir *indir; 816 char *msg, *buf, *endp; 817 uint32_t idx, value; 818 819 ENA_LOCK_LOCK(); 820 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 821 error = EINVAL; 822 goto unlock; 823 } 824 825 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 826 error = ENOTSUP; 827 goto unlock; 828 } 829 830 indir = adapter->rss_indir; 831 msg = indir->sysctl_buf; 832 833 if (unlikely(indir == NULL)) { 834 error = ENOTSUP; 835 goto unlock; 836 } 837 838 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 839 if (error != 0 || req->newptr == NULL) 840 goto unlock; 841 842 num_queues = adapter->num_io_queues; 843 844 /* 845 * This sysctl expects msg to be a list of `x:y` record pairs, 846 * where x is the indirection table index and y is its value. 847 */ 848 for (buf = msg; *buf != '\0'; buf = endp) { 849 idx = strtol(buf, &endp, 10); 850 851 if (endp == buf || idx < 0) { 852 device_printf(adapter->pdev, "Invalid index: %s\n", 853 buf); 854 error = EINVAL; 855 break; 856 } 857 858 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 859 device_printf(adapter->pdev, "Index %d out of range\n", 860 idx); 861 error = ERANGE; 862 break; 863 } 864 865 buf = endp; 866 867 if (*buf++ != ':') { 868 device_printf(adapter->pdev, "Missing ':' separator\n"); 869 error = EINVAL; 870 break; 871 } 872 873 value = strtol(buf, &endp, 10); 874 875 if (endp == buf || value < 0) { 876 device_printf(adapter->pdev, "Invalid value: %s\n", 877 buf); 878 error = EINVAL; 879 break; 880 } 881 882 if (value >= num_queues) { 883 device_printf(adapter->pdev, "Value %d out of range\n", 884 value); 885 error = ERANGE; 886 break; 887 } 888 889 indir->table[idx] = value; 890 } 891 892 if (error != 0) /* Reload indirection table with last good data. */ 893 ena_rss_indir_get(adapter, indir->table); 894 895 /* At this point msg has been clobbered by sysctl_handle_string. */ 896 ena_rss_copy_indir_buf(msg, indir->table); 897 898 if (error == 0) 899 error = ena_rss_indir_set(adapter, indir->table); 900 901 unlock: 902 ENA_LOCK_UNLOCK(); 903 904 return (error); 905 } 906 #endif /* RSS */ 907