1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_rss.h" 35 36 #include "ena_rss.h" 37 #include "ena_sysctl.h" 38 39 static void ena_sysctl_add_wd(struct ena_adapter *); 40 static void ena_sysctl_add_stats(struct ena_adapter *); 41 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 44 #ifndef RSS 45 static void ena_sysctl_add_rss(struct ena_adapter *); 46 #endif 47 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 48 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); 51 #ifndef RSS 52 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 54 #endif 55 56 /* Limit max ENI sample rate to be an hour. */ 57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 59 60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 61 "ENA driver parameters"); 62 63 /* 64 * Logging level for changing verbosity of the output 65 */ 66 int ena_log_level = ENA_INFO; 67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, 68 "Logging level indicating verbosity of the logs"); 69 70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 71 ENA_DRV_MODULE_VERSION, "ENA driver version"); 72 73 /* 74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 76 * of time and lead to the OS instability as it needs to look for the contiguous 77 * pages. 78 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 79 * the network performance is the priority, the 9k mbufs can be used. 80 */ 81 int ena_enable_9k_mbufs = 0; 82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 84 85 /* 86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 87 * false. This option may be important for platforms, which often handle packet 88 * headers on Tx with total header size greater than 96B, as it may 89 * reduce the latency. 90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 91 * packet drops. 92 */ 93 bool ena_force_large_llq_header = false; 94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 95 &ena_force_large_llq_header, 0, 96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 97 98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 99 100 void 101 ena_sysctl_add_nodes(struct ena_adapter *adapter) 102 { 103 ena_sysctl_add_wd(adapter); 104 ena_sysctl_add_stats(adapter); 105 ena_sysctl_add_eni_metrics(adapter); 106 ena_sysctl_add_tuneables(adapter); 107 #ifndef RSS 108 ena_sysctl_add_rss(adapter); 109 #endif 110 } 111 112 static void 113 ena_sysctl_add_wd(struct ena_adapter *adapter) 114 { 115 device_t dev; 116 117 struct sysctl_ctx_list *ctx; 118 struct sysctl_oid *tree; 119 struct sysctl_oid_list *child; 120 121 dev = adapter->pdev; 122 123 ctx = device_get_sysctl_ctx(dev); 124 tree = device_get_sysctl_tree(dev); 125 child = SYSCTL_CHILDREN(tree); 126 127 /* Sysctl calls for Watchdog service */ 128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN, 129 &adapter->wd_active, 0, "Watchdog is active"); 130 131 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 132 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 133 "Timeout for Keep Alive messages"); 134 135 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 136 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 137 "Timeout for TX completion"); 138 139 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 140 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 141 "Number of TX queues to check per run"); 142 143 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 144 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 145 "Max number of timeouted packets"); 146 } 147 148 static void 149 ena_sysctl_add_stats(struct ena_adapter *adapter) 150 { 151 device_t dev; 152 153 struct ena_ring *tx_ring; 154 struct ena_ring *rx_ring; 155 156 struct ena_hw_stats *hw_stats; 157 struct ena_stats_dev *dev_stats; 158 struct ena_stats_tx *tx_stats; 159 struct ena_stats_rx *rx_stats; 160 struct ena_com_stats_admin *admin_stats; 161 162 struct sysctl_ctx_list *ctx; 163 struct sysctl_oid *tree; 164 struct sysctl_oid_list *child; 165 166 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 167 struct sysctl_oid *admin_node; 168 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 169 struct sysctl_oid_list *admin_list; 170 171 #define QUEUE_NAME_LEN 32 172 char namebuf[QUEUE_NAME_LEN]; 173 int i; 174 175 dev = adapter->pdev; 176 177 ctx = device_get_sysctl_ctx(dev); 178 tree = device_get_sysctl_tree(dev); 179 child = SYSCTL_CHILDREN(tree); 180 181 tx_ring = adapter->tx_ring; 182 rx_ring = adapter->rx_ring; 183 184 hw_stats = &adapter->hw_stats; 185 dev_stats = &adapter->dev_stats; 186 admin_stats = &adapter->ena_dev->admin_queue.stats; 187 188 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, 189 &dev_stats->wd_expired, "Watchdog expiry count"); 190 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, 191 &dev_stats->interface_up, "Network interface up count"); 192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 193 CTLFLAG_RD, &dev_stats->interface_down, 194 "Network interface down count"); 195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 196 CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses"); 197 198 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 199 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 200 201 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 202 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 203 queue_list = SYSCTL_CHILDREN(queue_node); 204 205 adapter->que[i].oid = queue_node; 206 207 #ifdef RSS 208 /* Common stats */ 209 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, 210 &adapter->que[i].cpu, 0, "CPU affinity"); 211 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD, 212 &adapter->que[i].domain, 0, "NUMA domain"); 213 #endif 214 215 /* TX specific stats */ 216 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", 217 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 218 tx_list = SYSCTL_CHILDREN(tx_node); 219 220 tx_stats = &tx_ring->tx_stats; 221 222 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", 223 CTLFLAG_RD, &tx_stats->cnt, "Packets sent"); 224 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", 225 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); 226 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 227 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, 228 "TX buffer preparation failures"); 229 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 230 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, 231 "DMA mapping failures"); 232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", 233 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); 234 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 235 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, 236 "TX completions missed"); 237 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", 238 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); 239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses", 240 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count"); 241 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 242 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err, 243 "Mbuf collapse failures"); 244 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups", 245 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); 246 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops", 247 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); 248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 249 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy, 250 "Header copies for llq transaction"); 251 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 252 "unmask_interrupt_num", CTLFLAG_RD, 253 &tx_stats->unmask_interrupt_num, 254 "Unmasked interrupt count"); 255 256 /* RX specific stats */ 257 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", 258 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 259 rx_list = SYSCTL_CHILDREN(rx_node); 260 261 rx_stats = &rx_ring->rx_stats; 262 263 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", 264 CTLFLAG_RD, &rx_stats->cnt, "Packets received"); 265 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", 266 CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); 267 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", 268 CTLFLAG_RD, &rx_stats->refil_partial, 269 "Partial refilled mbufs"); 270 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad", 271 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum"); 272 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 273 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, 274 "Failed mbuf allocs"); 275 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 276 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail, 277 "Failed jumbo mbuf allocs"); 278 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 279 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, 280 "DMA mapping errors"); 281 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", 282 CTLFLAG_RD, &rx_stats->bad_desc_num, 283 "Bad descriptor count"); 284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", 285 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); 286 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring", 287 CTLFLAG_RD, &rx_stats->empty_rx_ring, 288 "RX descriptors depletion count"); 289 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good", 290 CTLFLAG_RD, &rx_stats->csum_good, 291 "Valid RX checksum calculations"); 292 } 293 294 /* Stats read from device */ 295 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 296 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 297 hw_list = SYSCTL_CHILDREN(hw_node); 298 299 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 300 &hw_stats->rx_packets, "Packets received"); 301 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 302 &hw_stats->tx_packets, "Packets transmitted"); 303 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 304 &hw_stats->rx_bytes, "Bytes received"); 305 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 306 &hw_stats->tx_bytes, "Bytes transmitted"); 307 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 308 &hw_stats->rx_drops, "Receive packet drops"); 309 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 310 &hw_stats->tx_drops, "Transmit packet drops"); 311 312 /* ENA Admin queue stats */ 313 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 314 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 315 admin_list = SYSCTL_CHILDREN(admin_node); 316 317 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 318 &admin_stats->aborted_cmd, 0, "Aborted commands"); 319 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 320 &admin_stats->submitted_cmd, 0, "Submitted commands"); 321 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 322 &admin_stats->completed_cmd, 0, "Completed commands"); 323 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 324 &admin_stats->out_of_space, 0, "Queue out of space"); 325 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 326 &admin_stats->no_completion, 0, "Commands not completed"); 327 } 328 329 static void 330 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 331 { 332 device_t dev; 333 struct ena_admin_eni_stats *eni_metrics; 334 335 struct sysctl_ctx_list *ctx; 336 struct sysctl_oid *tree; 337 struct sysctl_oid_list *child; 338 339 struct sysctl_oid *eni_node; 340 struct sysctl_oid_list *eni_list; 341 342 dev = adapter->pdev; 343 344 ctx = device_get_sysctl_ctx(dev); 345 tree = device_get_sysctl_tree(dev); 346 child = SYSCTL_CHILDREN(tree); 347 348 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 349 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 350 eni_list = SYSCTL_CHILDREN(eni_node); 351 352 eni_metrics = &adapter->eni_metrics; 353 354 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 355 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 356 "Inbound BW allowance exceeded"); 357 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 358 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 359 "Outbound BW allowance exceeded"); 360 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 361 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 362 "PPS allowance exceeded"); 363 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 364 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 365 "Connection tracking allowance exceeded"); 366 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 367 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 368 "Linklocal packet rate allowance exceeded"); 369 370 /* 371 * Tuneable, which determines how often ENI metrics will be read. 372 * 0 means it's turned off. Maximum allowed value is limited by: 373 * ENI_METRICS_MAX_SAMPLE_INTERVAL. 374 */ 375 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", 376 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 377 ena_sysctl_eni_metrics_interval, "SU", 378 "Interval in seconds for updating ENI emetrics. 0 turns off the update."); 379 } 380 381 static void 382 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 383 { 384 device_t dev; 385 386 struct sysctl_ctx_list *ctx; 387 struct sysctl_oid *tree; 388 struct sysctl_oid_list *child; 389 390 dev = adapter->pdev; 391 392 ctx = device_get_sysctl_ctx(dev); 393 tree = device_get_sysctl_tree(dev); 394 child = SYSCTL_CHILDREN(tree); 395 396 /* Tuneable number of buffers in the buf-ring (drbr) */ 397 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 398 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 399 ena_sysctl_buf_ring_size, "I", 400 "Size of the Tx buffer ring (drbr)."); 401 402 /* Tuneable number of the Rx ring size */ 403 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 404 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 405 ena_sysctl_rx_queue_size, "I", 406 "Size of the Rx ring. The size should be a power of 2."); 407 408 /* Tuneable number of IO queues */ 409 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 410 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 411 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 412 } 413 414 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 415 #ifndef RSS 416 static void 417 ena_sysctl_add_rss(struct ena_adapter *adapter) 418 { 419 device_t dev; 420 421 struct sysctl_ctx_list *ctx; 422 struct sysctl_oid *tree; 423 struct sysctl_oid_list *child; 424 425 dev = adapter->pdev; 426 427 ctx = device_get_sysctl_ctx(dev); 428 tree = device_get_sysctl_tree(dev); 429 child = SYSCTL_CHILDREN(tree); 430 431 /* RSS options */ 432 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 433 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 434 child = SYSCTL_CHILDREN(tree); 435 436 /* RSS hash key */ 437 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 438 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 439 ena_sysctl_rss_key, "A", "RSS key."); 440 441 /* Tuneable RSS indirection table */ 442 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 443 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 444 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 445 446 /* RSS indirection table size */ 447 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 448 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 449 "RSS indirection table size."); 450 } 451 #endif /* RSS */ 452 453 454 /* 455 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 456 * 457 * Whether the nodes are registered or unregistered depends on a delta between 458 * the `old` and `new` parameters, representing the number of queues. 459 * 460 * This function is used to hide sysctl attributes for queue nodes which aren't 461 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 462 * 463 * NOTE: 464 * All unregistered nodes must be registered again at detach, i.e. by a call to 465 * this function. 466 */ 467 void 468 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 469 { 470 struct sysctl_oid *oid; 471 int min, max, i; 472 473 min = MIN(old, new); 474 max = MIN(MAX(old, new), adapter->max_num_io_queues); 475 476 for (i = min; i < max; ++i) { 477 oid = adapter->que[i].oid; 478 479 sysctl_wlock(); 480 if (old > new) 481 sysctl_unregister_oid(oid); 482 else 483 sysctl_register_oid(oid); 484 sysctl_wunlock(); 485 } 486 } 487 488 static int 489 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 490 { 491 struct ena_adapter *adapter = arg1; 492 uint32_t val; 493 int error; 494 495 ENA_LOCK_LOCK(); 496 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 497 error = EINVAL; 498 goto unlock; 499 } 500 501 val = 0; 502 error = sysctl_wire_old_buffer(req, sizeof(val)); 503 if (error == 0) { 504 val = adapter->buf_ring_size; 505 error = sysctl_handle_32(oidp, &val, 0, req); 506 } 507 if (error != 0 || req->newptr == NULL) 508 goto unlock; 509 510 if (!powerof2(val) || val == 0) { 511 ena_log(adapter->pdev, ERR, 512 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 513 val); 514 error = EINVAL; 515 goto unlock; 516 } 517 518 if (val != adapter->buf_ring_size) { 519 ena_log(adapter->pdev, INFO, 520 "Requested new Tx buffer ring size: %d. Old size: %d\n", 521 val, adapter->buf_ring_size); 522 523 error = ena_update_buf_ring_size(adapter, val); 524 } else { 525 ena_log(adapter->pdev, ERR, 526 "New Tx buffer ring size is the same as already used: %u\n", 527 adapter->buf_ring_size); 528 } 529 530 unlock: 531 ENA_LOCK_UNLOCK(); 532 533 return (error); 534 } 535 536 static int 537 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 538 { 539 struct ena_adapter *adapter = arg1; 540 uint32_t val; 541 int error; 542 543 ENA_LOCK_LOCK(); 544 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 545 error = EINVAL; 546 goto unlock; 547 } 548 549 val = 0; 550 error = sysctl_wire_old_buffer(req, sizeof(val)); 551 if (error == 0) { 552 val = adapter->requested_rx_ring_size; 553 error = sysctl_handle_32(oidp, &val, 0, req); 554 } 555 if (error != 0 || req->newptr == NULL) 556 goto unlock; 557 558 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 559 ena_log(adapter->pdev, ERR, 560 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 561 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 562 error = EINVAL; 563 goto unlock; 564 } 565 566 /* Check if the parameter is power of 2 */ 567 if (!powerof2(val)) { 568 ena_log(adapter->pdev, ERR, 569 "Requested new Rx queue size (%u) is not a power of 2\n", 570 val); 571 error = EINVAL; 572 goto unlock; 573 } 574 575 if (val != adapter->requested_rx_ring_size) { 576 ena_log(adapter->pdev, INFO, 577 "Requested new Rx queue size: %u. Old size: %u\n", val, 578 adapter->requested_rx_ring_size); 579 580 error = ena_update_queue_size(adapter, 581 adapter->requested_tx_ring_size, val); 582 } else { 583 ena_log(adapter->pdev, ERR, 584 "New Rx queue size is the same as already used: %u\n", 585 adapter->requested_rx_ring_size); 586 } 587 588 unlock: 589 ENA_LOCK_UNLOCK(); 590 591 return (error); 592 } 593 594 /* 595 * Change number of effectively used IO queues adapter->num_io_queues 596 */ 597 static int 598 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 599 { 600 struct ena_adapter *adapter = arg1; 601 uint32_t old_num_queues, tmp = 0; 602 int error; 603 604 ENA_LOCK_LOCK(); 605 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 606 error = EINVAL; 607 goto unlock; 608 } 609 610 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 611 if (error == 0) { 612 tmp = adapter->num_io_queues; 613 error = sysctl_handle_int(oidp, &tmp, 0, req); 614 } 615 if (error != 0 || req->newptr == NULL) 616 goto unlock; 617 618 if (tmp == 0) { 619 ena_log(adapter->pdev, ERR, 620 "Requested number of IO queues is zero\n"); 621 error = EINVAL; 622 goto unlock; 623 } 624 625 /* 626 * The adapter::max_num_io_queues is the HW capability. The system 627 * resources availability may potentially be a tighter limit. Therefore 628 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 629 * always holds true, while the `adapter::msix_vecs` is variable across 630 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 631 */ 632 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 633 ena_log(adapter->pdev, ERR, 634 "Requested number of IO queues is higher than maximum allowed (%u)\n", 635 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 636 error = EINVAL; 637 goto unlock; 638 } 639 if (tmp == adapter->num_io_queues) { 640 ena_log(adapter->pdev, ERR, 641 "Requested number of IO queues is equal to current value " 642 "(%u)\n", 643 adapter->num_io_queues); 644 } else { 645 ena_log(adapter->pdev, INFO, 646 "Requested new number of IO queues: %u, current value: " 647 "%u\n", 648 tmp, adapter->num_io_queues); 649 650 old_num_queues = adapter->num_io_queues; 651 error = ena_update_io_queue_nb(adapter, tmp); 652 if (error != 0) 653 return (error); 654 655 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 656 } 657 658 unlock: 659 ENA_LOCK_UNLOCK(); 660 661 return (error); 662 } 663 664 static int 665 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) 666 { 667 struct ena_adapter *adapter = arg1; 668 uint16_t interval; 669 int error; 670 671 ENA_LOCK_LOCK(); 672 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 673 error = EINVAL; 674 goto unlock; 675 } 676 677 error = sysctl_wire_old_buffer(req, sizeof(interval)); 678 if (error == 0) { 679 interval = adapter->eni_metrics_sample_interval; 680 error = sysctl_handle_16(oidp, &interval, 0, req); 681 } 682 if (error != 0 || req->newptr == NULL) 683 goto unlock; 684 685 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { 686 ena_log(adapter->pdev, ERR, 687 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", 688 ENI_METRICS_MAX_SAMPLE_INTERVAL); 689 error = EINVAL; 690 goto unlock; 691 } 692 693 if (interval == 0) { 694 ena_log(adapter->pdev, INFO, 695 "ENI metrics update is now turned off\n"); 696 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 697 } else { 698 ena_log(adapter->pdev, INFO, 699 "ENI metrics update interval is set to: %" PRIu16 700 " seconds\n", 701 interval); 702 } 703 704 adapter->eni_metrics_sample_interval = interval; 705 706 unlock: 707 ENA_LOCK_UNLOCK(); 708 709 return (0); 710 } 711 712 #ifndef RSS 713 /* 714 * Change the Receive Side Scaling hash key. 715 */ 716 static int 717 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 718 { 719 struct ena_adapter *adapter = arg1; 720 struct ena_com_dev *ena_dev = adapter->ena_dev; 721 enum ena_admin_hash_functions ena_func; 722 char msg[ENA_HASH_KEY_MSG_SIZE]; 723 char elem[3] = { 0 }; 724 char *endp; 725 u8 rss_key[ENA_HASH_KEY_SIZE]; 726 int error, i; 727 728 ENA_LOCK_LOCK(); 729 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 730 error = EINVAL; 731 goto unlock; 732 } 733 734 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 735 error = ENOTSUP; 736 goto unlock; 737 } 738 739 error = sysctl_wire_old_buffer(req, sizeof(msg)); 740 if (error != 0) 741 goto unlock; 742 743 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 744 if (error != 0) { 745 device_printf(adapter->pdev, "Cannot get hash function\n"); 746 goto unlock; 747 } 748 749 if (ena_func != ENA_ADMIN_TOEPLITZ) { 750 error = EINVAL; 751 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 752 goto unlock; 753 } 754 755 error = ena_rss_get_hash_key(ena_dev, rss_key); 756 if (error != 0) { 757 device_printf(adapter->pdev, "Cannot get hash key\n"); 758 goto unlock; 759 } 760 761 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 762 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 763 764 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 765 if (error != 0 || req->newptr == NULL) 766 goto unlock; 767 768 if (strlen(msg) != sizeof(msg) - 1) { 769 error = EINVAL; 770 device_printf(adapter->pdev, "Invalid key size\n"); 771 goto unlock; 772 } 773 774 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 775 strncpy(elem, &msg[i * 2], 2); 776 rss_key[i] = strtol(elem, &endp, 16); 777 778 /* Both hex nibbles in the string must be valid to continue. */ 779 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 780 error = EINVAL; 781 device_printf(adapter->pdev, 782 "Invalid key hex value: '%c'\n", *endp); 783 goto unlock; 784 } 785 } 786 787 error = ena_rss_set_hash(ena_dev, rss_key); 788 if (error != 0) 789 device_printf(adapter->pdev, "Cannot fill hash key\n"); 790 791 unlock: 792 ENA_LOCK_UNLOCK(); 793 794 return (error); 795 } 796 797 /* 798 * Change the Receive Side Scaling indirection table. 799 * 800 * The sysctl entry string consists of one or more `x:y` keypairs, where 801 * x stands for the table index and y for its new value. 802 * Table indices that don't need to be updated can be omitted from the string 803 * and will retain their existing values. If an index is entered more than once, 804 * the last value is used. 805 * 806 * Example: 807 * To update two selected indices in the RSS indirection table, e.g. setting 808 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 809 * used: 810 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 811 */ 812 static int 813 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 814 { 815 int num_queues, error; 816 struct ena_adapter *adapter = arg1; 817 struct ena_indir *indir; 818 char *msg, *buf, *endp; 819 uint32_t idx, value; 820 821 ENA_LOCK_LOCK(); 822 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 823 error = EINVAL; 824 goto unlock; 825 } 826 827 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 828 error = ENOTSUP; 829 goto unlock; 830 } 831 832 indir = adapter->rss_indir; 833 msg = indir->sysctl_buf; 834 835 if (unlikely(indir == NULL)) { 836 error = ENOTSUP; 837 goto unlock; 838 } 839 840 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 841 if (error != 0 || req->newptr == NULL) 842 goto unlock; 843 844 num_queues = adapter->num_io_queues; 845 846 /* 847 * This sysctl expects msg to be a list of `x:y` record pairs, 848 * where x is the indirection table index and y is its value. 849 */ 850 for (buf = msg; *buf != '\0'; buf = endp) { 851 idx = strtol(buf, &endp, 10); 852 853 if (endp == buf || idx < 0) { 854 device_printf(adapter->pdev, "Invalid index: %s\n", 855 buf); 856 error = EINVAL; 857 break; 858 } 859 860 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 861 device_printf(adapter->pdev, "Index %d out of range\n", 862 idx); 863 error = ERANGE; 864 break; 865 } 866 867 buf = endp; 868 869 if (*buf++ != ':') { 870 device_printf(adapter->pdev, "Missing ':' separator\n"); 871 error = EINVAL; 872 break; 873 } 874 875 value = strtol(buf, &endp, 10); 876 877 if (endp == buf || value < 0) { 878 device_printf(adapter->pdev, "Invalid value: %s\n", 879 buf); 880 error = EINVAL; 881 break; 882 } 883 884 if (value >= num_queues) { 885 device_printf(adapter->pdev, "Value %d out of range\n", 886 value); 887 error = ERANGE; 888 break; 889 } 890 891 indir->table[idx] = value; 892 } 893 894 if (error != 0) /* Reload indirection table with last good data. */ 895 ena_rss_indir_get(adapter, indir->table); 896 897 /* At this point msg has been clobbered by sysctl_handle_string. */ 898 ena_rss_copy_indir_buf(msg, indir->table); 899 900 if (error == 0) 901 error = ena_rss_indir_set(adapter, indir->table); 902 903 unlock: 904 ENA_LOCK_UNLOCK(); 905 906 return (error); 907 } 908 #endif /* RSS */ 909