1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_rss.h" 35 36 #include "ena_sysctl.h" 37 #include "ena_rss.h" 38 39 static void ena_sysctl_add_wd(struct ena_adapter *); 40 static void ena_sysctl_add_stats(struct ena_adapter *); 41 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 44 #ifndef RSS 45 static void ena_sysctl_add_rss(struct ena_adapter *); 46 #endif 47 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 48 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); 51 #ifndef RSS 52 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 54 #endif 55 56 /* Limit max ENI sample rate to be an hour. */ 57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 59 60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 61 "ENA driver parameters"); 62 63 /* 64 * Logging level for changing verbosity of the output 65 */ 66 int ena_log_level = ENA_INFO; 67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, 68 &ena_log_level, 0, "Logging level indicating verbosity of the logs"); 69 70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 71 DRV_MODULE_VERSION, "ENA driver version"); 72 73 /* 74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 76 * of time and lead to the OS instability as it needs to look for the contiguous 77 * pages. 78 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 79 * the network performance is the priority, the 9k mbufs can be used. 80 */ 81 int ena_enable_9k_mbufs = 0; 82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 84 85 /* 86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 87 * false. This option may be important for platforms, which often handle packet 88 * headers on Tx with total header size greater than 96B, as it may 89 * reduce the latency. 90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 91 * packet drops. 92 */ 93 bool ena_force_large_llq_header = false; 94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 95 &ena_force_large_llq_header, 0, 96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 97 98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 99 100 void 101 ena_sysctl_add_nodes(struct ena_adapter *adapter) 102 { 103 ena_sysctl_add_wd(adapter); 104 ena_sysctl_add_stats(adapter); 105 ena_sysctl_add_eni_metrics(adapter); 106 ena_sysctl_add_tuneables(adapter); 107 #ifndef RSS 108 ena_sysctl_add_rss(adapter); 109 #endif 110 } 111 112 static void 113 ena_sysctl_add_wd(struct ena_adapter *adapter) 114 { 115 device_t dev; 116 117 struct sysctl_ctx_list *ctx; 118 struct sysctl_oid *tree; 119 struct sysctl_oid_list *child; 120 121 dev = adapter->pdev; 122 123 ctx = device_get_sysctl_ctx(dev); 124 tree = device_get_sysctl_tree(dev); 125 child = SYSCTL_CHILDREN(tree); 126 127 /* Sysctl calls for Watchdog service */ 128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", 129 CTLFLAG_RWTUN, &adapter->wd_active, 0, 130 "Watchdog is active"); 131 132 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 133 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 134 "Timeout for Keep Alive messages"); 135 136 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 137 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 138 "Timeout for TX completion"); 139 140 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 141 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 142 "Number of TX queues to check per run"); 143 144 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 145 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 146 "Max number of timeouted packets"); 147 } 148 149 static void 150 ena_sysctl_add_stats(struct ena_adapter *adapter) 151 { 152 device_t dev; 153 154 struct ena_ring *tx_ring; 155 struct ena_ring *rx_ring; 156 157 struct ena_hw_stats *hw_stats; 158 struct ena_stats_dev *dev_stats; 159 struct ena_stats_tx *tx_stats; 160 struct ena_stats_rx *rx_stats; 161 struct ena_com_stats_admin *admin_stats; 162 163 struct sysctl_ctx_list *ctx; 164 struct sysctl_oid *tree; 165 struct sysctl_oid_list *child; 166 167 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 168 struct sysctl_oid *admin_node; 169 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 170 struct sysctl_oid_list *admin_list; 171 172 #define QUEUE_NAME_LEN 32 173 char namebuf[QUEUE_NAME_LEN]; 174 int i; 175 176 dev = adapter->pdev; 177 178 ctx = device_get_sysctl_ctx(dev); 179 tree = device_get_sysctl_tree(dev); 180 child = SYSCTL_CHILDREN(tree); 181 182 tx_ring = adapter->tx_ring; 183 rx_ring = adapter->rx_ring; 184 185 hw_stats = &adapter->hw_stats; 186 dev_stats = &adapter->dev_stats; 187 admin_stats = &adapter->ena_dev->admin_queue.stats; 188 189 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", 190 CTLFLAG_RD, &dev_stats->wd_expired, 191 "Watchdog expiry count"); 192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", 193 CTLFLAG_RD, &dev_stats->interface_up, 194 "Network interface up count"); 195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 196 CTLFLAG_RD, &dev_stats->interface_down, 197 "Network interface down count"); 198 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 199 CTLFLAG_RD, &dev_stats->admin_q_pause, 200 "Admin queue pauses"); 201 202 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 203 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 204 205 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 206 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 207 queue_list = SYSCTL_CHILDREN(queue_node); 208 209 adapter->que[i].oid = queue_node; 210 211 #ifdef RSS 212 /* Common stats */ 213 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", 214 CTLFLAG_RD, &adapter->que[i].cpu, 0, "CPU affinity"); 215 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", 216 CTLFLAG_RD, &adapter->que[i].domain, 0, "NUMA domain"); 217 #endif 218 219 /* TX specific stats */ 220 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 221 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 222 tx_list = SYSCTL_CHILDREN(tx_node); 223 224 tx_stats = &tx_ring->tx_stats; 225 226 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 227 "count", CTLFLAG_RD, 228 &tx_stats->cnt, "Packets sent"); 229 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 230 "bytes", CTLFLAG_RD, 231 &tx_stats->bytes, "Bytes sent"); 232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 233 "prepare_ctx_err", CTLFLAG_RD, 234 &tx_stats->prepare_ctx_err, 235 "TX buffer preparation failures"); 236 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 237 "dma_mapping_err", CTLFLAG_RD, 238 &tx_stats->dma_mapping_err, "DMA mapping failures"); 239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 240 "doorbells", CTLFLAG_RD, 241 &tx_stats->doorbells, "Queue doorbells"); 242 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 243 "missing_tx_comp", CTLFLAG_RD, 244 &tx_stats->missing_tx_comp, "TX completions missed"); 245 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 246 "bad_req_id", CTLFLAG_RD, 247 &tx_stats->bad_req_id, "Bad request id count"); 248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 249 "mbuf_collapses", CTLFLAG_RD, 250 &tx_stats->collapse, 251 "Mbuf collapse count"); 252 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 253 "mbuf_collapse_err", CTLFLAG_RD, 254 &tx_stats->collapse_err, 255 "Mbuf collapse failures"); 256 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 257 "queue_wakeups", CTLFLAG_RD, 258 &tx_stats->queue_wakeup, "Queue wakeups"); 259 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 260 "queue_stops", CTLFLAG_RD, 261 &tx_stats->queue_stop, "Queue stops"); 262 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 263 "llq_buffer_copy", CTLFLAG_RD, 264 &tx_stats->llq_buffer_copy, 265 "Header copies for llq transaction"); 266 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 267 "unmask_interrupt_num", CTLFLAG_RD, 268 &tx_stats->unmask_interrupt_num, 269 "Unmasked interrupt count"); 270 271 /* RX specific stats */ 272 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 273 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 274 rx_list = SYSCTL_CHILDREN(rx_node); 275 276 rx_stats = &rx_ring->rx_stats; 277 278 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 279 "count", CTLFLAG_RD, 280 &rx_stats->cnt, "Packets received"); 281 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 282 "bytes", CTLFLAG_RD, 283 &rx_stats->bytes, "Bytes received"); 284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 285 "refil_partial", CTLFLAG_RD, 286 &rx_stats->refil_partial, "Partial refilled mbufs"); 287 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 288 "csum_bad", CTLFLAG_RD, 289 &rx_stats->csum_bad, "Bad RX checksum"); 290 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 291 "mbuf_alloc_fail", CTLFLAG_RD, 292 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs"); 293 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 294 "mjum_alloc_fail", CTLFLAG_RD, 295 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs"); 296 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 297 "dma_mapping_err", CTLFLAG_RD, 298 &rx_stats->dma_mapping_err, "DMA mapping errors"); 299 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 300 "bad_desc_num", CTLFLAG_RD, 301 &rx_stats->bad_desc_num, "Bad descriptor count"); 302 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 303 "bad_req_id", CTLFLAG_RD, 304 &rx_stats->bad_req_id, "Bad request id count"); 305 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 306 "empty_rx_ring", CTLFLAG_RD, 307 &rx_stats->empty_rx_ring, "RX descriptors depletion count"); 308 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 309 "csum_good", CTLFLAG_RD, 310 &rx_stats->csum_good, "Valid RX checksum calculations"); 311 } 312 313 /* Stats read from device */ 314 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 315 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 316 hw_list = SYSCTL_CHILDREN(hw_node); 317 318 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 319 &hw_stats->rx_packets, "Packets received"); 320 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 321 &hw_stats->tx_packets, "Packets transmitted"); 322 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 323 &hw_stats->rx_bytes, "Bytes received"); 324 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 325 &hw_stats->tx_bytes, "Bytes transmitted"); 326 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 327 &hw_stats->rx_drops, "Receive packet drops"); 328 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 329 &hw_stats->tx_drops, "Transmit packet drops"); 330 331 /* ENA Admin queue stats */ 332 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 333 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 334 admin_list = SYSCTL_CHILDREN(admin_node); 335 336 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 337 &admin_stats->aborted_cmd, 0, "Aborted commands"); 338 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 339 &admin_stats->submitted_cmd, 0, "Submitted commands"); 340 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 341 &admin_stats->completed_cmd, 0, "Completed commands"); 342 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 343 &admin_stats->out_of_space, 0, "Queue out of space"); 344 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 345 &admin_stats->no_completion, 0, "Commands not completed"); 346 } 347 348 static void 349 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 350 { 351 device_t dev; 352 struct ena_admin_eni_stats *eni_metrics; 353 354 struct sysctl_ctx_list *ctx; 355 struct sysctl_oid *tree; 356 struct sysctl_oid_list *child; 357 358 struct sysctl_oid *eni_node; 359 struct sysctl_oid_list *eni_list; 360 361 dev = adapter->pdev; 362 363 ctx = device_get_sysctl_ctx(dev); 364 tree = device_get_sysctl_tree(dev); 365 child = SYSCTL_CHILDREN(tree); 366 367 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 368 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 369 eni_list = SYSCTL_CHILDREN(eni_node); 370 371 eni_metrics = &adapter->eni_metrics; 372 373 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 374 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 375 "Inbound BW allowance exceeded"); 376 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 377 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 378 "Outbound BW allowance exceeded"); 379 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 380 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 381 "PPS allowance exceeded"); 382 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 383 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 384 "Connection tracking allowance exceeded"); 385 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 386 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 387 "Linklocal packet rate allowance exceeded"); 388 389 /* 390 * Tuneable, which determines how often ENI metrics will be read. 391 * 0 means it's turned off. Maximum allowed value is limited by: 392 * ENI_METRICS_MAX_SAMPLE_INTERVAL. 393 */ 394 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", 395 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 396 ena_sysctl_eni_metrics_interval, "SU", 397 "Interval in seconds for updating ENI emetrics. 0 turns off the update."); 398 } 399 400 static void 401 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 402 { 403 device_t dev; 404 405 struct sysctl_ctx_list *ctx; 406 struct sysctl_oid *tree; 407 struct sysctl_oid_list *child; 408 409 dev = adapter->pdev; 410 411 ctx = device_get_sysctl_ctx(dev); 412 tree = device_get_sysctl_tree(dev); 413 child = SYSCTL_CHILDREN(tree); 414 415 /* Tuneable number of buffers in the buf-ring (drbr) */ 416 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 417 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 418 ena_sysctl_buf_ring_size, "I", 419 "Size of the Tx buffer ring (drbr)."); 420 421 /* Tuneable number of the Rx ring size */ 422 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 423 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 424 ena_sysctl_rx_queue_size, "I", 425 "Size of the Rx ring. The size should be a power of 2."); 426 427 /* Tuneable number of IO queues */ 428 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 429 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 430 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 431 } 432 433 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 434 #ifndef RSS 435 static void 436 ena_sysctl_add_rss(struct ena_adapter *adapter) 437 { 438 device_t dev; 439 440 struct sysctl_ctx_list *ctx; 441 struct sysctl_oid *tree; 442 struct sysctl_oid_list *child; 443 444 dev = adapter->pdev; 445 446 ctx = device_get_sysctl_ctx(dev); 447 tree = device_get_sysctl_tree(dev); 448 child = SYSCTL_CHILDREN(tree); 449 450 /* RSS options */ 451 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 452 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 453 child = SYSCTL_CHILDREN(tree); 454 455 /* RSS hash key */ 456 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 457 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 458 ena_sysctl_rss_key, "A", "RSS key."); 459 460 /* Tuneable RSS indirection table */ 461 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 462 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 463 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 464 465 /* RSS indirection table size */ 466 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 467 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 468 "RSS indirection table size."); 469 } 470 #endif /* RSS */ 471 472 473 /* 474 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 475 * 476 * Whether the nodes are registered or unregistered depends on a delta between 477 * the `old` and `new` parameters, representing the number of queues. 478 * 479 * This function is used to hide sysctl attributes for queue nodes which aren't 480 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 481 * 482 * NOTE: 483 * All unregistered nodes must be registered again at detach, i.e. by a call to 484 * this function. 485 */ 486 void 487 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 488 { 489 device_t dev; 490 struct sysctl_oid *oid; 491 int min, max, i; 492 493 dev = adapter->pdev; 494 min = MIN(old, new); 495 max = MIN(MAX(old, new), adapter->max_num_io_queues); 496 497 for (i = min; i < max; ++i) { 498 oid = adapter->que[i].oid; 499 500 sysctl_wlock(); 501 if (old > new) 502 sysctl_unregister_oid(oid); 503 else 504 sysctl_register_oid(oid); 505 sysctl_wunlock(); 506 } 507 } 508 509 static int 510 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 511 { 512 struct ena_adapter *adapter = arg1; 513 uint32_t val; 514 int error; 515 516 ENA_LOCK_LOCK(); 517 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 518 error = EINVAL; 519 goto unlock; 520 } 521 522 val = 0; 523 error = sysctl_wire_old_buffer(req, sizeof(val)); 524 if (error == 0) { 525 val = adapter->buf_ring_size; 526 error = sysctl_handle_32(oidp, &val, 0, req); 527 } 528 if (error != 0 || req->newptr == NULL) 529 goto unlock; 530 531 if (!powerof2(val) || val == 0) { 532 ena_log(adapter->pdev, ERR, 533 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 534 val); 535 error = EINVAL; 536 goto unlock; 537 } 538 539 if (val != adapter->buf_ring_size) { 540 ena_log(adapter->pdev, INFO, 541 "Requested new Tx buffer ring size: %d. Old size: %d\n", 542 val, adapter->buf_ring_size); 543 544 error = ena_update_buf_ring_size(adapter, val); 545 } else { 546 ena_log(adapter->pdev, ERR, 547 "New Tx buffer ring size is the same as already used: %u\n", 548 adapter->buf_ring_size); 549 } 550 551 unlock: 552 ENA_LOCK_UNLOCK(); 553 554 return (error); 555 } 556 557 static int 558 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 559 { 560 struct ena_adapter *adapter = arg1; 561 uint32_t val; 562 int error; 563 564 ENA_LOCK_LOCK(); 565 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 566 error = EINVAL; 567 goto unlock; 568 } 569 570 val = 0; 571 error = sysctl_wire_old_buffer(req, sizeof(val)); 572 if (error == 0) { 573 val = adapter->requested_rx_ring_size; 574 error = sysctl_handle_32(oidp, &val, 0, req); 575 } 576 if (error != 0 || req->newptr == NULL) 577 goto unlock; 578 579 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 580 ena_log(adapter->pdev, ERR, 581 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 582 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 583 error = EINVAL; 584 goto unlock; 585 } 586 587 /* Check if the parameter is power of 2 */ 588 if (!powerof2(val)) { 589 ena_log(adapter->pdev, ERR, 590 "Requested new Rx queue size (%u) is not a power of 2\n", 591 val); 592 error = EINVAL; 593 goto unlock; 594 } 595 596 if (val != adapter->requested_rx_ring_size) { 597 ena_log(adapter->pdev, INFO, 598 "Requested new Rx queue size: %u. Old size: %u\n", 599 val, adapter->requested_rx_ring_size); 600 601 error = ena_update_queue_size(adapter, 602 adapter->requested_tx_ring_size, val); 603 } else { 604 ena_log(adapter->pdev, ERR, 605 "New Rx queue size is the same as already used: %u\n", 606 adapter->requested_rx_ring_size); 607 } 608 609 unlock: 610 ENA_LOCK_UNLOCK(); 611 612 return (error); 613 } 614 615 /* 616 * Change number of effectively used IO queues adapter->num_io_queues 617 */ 618 static int 619 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 620 { 621 struct ena_adapter *adapter = arg1; 622 uint32_t old_num_queues, tmp = 0; 623 int error; 624 625 ENA_LOCK_LOCK(); 626 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 627 error = EINVAL; 628 goto unlock; 629 } 630 631 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 632 if (error == 0) { 633 tmp = adapter->num_io_queues; 634 error = sysctl_handle_int(oidp, &tmp, 0, req); 635 } 636 if (error != 0 || req->newptr == NULL) 637 goto unlock; 638 639 if (tmp == 0) { 640 ena_log(adapter->pdev, ERR, 641 "Requested number of IO queues is zero\n"); 642 error = EINVAL; 643 goto unlock; 644 } 645 646 /* 647 * The adapter::max_num_io_queues is the HW capability. The system 648 * resources availability may potentially be a tighter limit. Therefore 649 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 650 * always holds true, while the `adapter::msix_vecs` is variable across 651 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 652 */ 653 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 654 ena_log(adapter->pdev, ERR, 655 "Requested number of IO queues is higher than maximum " 656 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 657 error = EINVAL; 658 goto unlock; 659 } 660 if (tmp == adapter->num_io_queues) { 661 ena_log(adapter->pdev, ERR, 662 "Requested number of IO queues is equal to current value " 663 "(%u)\n", adapter->num_io_queues); 664 } else { 665 ena_log(adapter->pdev, INFO, 666 "Requested new number of IO queues: %u, current value: " 667 "%u\n", tmp, adapter->num_io_queues); 668 669 old_num_queues = adapter->num_io_queues; 670 error = ena_update_io_queue_nb(adapter, tmp); 671 if (error != 0) 672 return (error); 673 674 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 675 } 676 677 unlock: 678 ENA_LOCK_UNLOCK(); 679 680 return (error); 681 } 682 683 static int 684 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) 685 { 686 struct ena_adapter *adapter = arg1; 687 uint16_t interval; 688 int error; 689 690 ENA_LOCK_LOCK(); 691 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 692 error = EINVAL; 693 goto unlock; 694 } 695 696 error = sysctl_wire_old_buffer(req, sizeof(interval)); 697 if (error == 0) { 698 interval = adapter->eni_metrics_sample_interval; 699 error = sysctl_handle_16(oidp, &interval, 0, req); 700 } 701 if (error != 0 || req->newptr == NULL) 702 goto unlock; 703 704 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { 705 ena_log(adapter->pdev, ERR, 706 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", 707 ENI_METRICS_MAX_SAMPLE_INTERVAL); 708 error = EINVAL; 709 goto unlock; 710 } 711 712 if (interval == 0) { 713 ena_log(adapter->pdev, INFO, 714 "ENI metrics update is now turned off\n"); 715 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 716 } else { 717 ena_log(adapter->pdev, INFO, 718 "ENI metrics update interval is set to: %"PRIu16" seconds\n", 719 interval); 720 } 721 722 adapter->eni_metrics_sample_interval = interval; 723 724 unlock: 725 ENA_LOCK_UNLOCK(); 726 727 return (0); 728 } 729 730 #ifndef RSS 731 /* 732 * Change the Receive Side Scaling hash key. 733 */ 734 static int 735 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 736 { 737 struct ena_adapter *adapter = arg1; 738 struct ena_com_dev *ena_dev = adapter->ena_dev; 739 enum ena_admin_hash_functions ena_func; 740 char msg[ENA_HASH_KEY_MSG_SIZE]; 741 char elem[3] = { 0 }; 742 char *endp; 743 u8 rss_key[ENA_HASH_KEY_SIZE]; 744 int error, i; 745 746 ENA_LOCK_LOCK(); 747 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 748 error = EINVAL; 749 goto unlock; 750 } 751 752 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 753 error = ENOTSUP; 754 goto unlock; 755 } 756 757 error = sysctl_wire_old_buffer(req, sizeof(msg)); 758 if (error != 0) 759 goto unlock; 760 761 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 762 if (error != 0) { 763 device_printf(adapter->pdev, "Cannot get hash function\n"); 764 goto unlock; 765 } 766 767 if (ena_func != ENA_ADMIN_TOEPLITZ) { 768 error = EINVAL; 769 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 770 goto unlock; 771 } 772 773 error = ena_rss_get_hash_key(ena_dev, rss_key); 774 if (error != 0) { 775 device_printf(adapter->pdev, "Cannot get hash key\n"); 776 goto unlock; 777 } 778 779 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 780 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 781 782 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 783 if (error != 0 || req->newptr == NULL) 784 goto unlock; 785 786 if (strlen(msg) != sizeof(msg) - 1) { 787 error = EINVAL; 788 device_printf(adapter->pdev, "Invalid key size\n"); 789 goto unlock; 790 } 791 792 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 793 strncpy(elem, &msg[i * 2], 2); 794 rss_key[i] = strtol(elem, &endp, 16); 795 796 /* Both hex nibbles in the string must be valid to continue. */ 797 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 798 error = EINVAL; 799 device_printf(adapter->pdev, 800 "Invalid key hex value: '%c'\n", *endp); 801 goto unlock; 802 } 803 } 804 805 error = ena_rss_set_hash(ena_dev, rss_key); 806 if (error != 0) 807 device_printf(adapter->pdev, "Cannot fill hash key\n"); 808 809 unlock: 810 ENA_LOCK_UNLOCK(); 811 812 return (error); 813 } 814 815 /* 816 * Change the Receive Side Scaling indirection table. 817 * 818 * The sysctl entry string consists of one or more `x:y` keypairs, where 819 * x stands for the table index and y for its new value. 820 * Table indices that don't need to be updated can be omitted from the string 821 * and will retain their existing values. If an index is entered more than once, 822 * the last value is used. 823 * 824 * Example: 825 * To update two selected indices in the RSS indirection table, e.g. setting 826 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 827 * used: 828 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 829 */ 830 static int 831 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 832 { 833 int num_queues, error; 834 struct ena_adapter *adapter = arg1; 835 struct ena_com_dev *ena_dev; 836 struct ena_indir *indir; 837 char *msg, *buf, *endp; 838 uint32_t idx, value; 839 840 ENA_LOCK_LOCK(); 841 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 842 error = EINVAL; 843 goto unlock; 844 } 845 846 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 847 error = ENOTSUP; 848 goto unlock; 849 } 850 851 ena_dev = adapter->ena_dev; 852 indir = adapter->rss_indir; 853 msg = indir->sysctl_buf; 854 855 if (unlikely(indir == NULL)) { 856 error = ENOTSUP; 857 goto unlock; 858 } 859 860 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 861 if (error != 0 || req->newptr == NULL) 862 goto unlock; 863 864 num_queues = adapter->num_io_queues; 865 866 /* 867 * This sysctl expects msg to be a list of `x:y` record pairs, 868 * where x is the indirection table index and y is its value. 869 */ 870 for (buf = msg; *buf != '\0'; buf = endp) { 871 idx = strtol(buf, &endp, 10); 872 873 if (endp == buf || idx < 0) { 874 device_printf(adapter->pdev, "Invalid index: %s\n", 875 buf); 876 error = EINVAL; 877 break; 878 } 879 880 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 881 device_printf(adapter->pdev, "Index %d out of range\n", 882 idx); 883 error = ERANGE; 884 break; 885 } 886 887 buf = endp; 888 889 if (*buf++ != ':') { 890 device_printf(adapter->pdev, "Missing ':' separator\n"); 891 error = EINVAL; 892 break; 893 } 894 895 value = strtol(buf, &endp, 10); 896 897 if (endp == buf || value < 0) { 898 device_printf(adapter->pdev, "Invalid value: %s\n", 899 buf); 900 error = EINVAL; 901 break; 902 } 903 904 if (value >= num_queues) { 905 device_printf(adapter->pdev, "Value %d out of range\n", 906 value); 907 error = ERANGE; 908 break; 909 } 910 911 indir->table[idx] = value; 912 } 913 914 if (error != 0) /* Reload indirection table with last good data. */ 915 ena_rss_indir_get(adapter, indir->table); 916 917 /* At this point msg has been clobbered by sysctl_handle_string. */ 918 ena_rss_copy_indir_buf(msg, indir->table); 919 920 if (error == 0) 921 error = ena_rss_indir_set(adapter, indir->table); 922 923 unlock: 924 ENA_LOCK_UNLOCK(); 925 926 return (error); 927 } 928 #endif /* RSS */ 929