1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_rss.h" 35 36 #include "ena_sysctl.h" 37 #include "ena_rss.h" 38 39 static void ena_sysctl_add_wd(struct ena_adapter *); 40 static void ena_sysctl_add_stats(struct ena_adapter *); 41 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 44 #ifndef RSS 45 static void ena_sysctl_add_rss(struct ena_adapter *); 46 #endif 47 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 48 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); 51 #ifndef RSS 52 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 54 #endif 55 56 /* Limit max ENI sample rate to be an hour. */ 57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 59 60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 61 "ENA driver parameters"); 62 63 /* 64 * Logging level for changing verbosity of the output 65 */ 66 int ena_log_level = ENA_INFO; 67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, 68 &ena_log_level, 0, "Logging level indicating verbosity of the logs"); 69 70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 71 DRV_MODULE_VERSION, "ENA driver version"); 72 73 /* 74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 76 * of time and lead to the OS instability as it needs to look for the contiguous 77 * pages. 78 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 79 * the network performance is the priority, the 9k mbufs can be used. 80 */ 81 int ena_enable_9k_mbufs = 0; 82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 84 85 /* 86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 87 * false. This option may be important for platforms, which often handle packet 88 * headers on Tx with total header size greater than 96B, as it may 89 * reduce the latency. 90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 91 * packet drops. 92 */ 93 bool ena_force_large_llq_header = false; 94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 95 &ena_force_large_llq_header, 0, 96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 97 98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 99 100 void 101 ena_sysctl_add_nodes(struct ena_adapter *adapter) 102 { 103 ena_sysctl_add_wd(adapter); 104 ena_sysctl_add_stats(adapter); 105 ena_sysctl_add_eni_metrics(adapter); 106 ena_sysctl_add_tuneables(adapter); 107 #ifndef RSS 108 ena_sysctl_add_rss(adapter); 109 #endif 110 } 111 112 static void 113 ena_sysctl_add_wd(struct ena_adapter *adapter) 114 { 115 device_t dev; 116 117 struct sysctl_ctx_list *ctx; 118 struct sysctl_oid *tree; 119 struct sysctl_oid_list *child; 120 121 dev = adapter->pdev; 122 123 ctx = device_get_sysctl_ctx(dev); 124 tree = device_get_sysctl_tree(dev); 125 child = SYSCTL_CHILDREN(tree); 126 127 /* Sysctl calls for Watchdog service */ 128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", 129 CTLFLAG_RWTUN, &adapter->wd_active, 0, 130 "Watchdog is active"); 131 132 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 133 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 134 "Timeout for Keep Alive messages"); 135 136 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 137 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 138 "Timeout for TX completion"); 139 140 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 141 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 142 "Number of TX queues to check per run"); 143 144 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 145 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 146 "Max number of timeouted packets"); 147 } 148 149 static void 150 ena_sysctl_add_stats(struct ena_adapter *adapter) 151 { 152 device_t dev; 153 154 struct ena_ring *tx_ring; 155 struct ena_ring *rx_ring; 156 157 struct ena_hw_stats *hw_stats; 158 struct ena_stats_dev *dev_stats; 159 struct ena_stats_tx *tx_stats; 160 struct ena_stats_rx *rx_stats; 161 struct ena_com_stats_admin *admin_stats; 162 163 struct sysctl_ctx_list *ctx; 164 struct sysctl_oid *tree; 165 struct sysctl_oid_list *child; 166 167 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 168 struct sysctl_oid *admin_node; 169 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 170 struct sysctl_oid_list *admin_list; 171 172 #define QUEUE_NAME_LEN 32 173 char namebuf[QUEUE_NAME_LEN]; 174 int i; 175 176 dev = adapter->pdev; 177 178 ctx = device_get_sysctl_ctx(dev); 179 tree = device_get_sysctl_tree(dev); 180 child = SYSCTL_CHILDREN(tree); 181 182 tx_ring = adapter->tx_ring; 183 rx_ring = adapter->rx_ring; 184 185 hw_stats = &adapter->hw_stats; 186 dev_stats = &adapter->dev_stats; 187 admin_stats = &adapter->ena_dev->admin_queue.stats; 188 189 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", 190 CTLFLAG_RD, &dev_stats->wd_expired, 191 "Watchdog expiry count"); 192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", 193 CTLFLAG_RD, &dev_stats->interface_up, 194 "Network interface up count"); 195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 196 CTLFLAG_RD, &dev_stats->interface_down, 197 "Network interface down count"); 198 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 199 CTLFLAG_RD, &dev_stats->admin_q_pause, 200 "Admin queue pauses"); 201 202 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 203 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 204 205 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 206 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 207 queue_list = SYSCTL_CHILDREN(queue_node); 208 209 adapter->que[i].oid = queue_node; 210 211 #ifdef RSS 212 /* Common stats */ 213 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", 214 CTLFLAG_RD, &adapter->que[i].cpu, 0, "CPU affinity"); 215 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", 216 CTLFLAG_RD, &adapter->que[i].domain, 0, "NUMA domain"); 217 #endif 218 219 /* TX specific stats */ 220 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 221 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 222 tx_list = SYSCTL_CHILDREN(tx_node); 223 224 tx_stats = &tx_ring->tx_stats; 225 226 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 227 "count", CTLFLAG_RD, 228 &tx_stats->cnt, "Packets sent"); 229 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 230 "bytes", CTLFLAG_RD, 231 &tx_stats->bytes, "Bytes sent"); 232 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 233 "prepare_ctx_err", CTLFLAG_RD, 234 &tx_stats->prepare_ctx_err, 235 "TX buffer preparation failures"); 236 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 237 "dma_mapping_err", CTLFLAG_RD, 238 &tx_stats->dma_mapping_err, "DMA mapping failures"); 239 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 240 "doorbells", CTLFLAG_RD, 241 &tx_stats->doorbells, "Queue doorbells"); 242 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 243 "missing_tx_comp", CTLFLAG_RD, 244 &tx_stats->missing_tx_comp, "TX completions missed"); 245 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 246 "bad_req_id", CTLFLAG_RD, 247 &tx_stats->bad_req_id, "Bad request id count"); 248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 249 "mbuf_collapses", CTLFLAG_RD, 250 &tx_stats->collapse, 251 "Mbuf collapse count"); 252 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 253 "mbuf_collapse_err", CTLFLAG_RD, 254 &tx_stats->collapse_err, 255 "Mbuf collapse failures"); 256 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 257 "queue_wakeups", CTLFLAG_RD, 258 &tx_stats->queue_wakeup, "Queue wakeups"); 259 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 260 "queue_stops", CTLFLAG_RD, 261 &tx_stats->queue_stop, "Queue stops"); 262 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 263 "llq_buffer_copy", CTLFLAG_RD, 264 &tx_stats->llq_buffer_copy, 265 "Header copies for llq transaction"); 266 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 267 "unmask_interrupt_num", CTLFLAG_RD, 268 &tx_stats->unmask_interrupt_num, 269 "Unmasked interrupt count"); 270 271 /* RX specific stats */ 272 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 273 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 274 rx_list = SYSCTL_CHILDREN(rx_node); 275 276 rx_stats = &rx_ring->rx_stats; 277 278 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 279 "count", CTLFLAG_RD, 280 &rx_stats->cnt, "Packets received"); 281 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 282 "bytes", CTLFLAG_RD, 283 &rx_stats->bytes, "Bytes received"); 284 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 285 "refil_partial", CTLFLAG_RD, 286 &rx_stats->refil_partial, "Partial refilled mbufs"); 287 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 288 "csum_bad", CTLFLAG_RD, 289 &rx_stats->csum_bad, "Bad RX checksum"); 290 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 291 "mbuf_alloc_fail", CTLFLAG_RD, 292 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs"); 293 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 294 "mjum_alloc_fail", CTLFLAG_RD, 295 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs"); 296 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 297 "dma_mapping_err", CTLFLAG_RD, 298 &rx_stats->dma_mapping_err, "DMA mapping errors"); 299 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 300 "bad_desc_num", CTLFLAG_RD, 301 &rx_stats->bad_desc_num, "Bad descriptor count"); 302 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 303 "bad_req_id", CTLFLAG_RD, 304 &rx_stats->bad_req_id, "Bad request id count"); 305 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 306 "empty_rx_ring", CTLFLAG_RD, 307 &rx_stats->empty_rx_ring, "RX descriptors depletion count"); 308 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 309 "csum_good", CTLFLAG_RD, 310 &rx_stats->csum_good, "Valid RX checksum calculations"); 311 } 312 313 /* Stats read from device */ 314 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 315 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 316 hw_list = SYSCTL_CHILDREN(hw_node); 317 318 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 319 &hw_stats->rx_packets, "Packets received"); 320 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 321 &hw_stats->tx_packets, "Packets transmitted"); 322 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 323 &hw_stats->rx_bytes, "Bytes received"); 324 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 325 &hw_stats->tx_bytes, "Bytes transmitted"); 326 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 327 &hw_stats->rx_drops, "Receive packet drops"); 328 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 329 &hw_stats->tx_drops, "Transmit packet drops"); 330 331 /* ENA Admin queue stats */ 332 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 333 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 334 admin_list = SYSCTL_CHILDREN(admin_node); 335 336 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 337 &admin_stats->aborted_cmd, 0, "Aborted commands"); 338 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 339 &admin_stats->submitted_cmd, 0, "Submitted commands"); 340 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 341 &admin_stats->completed_cmd, 0, "Completed commands"); 342 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 343 &admin_stats->out_of_space, 0, "Queue out of space"); 344 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 345 &admin_stats->no_completion, 0, "Commands not completed"); 346 } 347 348 static void 349 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 350 { 351 device_t dev; 352 struct ena_admin_eni_stats *eni_metrics; 353 354 struct sysctl_ctx_list *ctx; 355 struct sysctl_oid *tree; 356 struct sysctl_oid_list *child; 357 358 struct sysctl_oid *eni_node; 359 struct sysctl_oid_list *eni_list; 360 361 dev = adapter->pdev; 362 363 ctx = device_get_sysctl_ctx(dev); 364 tree = device_get_sysctl_tree(dev); 365 child = SYSCTL_CHILDREN(tree); 366 367 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 368 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 369 eni_list = SYSCTL_CHILDREN(eni_node); 370 371 eni_metrics = &adapter->eni_metrics; 372 373 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 374 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 375 "Inbound BW allowance exceeded"); 376 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 377 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 378 "Outbound BW allowance exceeded"); 379 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 380 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 381 "PPS allowance exceeded"); 382 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 383 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 384 "Connection tracking allowance exceeded"); 385 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 386 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 387 "Linklocal packet rate allowance exceeded"); 388 389 /* 390 * Tuneable, which determines how often ENI metrics will be read. 391 * 0 means it's turned off. Maximum allowed value is limited by: 392 * ENI_METRICS_MAX_SAMPLE_INTERVAL. 393 */ 394 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", 395 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 396 ena_sysctl_eni_metrics_interval, "SU", 397 "Interval in seconds for updating ENI emetrics. 0 turns off the update."); 398 } 399 400 static void 401 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 402 { 403 device_t dev; 404 405 struct sysctl_ctx_list *ctx; 406 struct sysctl_oid *tree; 407 struct sysctl_oid_list *child; 408 409 dev = adapter->pdev; 410 411 ctx = device_get_sysctl_ctx(dev); 412 tree = device_get_sysctl_tree(dev); 413 child = SYSCTL_CHILDREN(tree); 414 415 /* Tuneable number of buffers in the buf-ring (drbr) */ 416 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 417 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 418 ena_sysctl_buf_ring_size, "I", 419 "Size of the Tx buffer ring (drbr)."); 420 421 /* Tuneable number of the Rx ring size */ 422 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 423 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 424 ena_sysctl_rx_queue_size, "I", 425 "Size of the Rx ring. The size should be a power of 2."); 426 427 /* Tuneable number of IO queues */ 428 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 429 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 430 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 431 } 432 433 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 434 #ifndef RSS 435 static void 436 ena_sysctl_add_rss(struct ena_adapter *adapter) 437 { 438 device_t dev; 439 440 struct sysctl_ctx_list *ctx; 441 struct sysctl_oid *tree; 442 struct sysctl_oid_list *child; 443 444 dev = adapter->pdev; 445 446 ctx = device_get_sysctl_ctx(dev); 447 tree = device_get_sysctl_tree(dev); 448 child = SYSCTL_CHILDREN(tree); 449 450 /* RSS options */ 451 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 452 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 453 child = SYSCTL_CHILDREN(tree); 454 455 /* RSS hash key */ 456 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 457 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 458 ena_sysctl_rss_key, "A", "RSS key."); 459 460 /* Tuneable RSS indirection table */ 461 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 462 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 463 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 464 465 /* RSS indirection table size */ 466 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 467 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 468 "RSS indirection table size."); 469 } 470 #endif /* RSS */ 471 472 473 /* 474 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 475 * 476 * Whether the nodes are registered or unregistered depends on a delta between 477 * the `old` and `new` parameters, representing the number of queues. 478 * 479 * This function is used to hide sysctl attributes for queue nodes which aren't 480 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 481 * 482 * NOTE: 483 * All unregistered nodes must be registered again at detach, i.e. by a call to 484 * this function. 485 */ 486 void 487 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 488 { 489 struct sysctl_oid *oid; 490 int min, max, i; 491 492 min = MIN(old, new); 493 max = MIN(MAX(old, new), adapter->max_num_io_queues); 494 495 for (i = min; i < max; ++i) { 496 oid = adapter->que[i].oid; 497 498 sysctl_wlock(); 499 if (old > new) 500 sysctl_unregister_oid(oid); 501 else 502 sysctl_register_oid(oid); 503 sysctl_wunlock(); 504 } 505 } 506 507 static int 508 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 509 { 510 struct ena_adapter *adapter = arg1; 511 uint32_t val; 512 int error; 513 514 ENA_LOCK_LOCK(); 515 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 516 error = EINVAL; 517 goto unlock; 518 } 519 520 val = 0; 521 error = sysctl_wire_old_buffer(req, sizeof(val)); 522 if (error == 0) { 523 val = adapter->buf_ring_size; 524 error = sysctl_handle_32(oidp, &val, 0, req); 525 } 526 if (error != 0 || req->newptr == NULL) 527 goto unlock; 528 529 if (!powerof2(val) || val == 0) { 530 ena_log(adapter->pdev, ERR, 531 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 532 val); 533 error = EINVAL; 534 goto unlock; 535 } 536 537 if (val != adapter->buf_ring_size) { 538 ena_log(adapter->pdev, INFO, 539 "Requested new Tx buffer ring size: %d. Old size: %d\n", 540 val, adapter->buf_ring_size); 541 542 error = ena_update_buf_ring_size(adapter, val); 543 } else { 544 ena_log(adapter->pdev, ERR, 545 "New Tx buffer ring size is the same as already used: %u\n", 546 adapter->buf_ring_size); 547 } 548 549 unlock: 550 ENA_LOCK_UNLOCK(); 551 552 return (error); 553 } 554 555 static int 556 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 557 { 558 struct ena_adapter *adapter = arg1; 559 uint32_t val; 560 int error; 561 562 ENA_LOCK_LOCK(); 563 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 564 error = EINVAL; 565 goto unlock; 566 } 567 568 val = 0; 569 error = sysctl_wire_old_buffer(req, sizeof(val)); 570 if (error == 0) { 571 val = adapter->requested_rx_ring_size; 572 error = sysctl_handle_32(oidp, &val, 0, req); 573 } 574 if (error != 0 || req->newptr == NULL) 575 goto unlock; 576 577 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 578 ena_log(adapter->pdev, ERR, 579 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 580 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 581 error = EINVAL; 582 goto unlock; 583 } 584 585 /* Check if the parameter is power of 2 */ 586 if (!powerof2(val)) { 587 ena_log(adapter->pdev, ERR, 588 "Requested new Rx queue size (%u) is not a power of 2\n", 589 val); 590 error = EINVAL; 591 goto unlock; 592 } 593 594 if (val != adapter->requested_rx_ring_size) { 595 ena_log(adapter->pdev, INFO, 596 "Requested new Rx queue size: %u. Old size: %u\n", 597 val, adapter->requested_rx_ring_size); 598 599 error = ena_update_queue_size(adapter, 600 adapter->requested_tx_ring_size, val); 601 } else { 602 ena_log(adapter->pdev, ERR, 603 "New Rx queue size is the same as already used: %u\n", 604 adapter->requested_rx_ring_size); 605 } 606 607 unlock: 608 ENA_LOCK_UNLOCK(); 609 610 return (error); 611 } 612 613 /* 614 * Change number of effectively used IO queues adapter->num_io_queues 615 */ 616 static int 617 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 618 { 619 struct ena_adapter *adapter = arg1; 620 uint32_t old_num_queues, tmp = 0; 621 int error; 622 623 ENA_LOCK_LOCK(); 624 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 625 error = EINVAL; 626 goto unlock; 627 } 628 629 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 630 if (error == 0) { 631 tmp = adapter->num_io_queues; 632 error = sysctl_handle_int(oidp, &tmp, 0, req); 633 } 634 if (error != 0 || req->newptr == NULL) 635 goto unlock; 636 637 if (tmp == 0) { 638 ena_log(adapter->pdev, ERR, 639 "Requested number of IO queues is zero\n"); 640 error = EINVAL; 641 goto unlock; 642 } 643 644 /* 645 * The adapter::max_num_io_queues is the HW capability. The system 646 * resources availability may potentially be a tighter limit. Therefore 647 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 648 * always holds true, while the `adapter::msix_vecs` is variable across 649 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 650 */ 651 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 652 ena_log(adapter->pdev, ERR, 653 "Requested number of IO queues is higher than maximum " 654 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 655 error = EINVAL; 656 goto unlock; 657 } 658 if (tmp == adapter->num_io_queues) { 659 ena_log(adapter->pdev, ERR, 660 "Requested number of IO queues is equal to current value " 661 "(%u)\n", adapter->num_io_queues); 662 } else { 663 ena_log(adapter->pdev, INFO, 664 "Requested new number of IO queues: %u, current value: " 665 "%u\n", tmp, adapter->num_io_queues); 666 667 old_num_queues = adapter->num_io_queues; 668 error = ena_update_io_queue_nb(adapter, tmp); 669 if (error != 0) 670 return (error); 671 672 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 673 } 674 675 unlock: 676 ENA_LOCK_UNLOCK(); 677 678 return (error); 679 } 680 681 static int 682 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) 683 { 684 struct ena_adapter *adapter = arg1; 685 uint16_t interval; 686 int error; 687 688 ENA_LOCK_LOCK(); 689 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 690 error = EINVAL; 691 goto unlock; 692 } 693 694 error = sysctl_wire_old_buffer(req, sizeof(interval)); 695 if (error == 0) { 696 interval = adapter->eni_metrics_sample_interval; 697 error = sysctl_handle_16(oidp, &interval, 0, req); 698 } 699 if (error != 0 || req->newptr == NULL) 700 goto unlock; 701 702 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { 703 ena_log(adapter->pdev, ERR, 704 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", 705 ENI_METRICS_MAX_SAMPLE_INTERVAL); 706 error = EINVAL; 707 goto unlock; 708 } 709 710 if (interval == 0) { 711 ena_log(adapter->pdev, INFO, 712 "ENI metrics update is now turned off\n"); 713 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 714 } else { 715 ena_log(adapter->pdev, INFO, 716 "ENI metrics update interval is set to: %"PRIu16" seconds\n", 717 interval); 718 } 719 720 adapter->eni_metrics_sample_interval = interval; 721 722 unlock: 723 ENA_LOCK_UNLOCK(); 724 725 return (0); 726 } 727 728 #ifndef RSS 729 /* 730 * Change the Receive Side Scaling hash key. 731 */ 732 static int 733 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 734 { 735 struct ena_adapter *adapter = arg1; 736 struct ena_com_dev *ena_dev = adapter->ena_dev; 737 enum ena_admin_hash_functions ena_func; 738 char msg[ENA_HASH_KEY_MSG_SIZE]; 739 char elem[3] = { 0 }; 740 char *endp; 741 u8 rss_key[ENA_HASH_KEY_SIZE]; 742 int error, i; 743 744 ENA_LOCK_LOCK(); 745 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 746 error = EINVAL; 747 goto unlock; 748 } 749 750 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 751 error = ENOTSUP; 752 goto unlock; 753 } 754 755 error = sysctl_wire_old_buffer(req, sizeof(msg)); 756 if (error != 0) 757 goto unlock; 758 759 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 760 if (error != 0) { 761 device_printf(adapter->pdev, "Cannot get hash function\n"); 762 goto unlock; 763 } 764 765 if (ena_func != ENA_ADMIN_TOEPLITZ) { 766 error = EINVAL; 767 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 768 goto unlock; 769 } 770 771 error = ena_rss_get_hash_key(ena_dev, rss_key); 772 if (error != 0) { 773 device_printf(adapter->pdev, "Cannot get hash key\n"); 774 goto unlock; 775 } 776 777 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 778 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 779 780 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 781 if (error != 0 || req->newptr == NULL) 782 goto unlock; 783 784 if (strlen(msg) != sizeof(msg) - 1) { 785 error = EINVAL; 786 device_printf(adapter->pdev, "Invalid key size\n"); 787 goto unlock; 788 } 789 790 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 791 strncpy(elem, &msg[i * 2], 2); 792 rss_key[i] = strtol(elem, &endp, 16); 793 794 /* Both hex nibbles in the string must be valid to continue. */ 795 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 796 error = EINVAL; 797 device_printf(adapter->pdev, 798 "Invalid key hex value: '%c'\n", *endp); 799 goto unlock; 800 } 801 } 802 803 error = ena_rss_set_hash(ena_dev, rss_key); 804 if (error != 0) 805 device_printf(adapter->pdev, "Cannot fill hash key\n"); 806 807 unlock: 808 ENA_LOCK_UNLOCK(); 809 810 return (error); 811 } 812 813 /* 814 * Change the Receive Side Scaling indirection table. 815 * 816 * The sysctl entry string consists of one or more `x:y` keypairs, where 817 * x stands for the table index and y for its new value. 818 * Table indices that don't need to be updated can be omitted from the string 819 * and will retain their existing values. If an index is entered more than once, 820 * the last value is used. 821 * 822 * Example: 823 * To update two selected indices in the RSS indirection table, e.g. setting 824 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 825 * used: 826 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 827 */ 828 static int 829 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 830 { 831 int num_queues, error; 832 struct ena_adapter *adapter = arg1; 833 struct ena_indir *indir; 834 char *msg, *buf, *endp; 835 uint32_t idx, value; 836 837 ENA_LOCK_LOCK(); 838 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 839 error = EINVAL; 840 goto unlock; 841 } 842 843 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 844 error = ENOTSUP; 845 goto unlock; 846 } 847 848 indir = adapter->rss_indir; 849 msg = indir->sysctl_buf; 850 851 if (unlikely(indir == NULL)) { 852 error = ENOTSUP; 853 goto unlock; 854 } 855 856 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 857 if (error != 0 || req->newptr == NULL) 858 goto unlock; 859 860 num_queues = adapter->num_io_queues; 861 862 /* 863 * This sysctl expects msg to be a list of `x:y` record pairs, 864 * where x is the indirection table index and y is its value. 865 */ 866 for (buf = msg; *buf != '\0'; buf = endp) { 867 idx = strtol(buf, &endp, 10); 868 869 if (endp == buf || idx < 0) { 870 device_printf(adapter->pdev, "Invalid index: %s\n", 871 buf); 872 error = EINVAL; 873 break; 874 } 875 876 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 877 device_printf(adapter->pdev, "Index %d out of range\n", 878 idx); 879 error = ERANGE; 880 break; 881 } 882 883 buf = endp; 884 885 if (*buf++ != ':') { 886 device_printf(adapter->pdev, "Missing ':' separator\n"); 887 error = EINVAL; 888 break; 889 } 890 891 value = strtol(buf, &endp, 10); 892 893 if (endp == buf || value < 0) { 894 device_printf(adapter->pdev, "Invalid value: %s\n", 895 buf); 896 error = EINVAL; 897 break; 898 } 899 900 if (value >= num_queues) { 901 device_printf(adapter->pdev, "Value %d out of range\n", 902 value); 903 error = ERANGE; 904 break; 905 } 906 907 indir->table[idx] = value; 908 } 909 910 if (error != 0) /* Reload indirection table with last good data. */ 911 ena_rss_indir_get(adapter, indir->table); 912 913 /* At this point msg has been clobbered by sysctl_handle_string. */ 914 ena_rss_copy_indir_buf(msg, indir->table); 915 916 if (error == 0) 917 error = ena_rss_indir_set(adapter, indir->table); 918 919 unlock: 920 ENA_LOCK_UNLOCK(); 921 922 return (error); 923 } 924 #endif /* RSS */ 925