1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 #include <sys/cdefs.h> 31 #include <sys/param.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_rss.h" 35 36 #include "ena_sysctl.h" 37 #include "ena_rss.h" 38 39 static void ena_sysctl_add_wd(struct ena_adapter *); 40 static void ena_sysctl_add_stats(struct ena_adapter *); 41 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 44 #ifndef RSS 45 static void ena_sysctl_add_rss(struct ena_adapter *); 46 #endif 47 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 48 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS); 51 #ifndef RSS 52 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 54 #endif 55 56 /* Limit max ENI sample rate to be an hour. */ 57 #define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600 58 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 59 60 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 61 "ENA driver parameters"); 62 63 /* 64 * Logging level for changing verbosity of the output 65 */ 66 int ena_log_level = ENA_INFO; 67 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, 68 &ena_log_level, 0, "Logging level indicating verbosity of the logs"); 69 70 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 71 DRV_MODULE_VERSION, "ENA driver version"); 72 73 /* 74 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 75 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 76 * of time and lead to the OS instability as it needs to look for the contiguous 77 * pages. 78 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 79 * the network performance is the priority, the 9k mbufs can be used. 80 */ 81 int ena_enable_9k_mbufs = 0; 82 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 83 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 84 85 /* 86 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 87 * false. This option may be important for platforms, which often handle packet 88 * headers on Tx with total header size greater than 96B, as it may 89 * reduce the latency. 90 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 91 * packet drops. 92 */ 93 bool ena_force_large_llq_header = false; 94 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 95 &ena_force_large_llq_header, 0, 96 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 97 98 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 99 100 void 101 ena_sysctl_add_nodes(struct ena_adapter *adapter) 102 { 103 ena_sysctl_add_wd(adapter); 104 ena_sysctl_add_stats(adapter); 105 ena_sysctl_add_eni_metrics(adapter); 106 ena_sysctl_add_tuneables(adapter); 107 #ifndef RSS 108 ena_sysctl_add_rss(adapter); 109 #endif 110 } 111 112 static void 113 ena_sysctl_add_wd(struct ena_adapter *adapter) 114 { 115 device_t dev; 116 117 struct sysctl_ctx_list *ctx; 118 struct sysctl_oid *tree; 119 struct sysctl_oid_list *child; 120 121 dev = adapter->pdev; 122 123 ctx = device_get_sysctl_ctx(dev); 124 tree = device_get_sysctl_tree(dev); 125 child = SYSCTL_CHILDREN(tree); 126 127 /* Sysctl calls for Watchdog service */ 128 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", 129 CTLFLAG_RWTUN, &adapter->wd_active, 0, 130 "Watchdog is active"); 131 132 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 133 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 134 "Timeout for Keep Alive messages"); 135 136 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 137 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 138 "Timeout for TX completion"); 139 140 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 141 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 142 "Number of TX queues to check per run"); 143 144 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 145 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 146 "Max number of timeouted packets"); 147 } 148 149 static void 150 ena_sysctl_add_stats(struct ena_adapter *adapter) 151 { 152 device_t dev; 153 154 struct ena_ring *tx_ring; 155 struct ena_ring *rx_ring; 156 157 struct ena_hw_stats *hw_stats; 158 struct ena_stats_dev *dev_stats; 159 struct ena_stats_tx *tx_stats; 160 struct ena_stats_rx *rx_stats; 161 struct ena_com_stats_admin *admin_stats; 162 163 struct sysctl_ctx_list *ctx; 164 struct sysctl_oid *tree; 165 struct sysctl_oid_list *child; 166 167 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 168 struct sysctl_oid *admin_node; 169 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 170 struct sysctl_oid_list *admin_list; 171 172 #define QUEUE_NAME_LEN 32 173 char namebuf[QUEUE_NAME_LEN]; 174 int i; 175 176 dev = adapter->pdev; 177 178 ctx = device_get_sysctl_ctx(dev); 179 tree = device_get_sysctl_tree(dev); 180 child = SYSCTL_CHILDREN(tree); 181 182 tx_ring = adapter->tx_ring; 183 rx_ring = adapter->rx_ring; 184 185 hw_stats = &adapter->hw_stats; 186 dev_stats = &adapter->dev_stats; 187 admin_stats = &adapter->ena_dev->admin_queue.stats; 188 189 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", 190 CTLFLAG_RD, &dev_stats->wd_expired, 191 "Watchdog expiry count"); 192 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", 193 CTLFLAG_RD, &dev_stats->interface_up, 194 "Network interface up count"); 195 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 196 CTLFLAG_RD, &dev_stats->interface_down, 197 "Network interface down count"); 198 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 199 CTLFLAG_RD, &dev_stats->admin_q_pause, 200 "Admin queue pauses"); 201 202 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 203 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 204 205 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, 206 namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 207 queue_list = SYSCTL_CHILDREN(queue_node); 208 209 adapter->que[i].oid = queue_node; 210 211 /* TX specific stats */ 212 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 213 "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 214 tx_list = SYSCTL_CHILDREN(tx_node); 215 216 tx_stats = &tx_ring->tx_stats; 217 218 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 219 "count", CTLFLAG_RD, 220 &tx_stats->cnt, "Packets sent"); 221 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 222 "bytes", CTLFLAG_RD, 223 &tx_stats->bytes, "Bytes sent"); 224 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 225 "prepare_ctx_err", CTLFLAG_RD, 226 &tx_stats->prepare_ctx_err, 227 "TX buffer preparation failures"); 228 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 229 "dma_mapping_err", CTLFLAG_RD, 230 &tx_stats->dma_mapping_err, "DMA mapping failures"); 231 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 232 "doorbells", CTLFLAG_RD, 233 &tx_stats->doorbells, "Queue doorbells"); 234 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 235 "missing_tx_comp", CTLFLAG_RD, 236 &tx_stats->missing_tx_comp, "TX completions missed"); 237 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 238 "bad_req_id", CTLFLAG_RD, 239 &tx_stats->bad_req_id, "Bad request id count"); 240 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 241 "mbuf_collapses", CTLFLAG_RD, 242 &tx_stats->collapse, 243 "Mbuf collapse count"); 244 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 245 "mbuf_collapse_err", CTLFLAG_RD, 246 &tx_stats->collapse_err, 247 "Mbuf collapse failures"); 248 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 249 "queue_wakeups", CTLFLAG_RD, 250 &tx_stats->queue_wakeup, "Queue wakeups"); 251 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 252 "queue_stops", CTLFLAG_RD, 253 &tx_stats->queue_stop, "Queue stops"); 254 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 255 "llq_buffer_copy", CTLFLAG_RD, 256 &tx_stats->llq_buffer_copy, 257 "Header copies for llq transaction"); 258 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 259 "unmask_interrupt_num", CTLFLAG_RD, 260 &tx_stats->unmask_interrupt_num, 261 "Unmasked interrupt count"); 262 263 /* RX specific stats */ 264 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, 265 "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 266 rx_list = SYSCTL_CHILDREN(rx_node); 267 268 rx_stats = &rx_ring->rx_stats; 269 270 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 271 "count", CTLFLAG_RD, 272 &rx_stats->cnt, "Packets received"); 273 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 274 "bytes", CTLFLAG_RD, 275 &rx_stats->bytes, "Bytes received"); 276 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 277 "refil_partial", CTLFLAG_RD, 278 &rx_stats->refil_partial, "Partial refilled mbufs"); 279 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 280 "csum_bad", CTLFLAG_RD, 281 &rx_stats->csum_bad, "Bad RX checksum"); 282 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 283 "mbuf_alloc_fail", CTLFLAG_RD, 284 &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs"); 285 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 286 "mjum_alloc_fail", CTLFLAG_RD, 287 &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs"); 288 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 289 "dma_mapping_err", CTLFLAG_RD, 290 &rx_stats->dma_mapping_err, "DMA mapping errors"); 291 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 292 "bad_desc_num", CTLFLAG_RD, 293 &rx_stats->bad_desc_num, "Bad descriptor count"); 294 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 295 "bad_req_id", CTLFLAG_RD, 296 &rx_stats->bad_req_id, "Bad request id count"); 297 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 298 "empty_rx_ring", CTLFLAG_RD, 299 &rx_stats->empty_rx_ring, "RX descriptors depletion count"); 300 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 301 "csum_good", CTLFLAG_RD, 302 &rx_stats->csum_good, "Valid RX checksum calculations"); 303 } 304 305 /* Stats read from device */ 306 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 307 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 308 hw_list = SYSCTL_CHILDREN(hw_node); 309 310 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 311 &hw_stats->rx_packets, "Packets received"); 312 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 313 &hw_stats->tx_packets, "Packets transmitted"); 314 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 315 &hw_stats->rx_bytes, "Bytes received"); 316 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 317 &hw_stats->tx_bytes, "Bytes transmitted"); 318 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 319 &hw_stats->rx_drops, "Receive packet drops"); 320 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 321 &hw_stats->tx_drops, "Transmit packet drops"); 322 323 /* ENA Admin queue stats */ 324 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 325 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 326 admin_list = SYSCTL_CHILDREN(admin_node); 327 328 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 329 &admin_stats->aborted_cmd, 0, "Aborted commands"); 330 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 331 &admin_stats->submitted_cmd, 0, "Submitted commands"); 332 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 333 &admin_stats->completed_cmd, 0, "Completed commands"); 334 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 335 &admin_stats->out_of_space, 0, "Queue out of space"); 336 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 337 &admin_stats->no_completion, 0, "Commands not completed"); 338 } 339 340 static void 341 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 342 { 343 device_t dev; 344 struct ena_admin_eni_stats *eni_metrics; 345 346 struct sysctl_ctx_list *ctx; 347 struct sysctl_oid *tree; 348 struct sysctl_oid_list *child; 349 350 struct sysctl_oid *eni_node; 351 struct sysctl_oid_list *eni_list; 352 353 dev = adapter->pdev; 354 355 ctx = device_get_sysctl_ctx(dev); 356 tree = device_get_sysctl_tree(dev); 357 child = SYSCTL_CHILDREN(tree); 358 359 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 360 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 361 eni_list = SYSCTL_CHILDREN(eni_node); 362 363 eni_metrics = &adapter->eni_metrics; 364 365 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 366 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 367 "Inbound BW allowance exceeded"); 368 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 369 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 370 "Outbound BW allowance exceeded"); 371 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 372 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 373 "PPS allowance exceeded"); 374 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 375 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 376 "Connection tracking allowance exceeded"); 377 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 378 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 379 "Linklocal packet rate allowance exceeded"); 380 381 /* 382 * Tuneable, which determines how often ENI metrics will be read. 383 * 0 means it's turned off. Maximum allowed value is limited by: 384 * ENI_METRICS_MAX_SAMPLE_INTERVAL. 385 */ 386 SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval", 387 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 388 ena_sysctl_eni_metrics_interval, "SU", 389 "Interval in seconds for updating ENI emetrics. 0 turns off the update."); 390 } 391 392 static void 393 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 394 { 395 device_t dev; 396 397 struct sysctl_ctx_list *ctx; 398 struct sysctl_oid *tree; 399 struct sysctl_oid_list *child; 400 401 dev = adapter->pdev; 402 403 ctx = device_get_sysctl_ctx(dev); 404 tree = device_get_sysctl_tree(dev); 405 child = SYSCTL_CHILDREN(tree); 406 407 /* Tuneable number of buffers in the buf-ring (drbr) */ 408 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 409 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 410 ena_sysctl_buf_ring_size, "I", 411 "Size of the Tx buffer ring (drbr)."); 412 413 /* Tuneable number of the Rx ring size */ 414 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 415 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 416 ena_sysctl_rx_queue_size, "I", 417 "Size of the Rx ring. The size should be a power of 2."); 418 419 /* Tuneable number of IO queues */ 420 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 421 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 422 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 423 } 424 425 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 426 #ifndef RSS 427 static void 428 ena_sysctl_add_rss(struct ena_adapter *adapter) 429 { 430 device_t dev; 431 432 struct sysctl_ctx_list *ctx; 433 struct sysctl_oid *tree; 434 struct sysctl_oid_list *child; 435 436 dev = adapter->pdev; 437 438 ctx = device_get_sysctl_ctx(dev); 439 tree = device_get_sysctl_tree(dev); 440 child = SYSCTL_CHILDREN(tree); 441 442 /* RSS options */ 443 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 444 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 445 child = SYSCTL_CHILDREN(tree); 446 447 /* RSS hash key */ 448 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 449 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 450 ena_sysctl_rss_key, "A", "RSS key."); 451 452 /* Tuneable RSS indirection table */ 453 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 454 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 455 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 456 457 /* RSS indirection table size */ 458 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 459 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 460 "RSS indirection table size."); 461 } 462 #endif /* RSS */ 463 464 465 /* 466 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 467 * 468 * Whether the nodes are registered or unregistered depends on a delta between 469 * the `old` and `new` parameters, representing the number of queues. 470 * 471 * This function is used to hide sysctl attributes for queue nodes which aren't 472 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 473 * 474 * NOTE: 475 * All unregistered nodes must be registered again at detach, i.e. by a call to 476 * this function. 477 */ 478 void 479 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 480 { 481 device_t dev; 482 struct sysctl_oid *oid; 483 int min, max, i; 484 485 dev = adapter->pdev; 486 min = MIN(old, new); 487 max = MIN(MAX(old, new), adapter->max_num_io_queues); 488 489 for (i = min; i < max; ++i) { 490 oid = adapter->que[i].oid; 491 492 sysctl_wlock(); 493 if (old > new) 494 sysctl_unregister_oid(oid); 495 else 496 sysctl_register_oid(oid); 497 sysctl_wunlock(); 498 } 499 } 500 501 static int 502 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 503 { 504 struct ena_adapter *adapter = arg1; 505 uint32_t val; 506 int error; 507 508 ENA_LOCK_LOCK(); 509 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 510 error = EINVAL; 511 goto unlock; 512 } 513 514 val = 0; 515 error = sysctl_wire_old_buffer(req, sizeof(val)); 516 if (error == 0) { 517 val = adapter->buf_ring_size; 518 error = sysctl_handle_32(oidp, &val, 0, req); 519 } 520 if (error != 0 || req->newptr == NULL) 521 goto unlock; 522 523 if (!powerof2(val) || val == 0) { 524 ena_log(adapter->pdev, ERR, 525 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 526 val); 527 error = EINVAL; 528 goto unlock; 529 } 530 531 if (val != adapter->buf_ring_size) { 532 ena_log(adapter->pdev, INFO, 533 "Requested new Tx buffer ring size: %d. Old size: %d\n", 534 val, adapter->buf_ring_size); 535 536 error = ena_update_buf_ring_size(adapter, val); 537 } else { 538 ena_log(adapter->pdev, ERR, 539 "New Tx buffer ring size is the same as already used: %u\n", 540 adapter->buf_ring_size); 541 } 542 543 unlock: 544 ENA_LOCK_UNLOCK(); 545 546 return (error); 547 } 548 549 static int 550 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 551 { 552 struct ena_adapter *adapter = arg1; 553 uint32_t val; 554 int error; 555 556 ENA_LOCK_LOCK(); 557 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 558 error = EINVAL; 559 goto unlock; 560 } 561 562 val = 0; 563 error = sysctl_wire_old_buffer(req, sizeof(val)); 564 if (error == 0) { 565 val = adapter->requested_rx_ring_size; 566 error = sysctl_handle_32(oidp, &val, 0, req); 567 } 568 if (error != 0 || req->newptr == NULL) 569 goto unlock; 570 571 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 572 ena_log(adapter->pdev, ERR, 573 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 574 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 575 error = EINVAL; 576 goto unlock; 577 } 578 579 /* Check if the parameter is power of 2 */ 580 if (!powerof2(val)) { 581 ena_log(adapter->pdev, ERR, 582 "Requested new Rx queue size (%u) is not a power of 2\n", 583 val); 584 error = EINVAL; 585 goto unlock; 586 } 587 588 if (val != adapter->requested_rx_ring_size) { 589 ena_log(adapter->pdev, INFO, 590 "Requested new Rx queue size: %u. Old size: %u\n", 591 val, adapter->requested_rx_ring_size); 592 593 error = ena_update_queue_size(adapter, 594 adapter->requested_tx_ring_size, val); 595 } else { 596 ena_log(adapter->pdev, ERR, 597 "New Rx queue size is the same as already used: %u\n", 598 adapter->requested_rx_ring_size); 599 } 600 601 unlock: 602 ENA_LOCK_UNLOCK(); 603 604 return (error); 605 } 606 607 /* 608 * Change number of effectively used IO queues adapter->num_io_queues 609 */ 610 static int 611 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 612 { 613 struct ena_adapter *adapter = arg1; 614 uint32_t old_num_queues, tmp = 0; 615 int error; 616 617 ENA_LOCK_LOCK(); 618 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 619 error = EINVAL; 620 goto unlock; 621 } 622 623 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 624 if (error == 0) { 625 tmp = adapter->num_io_queues; 626 error = sysctl_handle_int(oidp, &tmp, 0, req); 627 } 628 if (error != 0 || req->newptr == NULL) 629 goto unlock; 630 631 if (tmp == 0) { 632 ena_log(adapter->pdev, ERR, 633 "Requested number of IO queues is zero\n"); 634 error = EINVAL; 635 goto unlock; 636 } 637 638 /* 639 * The adapter::max_num_io_queues is the HW capability. The system 640 * resources availability may potentially be a tighter limit. Therefore 641 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 642 * always holds true, while the `adapter::msix_vecs` is variable across 643 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 644 */ 645 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 646 ena_log(adapter->pdev, ERR, 647 "Requested number of IO queues is higher than maximum " 648 "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 649 error = EINVAL; 650 goto unlock; 651 } 652 if (tmp == adapter->num_io_queues) { 653 ena_log(adapter->pdev, ERR, 654 "Requested number of IO queues is equal to current value " 655 "(%u)\n", adapter->num_io_queues); 656 } else { 657 ena_log(adapter->pdev, INFO, 658 "Requested new number of IO queues: %u, current value: " 659 "%u\n", tmp, adapter->num_io_queues); 660 661 old_num_queues = adapter->num_io_queues; 662 error = ena_update_io_queue_nb(adapter, tmp); 663 if (error != 0) 664 return (error); 665 666 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 667 } 668 669 unlock: 670 ENA_LOCK_UNLOCK(); 671 672 return (error); 673 } 674 675 static int 676 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS) 677 { 678 struct ena_adapter *adapter = arg1; 679 uint16_t interval; 680 int error; 681 682 ENA_LOCK_LOCK(); 683 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 684 error = EINVAL; 685 goto unlock; 686 } 687 688 error = sysctl_wire_old_buffer(req, sizeof(interval)); 689 if (error == 0) { 690 interval = adapter->eni_metrics_sample_interval; 691 error = sysctl_handle_16(oidp, &interval, 0, req); 692 } 693 if (error != 0 || req->newptr == NULL) 694 goto unlock; 695 696 if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) { 697 ena_log(adapter->pdev, ERR, 698 "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n", 699 ENI_METRICS_MAX_SAMPLE_INTERVAL); 700 error = EINVAL; 701 goto unlock; 702 } 703 704 if (interval == 0) { 705 ena_log(adapter->pdev, INFO, 706 "ENI metrics update is now turned off\n"); 707 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 708 } else { 709 ena_log(adapter->pdev, INFO, 710 "ENI metrics update interval is set to: %"PRIu16" seconds\n", 711 interval); 712 } 713 714 adapter->eni_metrics_sample_interval = interval; 715 716 unlock: 717 ENA_LOCK_UNLOCK(); 718 719 return (0); 720 } 721 722 #ifndef RSS 723 /* 724 * Change the Receive Side Scaling hash key. 725 */ 726 static int 727 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 728 { 729 struct ena_adapter *adapter = arg1; 730 struct ena_com_dev *ena_dev = adapter->ena_dev; 731 enum ena_admin_hash_functions ena_func; 732 char msg[ENA_HASH_KEY_MSG_SIZE]; 733 char elem[3] = { 0 }; 734 char *endp; 735 u8 rss_key[ENA_HASH_KEY_SIZE]; 736 int error, i; 737 738 ENA_LOCK_LOCK(); 739 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 740 error = EINVAL; 741 goto unlock; 742 } 743 744 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 745 error = ENOTSUP; 746 goto unlock; 747 } 748 749 error = sysctl_wire_old_buffer(req, sizeof(msg)); 750 if (error != 0) 751 goto unlock; 752 753 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 754 if (error != 0) { 755 device_printf(adapter->pdev, "Cannot get hash function\n"); 756 goto unlock; 757 } 758 759 if (ena_func != ENA_ADMIN_TOEPLITZ) { 760 error = EINVAL; 761 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 762 goto unlock; 763 } 764 765 error = ena_rss_get_hash_key(ena_dev, rss_key); 766 if (error != 0) { 767 device_printf(adapter->pdev, "Cannot get hash key\n"); 768 goto unlock; 769 } 770 771 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 772 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 773 774 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 775 if (error != 0 || req->newptr == NULL) 776 goto unlock; 777 778 if (strlen(msg) != sizeof(msg) - 1) { 779 error = EINVAL; 780 device_printf(adapter->pdev, "Invalid key size\n"); 781 goto unlock; 782 } 783 784 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 785 strncpy(elem, &msg[i * 2], 2); 786 rss_key[i] = strtol(elem, &endp, 16); 787 788 /* Both hex nibbles in the string must be valid to continue. */ 789 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 790 error = EINVAL; 791 device_printf(adapter->pdev, 792 "Invalid key hex value: '%c'\n", *endp); 793 goto unlock; 794 } 795 } 796 797 error = ena_rss_set_hash(ena_dev, rss_key); 798 if (error != 0) 799 device_printf(adapter->pdev, "Cannot fill hash key\n"); 800 801 unlock: 802 ENA_LOCK_UNLOCK(); 803 804 return (error); 805 } 806 807 /* 808 * Change the Receive Side Scaling indirection table. 809 * 810 * The sysctl entry string consists of one or more `x:y` keypairs, where 811 * x stands for the table index and y for its new value. 812 * Table indices that don't need to be updated can be omitted from the string 813 * and will retain their existing values. If an index is entered more than once, 814 * the last value is used. 815 * 816 * Example: 817 * To update two selected indices in the RSS indirection table, e.g. setting 818 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 819 * used: 820 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 821 */ 822 static int 823 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 824 { 825 int num_queues, error; 826 struct ena_adapter *adapter = arg1; 827 struct ena_com_dev *ena_dev; 828 struct ena_indir *indir; 829 char *msg, *buf, *endp; 830 uint32_t idx, value; 831 832 ENA_LOCK_LOCK(); 833 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 834 error = EINVAL; 835 goto unlock; 836 } 837 838 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 839 error = ENOTSUP; 840 goto unlock; 841 } 842 843 ena_dev = adapter->ena_dev; 844 indir = adapter->rss_indir; 845 msg = indir->sysctl_buf; 846 847 if (unlikely(indir == NULL)) { 848 error = ENOTSUP; 849 goto unlock; 850 } 851 852 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 853 if (error != 0 || req->newptr == NULL) 854 goto unlock; 855 856 num_queues = adapter->num_io_queues; 857 858 /* 859 * This sysctl expects msg to be a list of `x:y` record pairs, 860 * where x is the indirection table index and y is its value. 861 */ 862 for (buf = msg; *buf != '\0'; buf = endp) { 863 idx = strtol(buf, &endp, 10); 864 865 if (endp == buf || idx < 0) { 866 device_printf(adapter->pdev, "Invalid index: %s\n", 867 buf); 868 error = EINVAL; 869 break; 870 } 871 872 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 873 device_printf(adapter->pdev, "Index %d out of range\n", 874 idx); 875 error = ERANGE; 876 break; 877 } 878 879 buf = endp; 880 881 if (*buf++ != ':') { 882 device_printf(adapter->pdev, "Missing ':' separator\n"); 883 error = EINVAL; 884 break; 885 } 886 887 value = strtol(buf, &endp, 10); 888 889 if (endp == buf || value < 0) { 890 device_printf(adapter->pdev, "Invalid value: %s\n", 891 buf); 892 error = EINVAL; 893 break; 894 } 895 896 if (value >= num_queues) { 897 device_printf(adapter->pdev, "Value %d out of range\n", 898 value); 899 error = ERANGE; 900 break; 901 } 902 903 indir->table[idx] = value; 904 } 905 906 if (error != 0) /* Reload indirection table with last good data. */ 907 ena_rss_indir_get(adapter, indir->table); 908 909 /* At this point msg has been clobbered by sysctl_handle_string. */ 910 ena_rss_copy_indir_buf(msg, indir->table); 911 912 if (error == 0) 913 error = ena_rss_indir_set(adapter, indir->table); 914 915 unlock: 916 ENA_LOCK_UNLOCK(); 917 918 return (error); 919 } 920 #endif /* RSS */ 921