1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/param.h> 32 #include "opt_rss.h" 33 34 #include "ena_rss.h" 35 #include "ena_sysctl.h" 36 37 static void ena_sysctl_add_wd(struct ena_adapter *); 38 static void ena_sysctl_add_stats(struct ena_adapter *); 39 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 40 static void ena_sysctl_add_customer_metrics(struct ena_adapter *); 41 static void ena_sysctl_add_srd_info(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 static void ena_sysctl_add_irq_affinity(struct ena_adapter *); 44 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 45 #ifndef RSS 46 static void ena_sysctl_add_rss(struct ena_adapter *); 47 #endif 48 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 51 static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS); 52 static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS); 54 #ifndef RSS 55 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 56 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 57 #endif 58 59 /* Limit max ENA sample rate to be an hour. */ 60 #define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600 61 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 62 63 #define SYSCTL_GSTRING_LEN 128 64 65 #define ENA_METRIC_ENI_ENTRY(stat, desc) { \ 66 .name = #stat, \ 67 .description = #desc, \ 68 } 69 70 #define ENA_STAT_ENTRY(stat, desc, stat_type) { \ 71 .name = #stat, \ 72 .description = #desc, \ 73 .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \ 74 } 75 76 #define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \ 77 ENA_STAT_ENTRY(stat, desc, ena_srd_stats) 78 79 struct ena_hw_metrics { 80 char name[SYSCTL_GSTRING_LEN]; 81 char description[SYSCTL_GSTRING_LEN]; 82 }; 83 84 struct ena_srd_metrics { 85 char name[SYSCTL_GSTRING_LEN]; 86 char description[SYSCTL_GSTRING_LEN]; 87 int stat_offset; 88 }; 89 90 static const struct ena_srd_metrics ena_srd_stats_strings[] = { 91 ENA_STAT_ENA_SRD_ENTRY( 92 ena_srd_tx_pkts, Number of packets transmitted over ENA SRD), 93 ENA_STAT_ENA_SRD_ENTRY( 94 ena_srd_eligible_tx_pkts, Number of packets transmitted or could 95 have been transmitted over ENA SRD), 96 ENA_STAT_ENA_SRD_ENTRY( 97 ena_srd_rx_pkts, Number of packets received over ENA SRD), 98 ENA_STAT_ENA_SRD_ENTRY( 99 ena_srd_resource_utilization, Percentage of the ENA SRD resources 100 that are in use), 101 }; 102 103 static const struct ena_hw_metrics ena_hw_stats_strings[] = { 104 ENA_METRIC_ENI_ENTRY( 105 bw_in_allowance_exceeded, Inbound BW allowance exceeded), 106 ENA_METRIC_ENI_ENTRY( 107 bw_out_allowance_exceeded, Outbound BW allowance exceeded), 108 ENA_METRIC_ENI_ENTRY( 109 pps_allowance_exceeded, PPS allowance exceeded), 110 ENA_METRIC_ENI_ENTRY( 111 conntrack_allowance_exceeded, Connection tracking allowance exceeded), 112 ENA_METRIC_ENI_ENTRY( 113 linklocal_allowance_exceeded, Linklocal packet rate allowance), 114 ENA_METRIC_ENI_ENTRY( 115 conntrack_allowance_available, Number of available conntracks), 116 }; 117 118 #ifndef ARRAY_SIZE 119 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 120 #endif 121 122 #define ENA_CUSTOMER_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_hw_stats_strings) 123 #define ENA_SRD_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_srd_stats_strings) 124 125 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 126 "ENA driver parameters"); 127 128 /* 129 * Logging level for changing verbosity of the output 130 */ 131 int ena_log_level = ENA_INFO; 132 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, 133 "Logging level indicating verbosity of the logs"); 134 135 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 136 ENA_DRV_MODULE_VERSION, "ENA driver version"); 137 138 /* 139 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 140 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 141 * of time and lead to the OS instability as it needs to look for the contiguous 142 * pages. 143 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 144 * the network performance is the priority, the 9k mbufs can be used. 145 */ 146 int ena_enable_9k_mbufs = 0; 147 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 148 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 149 150 /* 151 * Force the driver to use large or regular LLQ (Low Latency Queue) header size. 152 * Defaults to ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT. This option may be 153 * important for platforms, which often handle packet headers on Tx with total 154 * header size greater than 96B, as it may reduce the latency. 155 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 156 * packet drops. 157 */ 158 int ena_force_large_llq_header = ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT; 159 SYSCTL_INT(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 160 &ena_force_large_llq_header, 0, 161 "Change default LLQ entry size received from the device"); 162 163 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 164 165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter) 166 { 167 int rc = 0; 168 169 adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE), 170 M_DEVBUF, M_NOWAIT | M_ZERO); 171 if (unlikely(adapter->customer_metrics_array == NULL)) 172 rc = ENOMEM; 173 174 return rc; 175 } 176 void 177 ena_sysctl_add_nodes(struct ena_adapter *adapter) 178 { 179 struct ena_com_dev *dev = adapter->ena_dev; 180 181 if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS)) 182 ena_sysctl_add_customer_metrics(adapter); 183 else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) 184 ena_sysctl_add_eni_metrics(adapter); 185 186 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 187 ena_sysctl_add_srd_info(adapter); 188 189 ena_sysctl_add_wd(adapter); 190 ena_sysctl_add_stats(adapter); 191 ena_sysctl_add_tuneables(adapter); 192 ena_sysctl_add_irq_affinity(adapter); 193 #ifndef RSS 194 ena_sysctl_add_rss(adapter); 195 #endif 196 } 197 198 static void 199 ena_sysctl_add_wd(struct ena_adapter *adapter) 200 { 201 device_t dev; 202 203 struct sysctl_ctx_list *ctx; 204 struct sysctl_oid *tree; 205 struct sysctl_oid_list *child; 206 207 dev = adapter->pdev; 208 209 ctx = device_get_sysctl_ctx(dev); 210 tree = device_get_sysctl_tree(dev); 211 child = SYSCTL_CHILDREN(tree); 212 213 /* Sysctl calls for Watchdog service */ 214 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN, 215 &adapter->wd_active, 0, "Watchdog is active"); 216 217 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 218 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 219 "Timeout for Keep Alive messages"); 220 221 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 222 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 223 "Timeout for TX completion"); 224 225 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 226 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 227 "Number of TX queues to check per run"); 228 229 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 230 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 231 "Max number of timeouted packets"); 232 } 233 234 static void 235 ena_sysctl_add_stats(struct ena_adapter *adapter) 236 { 237 device_t dev; 238 239 struct ena_ring *tx_ring; 240 struct ena_ring *rx_ring; 241 242 struct ena_hw_stats *hw_stats; 243 struct ena_stats_dev *dev_stats; 244 struct ena_stats_tx *tx_stats; 245 struct ena_stats_rx *rx_stats; 246 struct ena_com_stats_admin *admin_stats; 247 248 struct sysctl_ctx_list *ctx; 249 struct sysctl_oid *tree; 250 struct sysctl_oid_list *child; 251 252 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 253 struct sysctl_oid *admin_node; 254 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 255 struct sysctl_oid_list *admin_list; 256 257 #define QUEUE_NAME_LEN 32 258 char namebuf[QUEUE_NAME_LEN]; 259 int i; 260 261 dev = adapter->pdev; 262 263 ctx = device_get_sysctl_ctx(dev); 264 tree = device_get_sysctl_tree(dev); 265 child = SYSCTL_CHILDREN(tree); 266 267 tx_ring = adapter->tx_ring; 268 rx_ring = adapter->rx_ring; 269 270 hw_stats = &adapter->hw_stats; 271 dev_stats = &adapter->dev_stats; 272 admin_stats = &adapter->ena_dev->admin_queue.stats; 273 274 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, 275 &dev_stats->wd_expired, "Watchdog expiry count"); 276 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, 277 &dev_stats->interface_up, "Network interface up count"); 278 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD, 279 &dev_stats->interface_down, "Network interface down count"); 280 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD, 281 &dev_stats->admin_q_pause, "Admin queue pauses"); 282 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "os_trigger", CTLFLAG_RD, 283 &dev_stats->os_trigger, "OS trigger count"); 284 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_tx_cmpl", CTLFLAG_RD, 285 &dev_stats->missing_tx_cmpl, "Missing TX completions resets count"); 286 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_req_id", CTLFLAG_RD, 287 &dev_stats->bad_rx_req_id, "Bad RX req id count"); 288 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_tx_req_id", CTLFLAG_RD, 289 &dev_stats->bad_tx_req_id, "Bad TX req id count"); 290 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_desc_num", CTLFLAG_RD, 291 &dev_stats->bad_rx_desc_num, "Bad RX descriptors number count"); 292 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "invalid_state", CTLFLAG_RD, 293 &dev_stats->invalid_state, "Driver invalid state count"); 294 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD, 295 &dev_stats->missing_intr, "Missing interrupt count"); 296 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD, 297 &dev_stats->tx_desc_malformed, "TX descriptors malformed count"); 298 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD, 299 &dev_stats->rx_desc_malformed, "RX descriptors malformed count"); 300 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD, 301 &dev_stats->missing_admin_interrupt, "Missing admin interrupts count"); 302 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD, 303 &dev_stats->admin_to, "Admin queue timeouts count"); 304 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD, 305 &dev_stats->device_request_reset, "Device reset requests count"); 306 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD, 307 &dev_stats->total_resets, "Total resets count"); 308 309 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 310 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 311 312 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 313 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 314 queue_list = SYSCTL_CHILDREN(queue_node); 315 316 adapter->que[i].oid = queue_node; 317 318 #ifdef RSS 319 /* Common stats */ 320 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, 321 &adapter->que[i].cpu, 0, "CPU affinity"); 322 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD, 323 &adapter->que[i].domain, 0, "NUMA domain"); 324 #endif 325 326 /* TX specific stats */ 327 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", 328 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 329 tx_list = SYSCTL_CHILDREN(tx_node); 330 331 tx_stats = &tx_ring->tx_stats; 332 333 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", 334 CTLFLAG_RD, &tx_stats->cnt, "Packets sent"); 335 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", 336 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); 337 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 338 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, 339 "TX buffer preparation failures"); 340 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 341 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, 342 "DMA mapping failures"); 343 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", 344 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); 345 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 346 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, 347 "TX completions missed"); 348 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", 349 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); 350 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses", 351 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count"); 352 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 353 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err, 354 "Mbuf collapse failures"); 355 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups", 356 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); 357 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops", 358 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); 359 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 360 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy, 361 "Header copies for llq transaction"); 362 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 363 "unmask_interrupt_num", CTLFLAG_RD, 364 &tx_stats->unmask_interrupt_num, 365 "Unmasked interrupt count"); 366 367 /* RX specific stats */ 368 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", 369 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 370 rx_list = SYSCTL_CHILDREN(rx_node); 371 372 rx_stats = &rx_ring->rx_stats; 373 374 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", 375 CTLFLAG_RD, &rx_stats->cnt, "Packets received"); 376 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", 377 CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); 378 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", 379 CTLFLAG_RD, &rx_stats->refil_partial, 380 "Partial refilled mbufs"); 381 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad", 382 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum"); 383 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 384 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, 385 "Failed mbuf allocs"); 386 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 387 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail, 388 "Failed jumbo mbuf allocs"); 389 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 390 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, 391 "DMA mapping errors"); 392 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", 393 CTLFLAG_RD, &rx_stats->bad_desc_num, 394 "Bad descriptor count"); 395 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", 396 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); 397 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring", 398 CTLFLAG_RD, &rx_stats->empty_rx_ring, 399 "RX descriptors depletion count"); 400 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good", 401 CTLFLAG_RD, &rx_stats->csum_good, 402 "Valid RX checksum calculations"); 403 } 404 405 /* Stats read from device */ 406 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 407 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 408 hw_list = SYSCTL_CHILDREN(hw_node); 409 410 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 411 &hw_stats->rx_packets, "Packets received"); 412 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 413 &hw_stats->tx_packets, "Packets transmitted"); 414 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 415 &hw_stats->rx_bytes, "Bytes received"); 416 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 417 &hw_stats->tx_bytes, "Bytes transmitted"); 418 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 419 &hw_stats->rx_drops, "Receive packet drops"); 420 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 421 &hw_stats->tx_drops, "Transmit packet drops"); 422 423 /* ENA Admin queue stats */ 424 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 425 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 426 admin_list = SYSCTL_CHILDREN(admin_node); 427 428 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 429 &admin_stats->aborted_cmd, 0, "Aborted commands"); 430 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 431 &admin_stats->submitted_cmd, 0, "Submitted commands"); 432 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 433 &admin_stats->completed_cmd, 0, "Completed commands"); 434 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 435 &admin_stats->out_of_space, 0, "Queue out of space"); 436 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 437 &admin_stats->no_completion, 0, "Commands not completed"); 438 } 439 440 static void 441 ena_sysctl_add_srd_info(struct ena_adapter *adapter) 442 { 443 device_t dev; 444 445 struct sysctl_oid *ena_srd_info; 446 struct sysctl_oid_list *srd_list; 447 448 struct sysctl_ctx_list *ctx; 449 struct sysctl_oid *tree; 450 struct sysctl_oid_list *child; 451 452 struct ena_admin_ena_srd_stats *srd_stats_ptr; 453 struct ena_srd_metrics cur_stat_strings; 454 455 int i; 456 457 dev = adapter->pdev; 458 459 ctx = device_get_sysctl_ctx(dev); 460 tree = device_get_sysctl_tree(dev); 461 child = SYSCTL_CHILDREN(tree); 462 463 ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info", 464 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information"); 465 srd_list = SYSCTL_CHILDREN(ena_srd_info); 466 467 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode", 468 CTLFLAG_RD, &adapter->ena_srd_info.flags, 0, 469 "Describes which ENA-express features are enabled"); 470 471 srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats; 472 473 for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) { 474 cur_stat_strings = ena_srd_stats_strings[i]; 475 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name, 476 CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset, 477 0, cur_stat_strings.description); 478 } 479 } 480 481 static void 482 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter) 483 { 484 device_t dev; 485 struct ena_com_dev *ena_dev; 486 487 struct sysctl_ctx_list *ctx; 488 struct sysctl_oid *tree; 489 struct sysctl_oid_list *child; 490 491 struct sysctl_oid *customer_metric; 492 struct sysctl_oid_list *customer_list; 493 494 int i; 495 496 dev = adapter->pdev; 497 ena_dev = adapter->ena_dev; 498 499 ctx = device_get_sysctl_ctx(dev); 500 tree = device_get_sysctl_tree(dev); 501 child = SYSCTL_CHILDREN(tree); 502 customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics", 503 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics"); 504 customer_list = SYSCTL_CHILDREN(customer_metric); 505 506 for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) { 507 if (ena_com_get_customer_metric_support(ena_dev, i)) { 508 SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name, 509 CTLFLAG_RD, &adapter->customer_metrics_array[i], 0, 510 ena_hw_stats_strings[i].description); 511 } 512 } 513 } 514 515 static void 516 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 517 { 518 device_t dev; 519 struct ena_admin_eni_stats *eni_metrics; 520 521 struct sysctl_ctx_list *ctx; 522 struct sysctl_oid *tree; 523 struct sysctl_oid_list *child; 524 525 struct sysctl_oid *eni_node; 526 struct sysctl_oid_list *eni_list; 527 528 dev = adapter->pdev; 529 530 ctx = device_get_sysctl_ctx(dev); 531 tree = device_get_sysctl_tree(dev); 532 child = SYSCTL_CHILDREN(tree); 533 534 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 535 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 536 eni_list = SYSCTL_CHILDREN(eni_node); 537 538 eni_metrics = &adapter->eni_metrics; 539 540 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 541 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 542 "Inbound BW allowance exceeded"); 543 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 544 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 545 "Outbound BW allowance exceeded"); 546 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 547 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 548 "PPS allowance exceeded"); 549 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 550 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 551 "Connection tracking allowance exceeded"); 552 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 553 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 554 "Linklocal packet rate allowance exceeded"); 555 } 556 557 static void 558 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 559 { 560 device_t dev; 561 562 struct sysctl_ctx_list *ctx; 563 struct sysctl_oid *tree; 564 struct sysctl_oid_list *child; 565 566 dev = adapter->pdev; 567 568 ctx = device_get_sysctl_ctx(dev); 569 tree = device_get_sysctl_tree(dev); 570 child = SYSCTL_CHILDREN(tree); 571 572 /* Tuneable number of buffers in the buf-ring (drbr) */ 573 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 574 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 575 ena_sysctl_buf_ring_size, "I", 576 "Size of the Tx buffer ring (drbr)."); 577 578 /* Tuneable number of the Rx ring size */ 579 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 580 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 581 ena_sysctl_rx_queue_size, "I", 582 "Size of the Rx ring. The size should be a power of 2."); 583 584 /* Tuneable number of IO queues */ 585 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 586 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 587 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 588 589 /* 590 * Tuneable, which determines how often ENA metrics will be read. 591 * 0 means it's turned off. Maximum allowed value is limited by: 592 * ENA_METRICS_MAX_SAMPLE_INTERVAL. 593 */ 594 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval", 595 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 596 ena_sysctl_metrics_interval, "SU", 597 "Interval in seconds for updating Netword interface metrics. 0 turns off the update."); 598 } 599 600 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 601 #ifndef RSS 602 static void 603 ena_sysctl_add_rss(struct ena_adapter *adapter) 604 { 605 device_t dev; 606 607 struct sysctl_ctx_list *ctx; 608 struct sysctl_oid *tree; 609 struct sysctl_oid_list *child; 610 611 dev = adapter->pdev; 612 613 ctx = device_get_sysctl_ctx(dev); 614 tree = device_get_sysctl_tree(dev); 615 child = SYSCTL_CHILDREN(tree); 616 617 /* RSS options */ 618 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 619 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 620 child = SYSCTL_CHILDREN(tree); 621 622 /* RSS hash key */ 623 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 624 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 625 ena_sysctl_rss_key, "A", "RSS key."); 626 627 /* Tuneable RSS indirection table */ 628 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 629 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 630 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 631 632 /* RSS indirection table size */ 633 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 634 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 635 "RSS indirection table size."); 636 } 637 #endif /* RSS */ 638 639 static void 640 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter) 641 { 642 device_t dev; 643 644 struct sysctl_ctx_list *ctx; 645 struct sysctl_oid *tree; 646 struct sysctl_oid_list *child; 647 648 dev = adapter->pdev; 649 650 ctx = device_get_sysctl_ctx(dev); 651 tree = device_get_sysctl_tree(dev); 652 child = SYSCTL_CHILDREN(tree); 653 654 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity", 655 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity."); 656 child = SYSCTL_CHILDREN(tree); 657 658 /* Add base cpu leaf */ 659 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu", 660 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 661 ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity."); 662 663 /* Add cpu stride leaf */ 664 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride", 665 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 666 ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity."); 667 } 668 669 670 /* 671 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 672 * 673 * Whether the nodes are registered or unregistered depends on a delta between 674 * the `old` and `new` parameters, representing the number of queues. 675 * 676 * This function is used to hide sysctl attributes for queue nodes which aren't 677 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 678 * 679 * NOTE: 680 * All unregistered nodes must be registered again at detach, i.e. by a call to 681 * this function. 682 */ 683 void 684 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 685 { 686 struct sysctl_oid *oid; 687 int min, max, i; 688 689 min = MIN(old, new); 690 max = MIN(MAX(old, new), adapter->max_num_io_queues); 691 692 for (i = min; i < max; ++i) { 693 oid = adapter->que[i].oid; 694 695 sysctl_wlock(); 696 if (old > new) 697 sysctl_unregister_oid(oid); 698 else 699 sysctl_register_oid(oid); 700 sysctl_wunlock(); 701 } 702 } 703 704 static int 705 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 706 { 707 struct ena_adapter *adapter = arg1; 708 uint32_t val; 709 int error; 710 711 ENA_LOCK_LOCK(); 712 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 713 error = EINVAL; 714 goto unlock; 715 } 716 717 val = 0; 718 error = sysctl_wire_old_buffer(req, sizeof(val)); 719 if (error == 0) { 720 val = adapter->buf_ring_size; 721 error = sysctl_handle_32(oidp, &val, 0, req); 722 } 723 if (error != 0 || req->newptr == NULL) 724 goto unlock; 725 726 if (!powerof2(val) || val == 0) { 727 ena_log(adapter->pdev, ERR, 728 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 729 val); 730 error = EINVAL; 731 goto unlock; 732 } 733 734 if (val != adapter->buf_ring_size) { 735 ena_log(adapter->pdev, INFO, 736 "Requested new Tx buffer ring size: %d. Old size: %d\n", 737 val, adapter->buf_ring_size); 738 739 error = ena_update_buf_ring_size(adapter, val); 740 } else { 741 ena_log(adapter->pdev, ERR, 742 "New Tx buffer ring size is the same as already used: %u\n", 743 adapter->buf_ring_size); 744 } 745 746 unlock: 747 ENA_LOCK_UNLOCK(); 748 749 return (error); 750 } 751 752 static int 753 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 754 { 755 struct ena_adapter *adapter = arg1; 756 uint32_t val; 757 int error; 758 759 ENA_LOCK_LOCK(); 760 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 761 error = EINVAL; 762 goto unlock; 763 } 764 765 val = 0; 766 error = sysctl_wire_old_buffer(req, sizeof(val)); 767 if (error == 0) { 768 val = adapter->requested_rx_ring_size; 769 error = sysctl_handle_32(oidp, &val, 0, req); 770 } 771 if (error != 0 || req->newptr == NULL) 772 goto unlock; 773 774 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 775 ena_log(adapter->pdev, ERR, 776 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 777 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 778 error = EINVAL; 779 goto unlock; 780 } 781 782 /* Check if the parameter is power of 2 */ 783 if (!powerof2(val)) { 784 ena_log(adapter->pdev, ERR, 785 "Requested new Rx queue size (%u) is not a power of 2\n", 786 val); 787 error = EINVAL; 788 goto unlock; 789 } 790 791 if (val != adapter->requested_rx_ring_size) { 792 ena_log(adapter->pdev, INFO, 793 "Requested new Rx queue size: %u. Old size: %u\n", val, 794 adapter->requested_rx_ring_size); 795 796 error = ena_update_queue_size(adapter, 797 adapter->requested_tx_ring_size, val); 798 } else { 799 ena_log(adapter->pdev, ERR, 800 "New Rx queue size is the same as already used: %u\n", 801 adapter->requested_rx_ring_size); 802 } 803 804 unlock: 805 ENA_LOCK_UNLOCK(); 806 807 return (error); 808 } 809 810 /* 811 * Change number of effectively used IO queues adapter->num_io_queues 812 */ 813 static int 814 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 815 { 816 struct ena_adapter *adapter = arg1; 817 uint32_t old_num_queues, tmp = 0; 818 int error; 819 820 ENA_LOCK_LOCK(); 821 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 822 error = EINVAL; 823 goto unlock; 824 } 825 826 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 827 if (error == 0) { 828 tmp = adapter->num_io_queues; 829 error = sysctl_handle_int(oidp, &tmp, 0, req); 830 } 831 if (error != 0 || req->newptr == NULL) 832 goto unlock; 833 834 if (tmp == 0) { 835 ena_log(adapter->pdev, ERR, 836 "Requested number of IO queues is zero\n"); 837 error = EINVAL; 838 goto unlock; 839 } 840 841 /* 842 * The adapter::max_num_io_queues is the HW capability. The system 843 * resources availability may potentially be a tighter limit. Therefore 844 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 845 * always holds true, while the `adapter::msix_vecs` is variable across 846 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 847 */ 848 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 849 ena_log(adapter->pdev, ERR, 850 "Requested number of IO queues is higher than maximum allowed (%u)\n", 851 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 852 error = EINVAL; 853 goto unlock; 854 } 855 if (tmp == adapter->num_io_queues) { 856 ena_log(adapter->pdev, ERR, 857 "Requested number of IO queues is equal to current value " 858 "(%u)\n", 859 adapter->num_io_queues); 860 } else { 861 ena_log(adapter->pdev, INFO, 862 "Requested new number of IO queues: %u, current value: " 863 "%u\n", 864 tmp, adapter->num_io_queues); 865 866 old_num_queues = adapter->num_io_queues; 867 error = ena_update_io_queue_nb(adapter, tmp); 868 if (error != 0) 869 return (error); 870 871 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 872 } 873 874 unlock: 875 ENA_LOCK_UNLOCK(); 876 877 return (error); 878 } 879 880 static int 881 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS) 882 { 883 struct ena_adapter *adapter = arg1; 884 uint16_t interval; 885 int error; 886 887 ENA_LOCK_LOCK(); 888 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 889 error = EINVAL; 890 goto unlock; 891 } 892 893 error = sysctl_wire_old_buffer(req, sizeof(interval)); 894 if (error == 0) { 895 interval = adapter->metrics_sample_interval; 896 error = sysctl_handle_16(oidp, &interval, 0, req); 897 } 898 if (error != 0 || req->newptr == NULL) 899 goto unlock; 900 901 if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) { 902 ena_log(adapter->pdev, ERR, 903 "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n", 904 ENA_METRICS_MAX_SAMPLE_INTERVAL); 905 error = EINVAL; 906 goto unlock; 907 } 908 909 if (interval == 0) { 910 ena_log(adapter->pdev, INFO, 911 "ENA metrics update is now turned off\n"); 912 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 913 } else { 914 ena_log(adapter->pdev, INFO, 915 "ENA metrics update interval is set to: %" PRIu16 916 " seconds\n", 917 interval); 918 } 919 920 adapter->metrics_sample_interval = interval; 921 922 unlock: 923 ENA_LOCK_UNLOCK(); 924 925 return (0); 926 } 927 928 static int 929 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS) 930 { 931 struct ena_adapter *adapter = arg1; 932 int irq_base_cpu = 0; 933 int error; 934 935 ENA_LOCK_LOCK(); 936 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 937 error = ENODEV; 938 goto unlock; 939 } 940 941 error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu)); 942 if (error == 0) { 943 irq_base_cpu = adapter->irq_cpu_base; 944 error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req); 945 } 946 if (error != 0 || req->newptr == NULL) 947 goto unlock; 948 949 if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) { 950 ena_log(adapter->pdev, ERR, 951 "Requested base CPU is less than zero.\n"); 952 error = EINVAL; 953 goto unlock; 954 } 955 956 if (irq_base_cpu > mp_ncpus) { 957 ena_log(adapter->pdev, INFO, 958 "Requested base CPU is larger than the number of available CPUs. \n"); 959 error = EINVAL; 960 goto unlock; 961 962 } 963 964 if (irq_base_cpu == adapter->irq_cpu_base) { 965 ena_log(adapter->pdev, INFO, 966 "Requested IRQ base CPU is equal to current value " 967 "(%d)\n", 968 adapter->irq_cpu_base); 969 goto unlock; 970 } 971 972 ena_log(adapter->pdev, INFO, 973 "Requested new IRQ base CPU: %d, current value: %d\n", 974 irq_base_cpu, adapter->irq_cpu_base); 975 976 error = ena_update_base_cpu(adapter, irq_base_cpu); 977 978 unlock: 979 ENA_LOCK_UNLOCK(); 980 981 return (error); 982 } 983 984 static int 985 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS) 986 { 987 struct ena_adapter *adapter = arg1; 988 int32_t irq_cpu_stride = 0; 989 int error; 990 991 ENA_LOCK_LOCK(); 992 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 993 error = ENODEV; 994 goto unlock; 995 } 996 997 error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride)); 998 if (error == 0) { 999 irq_cpu_stride = adapter->irq_cpu_stride; 1000 error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req); 1001 } 1002 if (error != 0 || req->newptr == NULL) 1003 goto unlock; 1004 1005 if (irq_cpu_stride < 0) { 1006 ena_log(adapter->pdev, ERR, 1007 "Requested IRQ stride is less than zero.\n"); 1008 error = EINVAL; 1009 goto unlock; 1010 } 1011 1012 if (irq_cpu_stride > mp_ncpus) { 1013 ena_log(adapter->pdev, INFO, 1014 "Warning: Requested IRQ stride is larger than the number of available CPUs.\n"); 1015 } 1016 1017 if (irq_cpu_stride == adapter->irq_cpu_stride) { 1018 ena_log(adapter->pdev, INFO, 1019 "Requested IRQ CPU stride is equal to current value " 1020 "(%u)\n", 1021 adapter->irq_cpu_stride); 1022 goto unlock; 1023 } 1024 1025 ena_log(adapter->pdev, INFO, 1026 "Requested new IRQ CPU stride: %u, current value: %u\n", 1027 irq_cpu_stride, adapter->irq_cpu_stride); 1028 1029 error = ena_update_cpu_stride(adapter, irq_cpu_stride); 1030 if (error != 0) 1031 goto unlock; 1032 1033 unlock: 1034 ENA_LOCK_UNLOCK(); 1035 1036 return (error); 1037 } 1038 1039 #ifndef RSS 1040 /* 1041 * Change the Receive Side Scaling hash key. 1042 */ 1043 static int 1044 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 1045 { 1046 struct ena_adapter *adapter = arg1; 1047 struct ena_com_dev *ena_dev = adapter->ena_dev; 1048 enum ena_admin_hash_functions ena_func; 1049 char msg[ENA_HASH_KEY_MSG_SIZE]; 1050 char elem[3] = { 0 }; 1051 char *endp; 1052 u8 rss_key[ENA_HASH_KEY_SIZE]; 1053 int error, i; 1054 1055 ENA_LOCK_LOCK(); 1056 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1057 error = EINVAL; 1058 goto unlock; 1059 } 1060 1061 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1062 error = ENOTSUP; 1063 goto unlock; 1064 } 1065 1066 error = sysctl_wire_old_buffer(req, sizeof(msg)); 1067 if (error != 0) 1068 goto unlock; 1069 1070 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 1071 if (error != 0) { 1072 device_printf(adapter->pdev, "Cannot get hash function\n"); 1073 goto unlock; 1074 } 1075 1076 if (ena_func != ENA_ADMIN_TOEPLITZ) { 1077 error = EINVAL; 1078 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 1079 goto unlock; 1080 } 1081 1082 error = ena_rss_get_hash_key(ena_dev, rss_key); 1083 if (error != 0) { 1084 device_printf(adapter->pdev, "Cannot get hash key\n"); 1085 goto unlock; 1086 } 1087 1088 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 1089 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 1090 1091 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 1092 if (error != 0 || req->newptr == NULL) 1093 goto unlock; 1094 1095 if (strlen(msg) != sizeof(msg) - 1) { 1096 error = EINVAL; 1097 device_printf(adapter->pdev, "Invalid key size\n"); 1098 goto unlock; 1099 } 1100 1101 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 1102 strncpy(elem, &msg[i * 2], 2); 1103 rss_key[i] = strtol(elem, &endp, 16); 1104 1105 /* Both hex nibbles in the string must be valid to continue. */ 1106 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 1107 error = EINVAL; 1108 device_printf(adapter->pdev, 1109 "Invalid key hex value: '%c'\n", *endp); 1110 goto unlock; 1111 } 1112 } 1113 1114 error = ena_rss_set_hash(ena_dev, rss_key); 1115 if (error != 0) 1116 device_printf(adapter->pdev, "Cannot fill hash key\n"); 1117 1118 unlock: 1119 ENA_LOCK_UNLOCK(); 1120 1121 return (error); 1122 } 1123 1124 /* 1125 * Change the Receive Side Scaling indirection table. 1126 * 1127 * The sysctl entry string consists of one or more `x:y` keypairs, where 1128 * x stands for the table index and y for its new value. 1129 * Table indices that don't need to be updated can be omitted from the string 1130 * and will retain their existing values. If an index is entered more than once, 1131 * the last value is used. 1132 * 1133 * Example: 1134 * To update two selected indices in the RSS indirection table, e.g. setting 1135 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 1136 * used: 1137 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 1138 */ 1139 static int 1140 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 1141 { 1142 int num_queues, error; 1143 struct ena_adapter *adapter = arg1; 1144 struct ena_indir *indir; 1145 char *msg, *buf, *endp; 1146 uint32_t idx, value; 1147 1148 ENA_LOCK_LOCK(); 1149 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1150 error = EINVAL; 1151 goto unlock; 1152 } 1153 1154 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1155 error = ENOTSUP; 1156 goto unlock; 1157 } 1158 1159 indir = adapter->rss_indir; 1160 msg = indir->sysctl_buf; 1161 1162 if (unlikely(indir == NULL)) { 1163 error = ENOTSUP; 1164 goto unlock; 1165 } 1166 1167 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 1168 if (error != 0 || req->newptr == NULL) 1169 goto unlock; 1170 1171 num_queues = adapter->num_io_queues; 1172 1173 /* 1174 * This sysctl expects msg to be a list of `x:y` record pairs, 1175 * where x is the indirection table index and y is its value. 1176 */ 1177 for (buf = msg; *buf != '\0'; buf = endp) { 1178 idx = strtol(buf, &endp, 10); 1179 1180 if (endp == buf || idx < 0) { 1181 device_printf(adapter->pdev, "Invalid index: %s\n", 1182 buf); 1183 error = EINVAL; 1184 break; 1185 } 1186 1187 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 1188 device_printf(adapter->pdev, "Index %d out of range\n", 1189 idx); 1190 error = ERANGE; 1191 break; 1192 } 1193 1194 buf = endp; 1195 1196 if (*buf++ != ':') { 1197 device_printf(adapter->pdev, "Missing ':' separator\n"); 1198 error = EINVAL; 1199 break; 1200 } 1201 1202 value = strtol(buf, &endp, 10); 1203 1204 if (endp == buf || value < 0) { 1205 device_printf(adapter->pdev, "Invalid value: %s\n", 1206 buf); 1207 error = EINVAL; 1208 break; 1209 } 1210 1211 if (value >= num_queues) { 1212 device_printf(adapter->pdev, "Value %d out of range\n", 1213 value); 1214 error = ERANGE; 1215 break; 1216 } 1217 1218 indir->table[idx] = value; 1219 } 1220 1221 if (error != 0) /* Reload indirection table with last good data. */ 1222 ena_rss_indir_get(adapter, indir->table); 1223 1224 /* At this point msg has been clobbered by sysctl_handle_string. */ 1225 ena_rss_copy_indir_buf(msg, indir->table); 1226 1227 if (error == 0) 1228 error = ena_rss_indir_set(adapter, indir->table); 1229 1230 unlock: 1231 ENA_LOCK_UNLOCK(); 1232 1233 return (error); 1234 } 1235 #endif /* RSS */ 1236