1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/param.h> 32 #include "opt_rss.h" 33 34 #include "ena_rss.h" 35 #include "ena_sysctl.h" 36 37 static void ena_sysctl_add_wd(struct ena_adapter *); 38 static void ena_sysctl_add_stats(struct ena_adapter *); 39 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 40 static void ena_sysctl_add_customer_metrics(struct ena_adapter *); 41 static void ena_sysctl_add_srd_info(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 static void ena_sysctl_add_irq_affinity(struct ena_adapter *); 44 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 45 #ifndef RSS 46 static void ena_sysctl_add_rss(struct ena_adapter *); 47 #endif 48 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 51 static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS); 52 static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS); 54 #ifndef RSS 55 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 56 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 57 #endif 58 59 /* Limit max ENA sample rate to be an hour. */ 60 #define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600 61 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 62 63 #define SYSCTL_GSTRING_LEN 128 64 65 #define ENA_METRIC_ENI_ENTRY(stat, desc) { \ 66 .name = #stat, \ 67 .description = #desc, \ 68 } 69 70 #define ENA_STAT_ENTRY(stat, desc, stat_type) { \ 71 .name = #stat, \ 72 .description = #desc, \ 73 .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \ 74 } 75 76 #define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \ 77 ENA_STAT_ENTRY(stat, desc, ena_srd_stats) 78 79 struct ena_hw_metrics { 80 char name[SYSCTL_GSTRING_LEN]; 81 char description[SYSCTL_GSTRING_LEN]; 82 }; 83 84 struct ena_srd_metrics { 85 char name[SYSCTL_GSTRING_LEN]; 86 char description[SYSCTL_GSTRING_LEN]; 87 int stat_offset; 88 }; 89 90 static const struct ena_srd_metrics ena_srd_stats_strings[] = { 91 ENA_STAT_ENA_SRD_ENTRY( 92 ena_srd_tx_pkts, Number of packets transmitted over ENA SRD), 93 ENA_STAT_ENA_SRD_ENTRY( 94 ena_srd_eligible_tx_pkts, Number of packets transmitted or could 95 have been transmitted over ENA SRD), 96 ENA_STAT_ENA_SRD_ENTRY( 97 ena_srd_rx_pkts, Number of packets received over ENA SRD), 98 ENA_STAT_ENA_SRD_ENTRY( 99 ena_srd_resource_utilization, Percentage of the ENA SRD resources 100 that are in use), 101 }; 102 103 static const struct ena_hw_metrics ena_hw_stats_strings[] = { 104 ENA_METRIC_ENI_ENTRY( 105 bw_in_allowance_exceeded, Inbound BW allowance exceeded), 106 ENA_METRIC_ENI_ENTRY( 107 bw_out_allowance_exceeded, Outbound BW allowance exceeded), 108 ENA_METRIC_ENI_ENTRY( 109 pps_allowance_exceeded, PPS allowance exceeded), 110 ENA_METRIC_ENI_ENTRY( 111 conntrack_allowance_exceeded, Connection tracking allowance exceeded), 112 ENA_METRIC_ENI_ENTRY( 113 linklocal_allowance_exceeded, Linklocal packet rate allowance), 114 ENA_METRIC_ENI_ENTRY( 115 conntrack_allowance_available, Number of available conntracks), 116 }; 117 118 #ifndef ARRAY_SIZE 119 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 120 #endif 121 122 #define ENA_CUSTOMER_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_hw_stats_strings) 123 #define ENA_SRD_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_srd_stats_strings) 124 125 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 126 "ENA driver parameters"); 127 128 /* 129 * Logging level for changing verbosity of the output 130 */ 131 int ena_log_level = ENA_INFO; 132 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, 133 "Logging level indicating verbosity of the logs"); 134 135 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 136 ENA_DRV_MODULE_VERSION, "ENA driver version"); 137 138 /* 139 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 140 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 141 * of time and lead to the OS instability as it needs to look for the contiguous 142 * pages. 143 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 144 * the network performance is the priority, the 9k mbufs can be used. 145 */ 146 int ena_enable_9k_mbufs = 0; 147 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 148 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 149 150 /* 151 * Force the driver to use large or regular LLQ (Low Latency Queue) header size. 152 * Defaults to ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT. This option may be 153 * important for platforms, which often handle packet headers on Tx with total 154 * header size greater than 96B, as it may reduce the latency. 155 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 156 * packet drops. 157 */ 158 int ena_force_large_llq_header = ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT; 159 SYSCTL_INT(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 160 &ena_force_large_llq_header, 0, 161 "Change default LLQ entry size received from the device"); 162 163 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 164 165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter) 166 { 167 int rc = 0; 168 169 adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE), 170 M_DEVBUF, M_NOWAIT | M_ZERO); 171 if (unlikely(adapter->customer_metrics_array == NULL)) 172 rc = ENOMEM; 173 174 return rc; 175 } 176 void 177 ena_sysctl_add_nodes(struct ena_adapter *adapter) 178 { 179 struct ena_com_dev *dev = adapter->ena_dev; 180 181 if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS)) 182 ena_sysctl_add_customer_metrics(adapter); 183 else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) 184 ena_sysctl_add_eni_metrics(adapter); 185 186 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 187 ena_sysctl_add_srd_info(adapter); 188 189 ena_sysctl_add_wd(adapter); 190 ena_sysctl_add_stats(adapter); 191 ena_sysctl_add_tuneables(adapter); 192 ena_sysctl_add_irq_affinity(adapter); 193 #ifndef RSS 194 ena_sysctl_add_rss(adapter); 195 #endif 196 } 197 198 static void 199 ena_sysctl_add_wd(struct ena_adapter *adapter) 200 { 201 device_t dev; 202 203 struct sysctl_ctx_list *ctx; 204 struct sysctl_oid *tree; 205 struct sysctl_oid_list *child; 206 207 dev = adapter->pdev; 208 209 ctx = device_get_sysctl_ctx(dev); 210 tree = device_get_sysctl_tree(dev); 211 child = SYSCTL_CHILDREN(tree); 212 213 /* Sysctl calls for Watchdog service */ 214 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN, 215 &adapter->wd_active, 0, "Watchdog is active"); 216 217 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 218 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 219 "Timeout for Keep Alive messages"); 220 221 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 222 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 223 "Timeout for TX completion"); 224 225 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 226 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 227 "Number of TX queues to check per run"); 228 229 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 230 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 231 "Max number of timeouted packets"); 232 } 233 234 static void 235 ena_sysctl_add_stats(struct ena_adapter *adapter) 236 { 237 device_t dev; 238 239 struct ena_ring *tx_ring; 240 struct ena_ring *rx_ring; 241 242 struct ena_hw_stats *hw_stats; 243 struct ena_stats_dev *dev_stats; 244 struct ena_stats_tx *tx_stats; 245 struct ena_stats_rx *rx_stats; 246 struct ena_com_stats_admin *admin_stats; 247 248 struct sysctl_ctx_list *ctx; 249 struct sysctl_oid *tree; 250 struct sysctl_oid_list *child; 251 252 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 253 struct sysctl_oid *admin_node; 254 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 255 struct sysctl_oid_list *admin_list; 256 257 #define QUEUE_NAME_LEN 32 258 char namebuf[QUEUE_NAME_LEN]; 259 int i; 260 261 dev = adapter->pdev; 262 263 ctx = device_get_sysctl_ctx(dev); 264 tree = device_get_sysctl_tree(dev); 265 child = SYSCTL_CHILDREN(tree); 266 267 tx_ring = adapter->tx_ring; 268 rx_ring = adapter->rx_ring; 269 270 hw_stats = &adapter->hw_stats; 271 dev_stats = &adapter->dev_stats; 272 admin_stats = &adapter->ena_dev->admin_queue.stats; 273 274 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, 275 &dev_stats->wd_expired, "Watchdog expiry count"); 276 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, 277 &dev_stats->interface_up, "Network interface up count"); 278 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD, 279 &dev_stats->interface_down, "Network interface down count"); 280 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD, 281 &dev_stats->admin_q_pause, "Admin queue pauses"); 282 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "os_trigger", CTLFLAG_RD, 283 &dev_stats->os_trigger, "OS trigger count"); 284 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_tx_cmpl", CTLFLAG_RD, 285 &dev_stats->missing_tx_cmpl, "Missing TX completions resets count"); 286 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_req_id", CTLFLAG_RD, 287 &dev_stats->bad_rx_req_id, "Bad RX req id count"); 288 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_tx_req_id", CTLFLAG_RD, 289 &dev_stats->bad_tx_req_id, "Bad TX req id count"); 290 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_desc_num", CTLFLAG_RD, 291 &dev_stats->bad_rx_desc_num, "Bad RX descriptors number count"); 292 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "invalid_state", CTLFLAG_RD, 293 &dev_stats->invalid_state, "Driver invalid state count"); 294 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD, 295 &dev_stats->missing_intr, "Missing interrupt count"); 296 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD, 297 &dev_stats->tx_desc_malformed, "TX descriptors malformed count"); 298 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD, 299 &dev_stats->rx_desc_malformed, "RX descriptors malformed count"); 300 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD, 301 &dev_stats->missing_admin_interrupt, "Missing admin interrupts count"); 302 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD, 303 &dev_stats->admin_to, "Admin queue timeouts count"); 304 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD, 305 &dev_stats->device_request_reset, "Device reset requests count"); 306 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD, 307 &dev_stats->total_resets, "Total resets count"); 308 309 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 310 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 311 312 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 313 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 314 queue_list = SYSCTL_CHILDREN(queue_node); 315 316 adapter->que[i].oid = queue_node; 317 318 #ifdef RSS 319 /* Common stats */ 320 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, 321 &adapter->que[i].cpu, 0, "CPU affinity"); 322 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD, 323 &adapter->que[i].domain, 0, "NUMA domain"); 324 #endif 325 326 /* TX specific stats */ 327 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", 328 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 329 tx_list = SYSCTL_CHILDREN(tx_node); 330 331 tx_stats = &tx_ring->tx_stats; 332 333 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", 334 CTLFLAG_RD, &tx_stats->cnt, "Packets sent"); 335 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", 336 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); 337 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 338 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, 339 "TX buffer preparation failures"); 340 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 341 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, 342 "DMA mapping failures"); 343 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", 344 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); 345 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 346 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, 347 "TX completions missed"); 348 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", 349 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); 350 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses", 351 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count"); 352 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 353 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err, 354 "Mbuf collapse failures"); 355 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups", 356 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); 357 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops", 358 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); 359 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 360 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy, 361 "Header copies for llq transaction"); 362 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 363 "unmask_interrupt_num", CTLFLAG_RD, 364 &tx_stats->unmask_interrupt_num, 365 "Unmasked interrupt count"); 366 367 /* RX specific stats */ 368 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", 369 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 370 rx_list = SYSCTL_CHILDREN(rx_node); 371 372 rx_stats = &rx_ring->rx_stats; 373 374 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", 375 CTLFLAG_RD, &rx_stats->cnt, "Packets received"); 376 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", 377 CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); 378 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", 379 CTLFLAG_RD, &rx_stats->refil_partial, 380 "Partial refilled mbufs"); 381 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad", 382 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum"); 383 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 384 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, 385 "Failed mbuf allocs"); 386 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 387 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail, 388 "Failed jumbo mbuf allocs"); 389 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 390 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, 391 "DMA mapping errors"); 392 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", 393 CTLFLAG_RD, &rx_stats->bad_desc_num, 394 "Bad descriptor count"); 395 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", 396 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); 397 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring", 398 CTLFLAG_RD, &rx_stats->empty_rx_ring, 399 "RX descriptors depletion count"); 400 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good", 401 CTLFLAG_RD, &rx_stats->csum_good, 402 "Valid RX checksum calculations"); 403 } 404 405 /* Stats read from device */ 406 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 407 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 408 hw_list = SYSCTL_CHILDREN(hw_node); 409 410 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 411 &hw_stats->rx_packets, "Packets received"); 412 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 413 &hw_stats->tx_packets, "Packets transmitted"); 414 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 415 &hw_stats->rx_bytes, "Bytes received"); 416 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 417 &hw_stats->tx_bytes, "Bytes transmitted"); 418 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 419 &hw_stats->rx_drops, "Receive packet drops"); 420 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 421 &hw_stats->tx_drops, "Transmit packet drops"); 422 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_overruns", CTLFLAG_RD, 423 &hw_stats->rx_overruns, "Receive overruns"); 424 425 /* ENA Admin queue stats */ 426 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 427 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 428 admin_list = SYSCTL_CHILDREN(admin_node); 429 430 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 431 &admin_stats->aborted_cmd, 0, "Aborted commands"); 432 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 433 &admin_stats->submitted_cmd, 0, "Submitted commands"); 434 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 435 &admin_stats->completed_cmd, 0, "Completed commands"); 436 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 437 &admin_stats->out_of_space, 0, "Queue out of space"); 438 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 439 &admin_stats->no_completion, 0, "Commands not completed"); 440 } 441 442 static void 443 ena_sysctl_add_srd_info(struct ena_adapter *adapter) 444 { 445 device_t dev; 446 447 struct sysctl_oid *ena_srd_info; 448 struct sysctl_oid_list *srd_list; 449 450 struct sysctl_ctx_list *ctx; 451 struct sysctl_oid *tree; 452 struct sysctl_oid_list *child; 453 454 struct ena_admin_ena_srd_stats *srd_stats_ptr; 455 struct ena_srd_metrics cur_stat_strings; 456 457 int i; 458 459 dev = adapter->pdev; 460 461 ctx = device_get_sysctl_ctx(dev); 462 tree = device_get_sysctl_tree(dev); 463 child = SYSCTL_CHILDREN(tree); 464 465 ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info", 466 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information"); 467 srd_list = SYSCTL_CHILDREN(ena_srd_info); 468 469 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode", 470 CTLFLAG_RD, &adapter->ena_srd_info.flags, 0, 471 "Describes which ENA-express features are enabled"); 472 473 srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats; 474 475 for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) { 476 cur_stat_strings = ena_srd_stats_strings[i]; 477 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name, 478 CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset, 479 0, cur_stat_strings.description); 480 } 481 } 482 483 static void 484 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter) 485 { 486 device_t dev; 487 struct ena_com_dev *ena_dev; 488 489 struct sysctl_ctx_list *ctx; 490 struct sysctl_oid *tree; 491 struct sysctl_oid_list *child; 492 493 struct sysctl_oid *customer_metric; 494 struct sysctl_oid_list *customer_list; 495 496 int i; 497 498 dev = adapter->pdev; 499 ena_dev = adapter->ena_dev; 500 501 ctx = device_get_sysctl_ctx(dev); 502 tree = device_get_sysctl_tree(dev); 503 child = SYSCTL_CHILDREN(tree); 504 customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics", 505 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics"); 506 customer_list = SYSCTL_CHILDREN(customer_metric); 507 508 for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) { 509 if (ena_com_get_customer_metric_support(ena_dev, i)) { 510 SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name, 511 CTLFLAG_RD, &adapter->customer_metrics_array[i], 0, 512 ena_hw_stats_strings[i].description); 513 } 514 } 515 } 516 517 static void 518 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 519 { 520 device_t dev; 521 struct ena_admin_eni_stats *eni_metrics; 522 523 struct sysctl_ctx_list *ctx; 524 struct sysctl_oid *tree; 525 struct sysctl_oid_list *child; 526 527 struct sysctl_oid *eni_node; 528 struct sysctl_oid_list *eni_list; 529 530 dev = adapter->pdev; 531 532 ctx = device_get_sysctl_ctx(dev); 533 tree = device_get_sysctl_tree(dev); 534 child = SYSCTL_CHILDREN(tree); 535 536 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 537 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 538 eni_list = SYSCTL_CHILDREN(eni_node); 539 540 eni_metrics = &adapter->eni_metrics; 541 542 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 543 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 544 "Inbound BW allowance exceeded"); 545 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 546 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 547 "Outbound BW allowance exceeded"); 548 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 549 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 550 "PPS allowance exceeded"); 551 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 552 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 553 "Connection tracking allowance exceeded"); 554 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 555 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 556 "Linklocal packet rate allowance exceeded"); 557 } 558 559 static void 560 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 561 { 562 device_t dev; 563 564 struct sysctl_ctx_list *ctx; 565 struct sysctl_oid *tree; 566 struct sysctl_oid_list *child; 567 568 dev = adapter->pdev; 569 570 ctx = device_get_sysctl_ctx(dev); 571 tree = device_get_sysctl_tree(dev); 572 child = SYSCTL_CHILDREN(tree); 573 574 /* Tuneable number of buffers in the buf-ring (drbr) */ 575 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 576 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 577 ena_sysctl_buf_ring_size, "I", 578 "Size of the Tx buffer ring (drbr)."); 579 580 /* Tuneable number of the Rx ring size */ 581 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 582 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 583 ena_sysctl_rx_queue_size, "I", 584 "Size of the Rx ring. The size should be a power of 2."); 585 586 /* Tuneable number of IO queues */ 587 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 588 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 589 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 590 591 /* 592 * Tuneable, which determines how often ENA metrics will be read. 593 * 0 means it's turned off. Maximum allowed value is limited by: 594 * ENA_METRICS_MAX_SAMPLE_INTERVAL. 595 */ 596 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval", 597 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 598 ena_sysctl_metrics_interval, "SU", 599 "Interval in seconds for updating Netword interface metrics. 0 turns off the update."); 600 } 601 602 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 603 #ifndef RSS 604 static void 605 ena_sysctl_add_rss(struct ena_adapter *adapter) 606 { 607 device_t dev; 608 609 struct sysctl_ctx_list *ctx; 610 struct sysctl_oid *tree; 611 struct sysctl_oid_list *child; 612 613 dev = adapter->pdev; 614 615 ctx = device_get_sysctl_ctx(dev); 616 tree = device_get_sysctl_tree(dev); 617 child = SYSCTL_CHILDREN(tree); 618 619 /* RSS options */ 620 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 621 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 622 child = SYSCTL_CHILDREN(tree); 623 624 /* RSS hash key */ 625 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 626 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 627 ena_sysctl_rss_key, "A", "RSS key."); 628 629 /* Tuneable RSS indirection table */ 630 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 631 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 632 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 633 634 /* RSS indirection table size */ 635 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 636 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 637 "RSS indirection table size."); 638 } 639 #endif /* RSS */ 640 641 static void 642 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter) 643 { 644 device_t dev; 645 646 struct sysctl_ctx_list *ctx; 647 struct sysctl_oid *tree; 648 struct sysctl_oid_list *child; 649 650 dev = adapter->pdev; 651 652 ctx = device_get_sysctl_ctx(dev); 653 tree = device_get_sysctl_tree(dev); 654 child = SYSCTL_CHILDREN(tree); 655 656 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity", 657 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity."); 658 child = SYSCTL_CHILDREN(tree); 659 660 /* Add base cpu leaf */ 661 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu", 662 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 663 ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity."); 664 665 /* Add cpu stride leaf */ 666 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride", 667 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 668 ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity."); 669 } 670 671 672 /* 673 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 674 * 675 * Whether the nodes are registered or unregistered depends on a delta between 676 * the `old` and `new` parameters, representing the number of queues. 677 * 678 * This function is used to hide sysctl attributes for queue nodes which aren't 679 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 680 * 681 * NOTE: 682 * All unregistered nodes must be registered again at detach, i.e. by a call to 683 * this function. 684 */ 685 void 686 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 687 { 688 struct sysctl_oid *oid; 689 int min, max, i; 690 691 min = MIN(old, new); 692 max = MIN(MAX(old, new), adapter->max_num_io_queues); 693 694 for (i = min; i < max; ++i) { 695 oid = adapter->que[i].oid; 696 697 sysctl_wlock(); 698 if (old > new) 699 sysctl_unregister_oid(oid); 700 else 701 sysctl_register_oid(oid); 702 sysctl_wunlock(); 703 } 704 } 705 706 static int 707 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 708 { 709 struct ena_adapter *adapter = arg1; 710 uint32_t val; 711 int error; 712 713 ENA_LOCK_LOCK(); 714 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 715 error = EINVAL; 716 goto unlock; 717 } 718 719 val = 0; 720 error = sysctl_wire_old_buffer(req, sizeof(val)); 721 if (error == 0) { 722 val = adapter->buf_ring_size; 723 error = sysctl_handle_32(oidp, &val, 0, req); 724 } 725 if (error != 0 || req->newptr == NULL) 726 goto unlock; 727 728 if (!powerof2(val) || val == 0) { 729 ena_log(adapter->pdev, ERR, 730 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 731 val); 732 error = EINVAL; 733 goto unlock; 734 } 735 736 if (val != adapter->buf_ring_size) { 737 ena_log(adapter->pdev, INFO, 738 "Requested new Tx buffer ring size: %d. Old size: %d\n", 739 val, adapter->buf_ring_size); 740 741 error = ena_update_buf_ring_size(adapter, val); 742 } else { 743 ena_log(adapter->pdev, ERR, 744 "New Tx buffer ring size is the same as already used: %u\n", 745 adapter->buf_ring_size); 746 } 747 748 unlock: 749 ENA_LOCK_UNLOCK(); 750 751 return (error); 752 } 753 754 static int 755 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 756 { 757 struct ena_adapter *adapter = arg1; 758 uint32_t val; 759 int error; 760 761 ENA_LOCK_LOCK(); 762 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 763 error = EINVAL; 764 goto unlock; 765 } 766 767 val = 0; 768 error = sysctl_wire_old_buffer(req, sizeof(val)); 769 if (error == 0) { 770 val = adapter->requested_rx_ring_size; 771 error = sysctl_handle_32(oidp, &val, 0, req); 772 } 773 if (error != 0 || req->newptr == NULL) 774 goto unlock; 775 776 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 777 ena_log(adapter->pdev, ERR, 778 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 779 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 780 error = EINVAL; 781 goto unlock; 782 } 783 784 /* Check if the parameter is power of 2 */ 785 if (!powerof2(val)) { 786 ena_log(adapter->pdev, ERR, 787 "Requested new Rx queue size (%u) is not a power of 2\n", 788 val); 789 error = EINVAL; 790 goto unlock; 791 } 792 793 if (val != adapter->requested_rx_ring_size) { 794 ena_log(adapter->pdev, INFO, 795 "Requested new Rx queue size: %u. Old size: %u\n", val, 796 adapter->requested_rx_ring_size); 797 798 error = ena_update_queue_size(adapter, 799 adapter->requested_tx_ring_size, val); 800 } else { 801 ena_log(adapter->pdev, ERR, 802 "New Rx queue size is the same as already used: %u\n", 803 adapter->requested_rx_ring_size); 804 } 805 806 unlock: 807 ENA_LOCK_UNLOCK(); 808 809 return (error); 810 } 811 812 /* 813 * Change number of effectively used IO queues adapter->num_io_queues 814 */ 815 static int 816 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 817 { 818 struct ena_adapter *adapter = arg1; 819 uint32_t old_num_queues, tmp = 0; 820 int error; 821 822 ENA_LOCK_LOCK(); 823 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 824 error = EINVAL; 825 goto unlock; 826 } 827 828 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 829 if (error == 0) { 830 tmp = adapter->num_io_queues; 831 error = sysctl_handle_int(oidp, &tmp, 0, req); 832 } 833 if (error != 0 || req->newptr == NULL) 834 goto unlock; 835 836 if (tmp == 0) { 837 ena_log(adapter->pdev, ERR, 838 "Requested number of IO queues is zero\n"); 839 error = EINVAL; 840 goto unlock; 841 } 842 843 /* 844 * The adapter::max_num_io_queues is the HW capability. The system 845 * resources availability may potentially be a tighter limit. Therefore 846 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 847 * always holds true, while the `adapter::msix_vecs` is variable across 848 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 849 */ 850 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 851 ena_log(adapter->pdev, ERR, 852 "Requested number of IO queues is higher than maximum allowed (%u)\n", 853 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 854 error = EINVAL; 855 goto unlock; 856 } 857 if (tmp == adapter->num_io_queues) { 858 ena_log(adapter->pdev, ERR, 859 "Requested number of IO queues is equal to current value " 860 "(%u)\n", 861 adapter->num_io_queues); 862 } else { 863 ena_log(adapter->pdev, INFO, 864 "Requested new number of IO queues: %u, current value: " 865 "%u\n", 866 tmp, adapter->num_io_queues); 867 868 old_num_queues = adapter->num_io_queues; 869 error = ena_update_io_queue_nb(adapter, tmp); 870 if (error != 0) 871 return (error); 872 873 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 874 } 875 876 unlock: 877 ENA_LOCK_UNLOCK(); 878 879 return (error); 880 } 881 882 static int 883 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS) 884 { 885 struct ena_adapter *adapter = arg1; 886 uint16_t interval; 887 int error; 888 889 ENA_LOCK_LOCK(); 890 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 891 error = EINVAL; 892 goto unlock; 893 } 894 895 error = sysctl_wire_old_buffer(req, sizeof(interval)); 896 if (error == 0) { 897 interval = adapter->metrics_sample_interval; 898 error = sysctl_handle_16(oidp, &interval, 0, req); 899 } 900 if (error != 0 || req->newptr == NULL) 901 goto unlock; 902 903 if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) { 904 ena_log(adapter->pdev, ERR, 905 "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n", 906 ENA_METRICS_MAX_SAMPLE_INTERVAL); 907 error = EINVAL; 908 goto unlock; 909 } 910 911 if (interval == 0) { 912 ena_log(adapter->pdev, INFO, 913 "ENA metrics update is now turned off\n"); 914 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 915 } else { 916 ena_log(adapter->pdev, INFO, 917 "ENA metrics update interval is set to: %" PRIu16 918 " seconds\n", 919 interval); 920 } 921 922 adapter->metrics_sample_interval = interval; 923 924 unlock: 925 ENA_LOCK_UNLOCK(); 926 927 return (0); 928 } 929 930 static int 931 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS) 932 { 933 struct ena_adapter *adapter = arg1; 934 int irq_base_cpu = 0; 935 int error; 936 937 ENA_LOCK_LOCK(); 938 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 939 error = ENODEV; 940 goto unlock; 941 } 942 943 error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu)); 944 if (error == 0) { 945 irq_base_cpu = adapter->irq_cpu_base; 946 error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req); 947 } 948 if (error != 0 || req->newptr == NULL) 949 goto unlock; 950 951 if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) { 952 ena_log(adapter->pdev, ERR, 953 "Requested base CPU is less than zero.\n"); 954 error = EINVAL; 955 goto unlock; 956 } 957 958 if (irq_base_cpu > mp_ncpus) { 959 ena_log(adapter->pdev, INFO, 960 "Requested base CPU is larger than the number of available CPUs. \n"); 961 error = EINVAL; 962 goto unlock; 963 964 } 965 966 if (irq_base_cpu == adapter->irq_cpu_base) { 967 ena_log(adapter->pdev, INFO, 968 "Requested IRQ base CPU is equal to current value " 969 "(%d)\n", 970 adapter->irq_cpu_base); 971 goto unlock; 972 } 973 974 ena_log(adapter->pdev, INFO, 975 "Requested new IRQ base CPU: %d, current value: %d\n", 976 irq_base_cpu, adapter->irq_cpu_base); 977 978 error = ena_update_base_cpu(adapter, irq_base_cpu); 979 980 unlock: 981 ENA_LOCK_UNLOCK(); 982 983 return (error); 984 } 985 986 static int 987 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS) 988 { 989 struct ena_adapter *adapter = arg1; 990 int32_t irq_cpu_stride = 0; 991 int error; 992 993 ENA_LOCK_LOCK(); 994 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 995 error = ENODEV; 996 goto unlock; 997 } 998 999 error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride)); 1000 if (error == 0) { 1001 irq_cpu_stride = adapter->irq_cpu_stride; 1002 error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req); 1003 } 1004 if (error != 0 || req->newptr == NULL) 1005 goto unlock; 1006 1007 if (irq_cpu_stride < 0) { 1008 ena_log(adapter->pdev, ERR, 1009 "Requested IRQ stride is less than zero.\n"); 1010 error = EINVAL; 1011 goto unlock; 1012 } 1013 1014 if (irq_cpu_stride > mp_ncpus) { 1015 ena_log(adapter->pdev, INFO, 1016 "Warning: Requested IRQ stride is larger than the number of available CPUs.\n"); 1017 } 1018 1019 if (irq_cpu_stride == adapter->irq_cpu_stride) { 1020 ena_log(adapter->pdev, INFO, 1021 "Requested IRQ CPU stride is equal to current value " 1022 "(%u)\n", 1023 adapter->irq_cpu_stride); 1024 goto unlock; 1025 } 1026 1027 ena_log(adapter->pdev, INFO, 1028 "Requested new IRQ CPU stride: %u, current value: %u\n", 1029 irq_cpu_stride, adapter->irq_cpu_stride); 1030 1031 error = ena_update_cpu_stride(adapter, irq_cpu_stride); 1032 if (error != 0) 1033 goto unlock; 1034 1035 unlock: 1036 ENA_LOCK_UNLOCK(); 1037 1038 return (error); 1039 } 1040 1041 #ifndef RSS 1042 /* 1043 * Change the Receive Side Scaling hash key. 1044 */ 1045 static int 1046 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 1047 { 1048 struct ena_adapter *adapter = arg1; 1049 struct ena_com_dev *ena_dev = adapter->ena_dev; 1050 enum ena_admin_hash_functions ena_func; 1051 char msg[ENA_HASH_KEY_MSG_SIZE]; 1052 char elem[3] = { 0 }; 1053 char *endp; 1054 u8 rss_key[ENA_HASH_KEY_SIZE]; 1055 int error, i; 1056 1057 ENA_LOCK_LOCK(); 1058 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1059 error = EINVAL; 1060 goto unlock; 1061 } 1062 1063 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1064 error = ENOTSUP; 1065 goto unlock; 1066 } 1067 1068 error = sysctl_wire_old_buffer(req, sizeof(msg)); 1069 if (error != 0) 1070 goto unlock; 1071 1072 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 1073 if (error != 0) { 1074 device_printf(adapter->pdev, "Cannot get hash function\n"); 1075 goto unlock; 1076 } 1077 1078 if (ena_func != ENA_ADMIN_TOEPLITZ) { 1079 error = EINVAL; 1080 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 1081 goto unlock; 1082 } 1083 1084 error = ena_rss_get_hash_key(ena_dev, rss_key); 1085 if (error != 0) { 1086 device_printf(adapter->pdev, "Cannot get hash key\n"); 1087 goto unlock; 1088 } 1089 1090 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 1091 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 1092 1093 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 1094 if (error != 0 || req->newptr == NULL) 1095 goto unlock; 1096 1097 if (strlen(msg) != sizeof(msg) - 1) { 1098 error = EINVAL; 1099 device_printf(adapter->pdev, "Invalid key size\n"); 1100 goto unlock; 1101 } 1102 1103 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 1104 strncpy(elem, &msg[i * 2], 2); 1105 rss_key[i] = strtol(elem, &endp, 16); 1106 1107 /* Both hex nibbles in the string must be valid to continue. */ 1108 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 1109 error = EINVAL; 1110 device_printf(adapter->pdev, 1111 "Invalid key hex value: '%c'\n", *endp); 1112 goto unlock; 1113 } 1114 } 1115 1116 error = ena_rss_set_hash(ena_dev, rss_key); 1117 if (error != 0) 1118 device_printf(adapter->pdev, "Cannot fill hash key\n"); 1119 1120 unlock: 1121 ENA_LOCK_UNLOCK(); 1122 1123 return (error); 1124 } 1125 1126 /* 1127 * Change the Receive Side Scaling indirection table. 1128 * 1129 * The sysctl entry string consists of one or more `x:y` keypairs, where 1130 * x stands for the table index and y for its new value. 1131 * Table indices that don't need to be updated can be omitted from the string 1132 * and will retain their existing values. If an index is entered more than once, 1133 * the last value is used. 1134 * 1135 * Example: 1136 * To update two selected indices in the RSS indirection table, e.g. setting 1137 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 1138 * used: 1139 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 1140 */ 1141 static int 1142 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 1143 { 1144 int num_queues, error; 1145 struct ena_adapter *adapter = arg1; 1146 struct ena_indir *indir; 1147 char *msg, *buf, *endp; 1148 uint32_t idx, value; 1149 1150 ENA_LOCK_LOCK(); 1151 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1152 error = EINVAL; 1153 goto unlock; 1154 } 1155 1156 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1157 error = ENOTSUP; 1158 goto unlock; 1159 } 1160 1161 indir = adapter->rss_indir; 1162 msg = indir->sysctl_buf; 1163 1164 if (unlikely(indir == NULL)) { 1165 error = ENOTSUP; 1166 goto unlock; 1167 } 1168 1169 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 1170 if (error != 0 || req->newptr == NULL) 1171 goto unlock; 1172 1173 num_queues = adapter->num_io_queues; 1174 1175 /* 1176 * This sysctl expects msg to be a list of `x:y` record pairs, 1177 * where x is the indirection table index and y is its value. 1178 */ 1179 for (buf = msg; *buf != '\0'; buf = endp) { 1180 idx = strtol(buf, &endp, 10); 1181 1182 if (endp == buf || idx < 0) { 1183 device_printf(adapter->pdev, "Invalid index: %s\n", 1184 buf); 1185 error = EINVAL; 1186 break; 1187 } 1188 1189 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 1190 device_printf(adapter->pdev, "Index %d out of range\n", 1191 idx); 1192 error = ERANGE; 1193 break; 1194 } 1195 1196 buf = endp; 1197 1198 if (*buf++ != ':') { 1199 device_printf(adapter->pdev, "Missing ':' separator\n"); 1200 error = EINVAL; 1201 break; 1202 } 1203 1204 value = strtol(buf, &endp, 10); 1205 1206 if (endp == buf || value < 0) { 1207 device_printf(adapter->pdev, "Invalid value: %s\n", 1208 buf); 1209 error = EINVAL; 1210 break; 1211 } 1212 1213 if (value >= num_queues) { 1214 device_printf(adapter->pdev, "Value %d out of range\n", 1215 value); 1216 error = ERANGE; 1217 break; 1218 } 1219 1220 indir->table[idx] = value; 1221 } 1222 1223 if (error != 0) /* Reload indirection table with last good data. */ 1224 ena_rss_indir_get(adapter, indir->table); 1225 1226 /* At this point msg has been clobbered by sysctl_handle_string. */ 1227 ena_rss_copy_indir_buf(msg, indir->table); 1228 1229 if (error == 0) 1230 error = ena_rss_indir_set(adapter, indir->table); 1231 1232 unlock: 1233 ENA_LOCK_UNLOCK(); 1234 1235 return (error); 1236 } 1237 #endif /* RSS */ 1238