1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2015-2023 Amazon.com, Inc. or its affiliates. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/param.h> 32 #include "opt_rss.h" 33 34 #include "ena_rss.h" 35 #include "ena_sysctl.h" 36 37 static void ena_sysctl_add_wd(struct ena_adapter *); 38 static void ena_sysctl_add_stats(struct ena_adapter *); 39 static void ena_sysctl_add_eni_metrics(struct ena_adapter *); 40 static void ena_sysctl_add_customer_metrics(struct ena_adapter *); 41 static void ena_sysctl_add_srd_info(struct ena_adapter *); 42 static void ena_sysctl_add_tuneables(struct ena_adapter *); 43 static void ena_sysctl_add_irq_affinity(struct ena_adapter *); 44 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 45 #ifndef RSS 46 static void ena_sysctl_add_rss(struct ena_adapter *); 47 #endif 48 static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS); 49 static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS); 50 static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS); 51 static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS); 52 static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS); 53 static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS); 54 #ifndef RSS 55 static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS); 56 static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS); 57 #endif 58 59 /* Limit max ENA sample rate to be an hour. */ 60 #define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600 61 #define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1) 62 63 #define SYSCTL_GSTRING_LEN 128 64 65 #define ENA_METRIC_ENI_ENTRY(stat, desc) { \ 66 .name = #stat, \ 67 .description = #desc, \ 68 } 69 70 #define ENA_STAT_ENTRY(stat, desc, stat_type) { \ 71 .name = #stat, \ 72 .description = #desc, \ 73 .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \ 74 } 75 76 #define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \ 77 ENA_STAT_ENTRY(stat, desc, ena_srd_stats) 78 79 struct ena_hw_metrics { 80 char name[SYSCTL_GSTRING_LEN]; 81 char description[SYSCTL_GSTRING_LEN]; 82 }; 83 84 struct ena_srd_metrics { 85 char name[SYSCTL_GSTRING_LEN]; 86 char description[SYSCTL_GSTRING_LEN]; 87 int stat_offset; 88 }; 89 90 static const struct ena_srd_metrics ena_srd_stats_strings[] = { 91 ENA_STAT_ENA_SRD_ENTRY( 92 ena_srd_tx_pkts, Number of packets transmitted over ENA SRD), 93 ENA_STAT_ENA_SRD_ENTRY( 94 ena_srd_eligible_tx_pkts, Number of packets transmitted or could 95 have been transmitted over ENA SRD), 96 ENA_STAT_ENA_SRD_ENTRY( 97 ena_srd_rx_pkts, Number of packets received over ENA SRD), 98 ENA_STAT_ENA_SRD_ENTRY( 99 ena_srd_resource_utilization, Percentage of the ENA SRD resources 100 that are in use), 101 }; 102 103 static const struct ena_hw_metrics ena_hw_stats_strings[] = { 104 ENA_METRIC_ENI_ENTRY( 105 bw_in_allowance_exceeded, Inbound BW allowance exceeded), 106 ENA_METRIC_ENI_ENTRY( 107 bw_out_allowance_exceeded, Outbound BW allowance exceeded), 108 ENA_METRIC_ENI_ENTRY( 109 pps_allowance_exceeded, PPS allowance exceeded), 110 ENA_METRIC_ENI_ENTRY( 111 conntrack_allowance_exceeded, Connection tracking allowance exceeded), 112 ENA_METRIC_ENI_ENTRY( 113 linklocal_allowance_exceeded, Linklocal packet rate allowance), 114 ENA_METRIC_ENI_ENTRY( 115 conntrack_allowance_available, Number of available conntracks), 116 }; 117 118 #ifndef ARRAY_SIZE 119 #define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) 120 #endif 121 122 #define ENA_CUSTOMER_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_hw_stats_strings) 123 #define ENA_SRD_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_srd_stats_strings) 124 125 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 126 "ENA driver parameters"); 127 128 /* 129 * Logging level for changing verbosity of the output 130 */ 131 int ena_log_level = ENA_INFO; 132 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0, 133 "Logging level indicating verbosity of the logs"); 134 135 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD, 136 ENA_DRV_MODULE_VERSION, "ENA driver version"); 137 138 /* 139 * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead). 140 * Using 9k mbufs in low memory conditions might cause allocation to take a lot 141 * of time and lead to the OS instability as it needs to look for the contiguous 142 * pages. 143 * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if 144 * the network performance is the priority, the 9k mbufs can be used. 145 */ 146 int ena_enable_9k_mbufs = 0; 147 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN, 148 &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors"); 149 150 /* 151 * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to 152 * false. This option may be important for platforms, which often handle packet 153 * headers on Tx with total header size greater than 96B, as it may 154 * reduce the latency. 155 * It also reduces the maximum Tx queue size by half, so it may cause more Tx 156 * packet drops. 157 */ 158 bool ena_force_large_llq_header = false; 159 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN, 160 &ena_force_large_llq_header, 0, 161 "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n"); 162 163 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE; 164 165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter) 166 { 167 int rc = 0; 168 169 adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE), 170 M_DEVBUF, M_NOWAIT | M_ZERO); 171 if (unlikely(adapter->customer_metrics_array == NULL)) 172 rc = ENOMEM; 173 174 return rc; 175 } 176 void 177 ena_sysctl_add_nodes(struct ena_adapter *adapter) 178 { 179 struct ena_com_dev *dev = adapter->ena_dev; 180 181 if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS)) 182 ena_sysctl_add_customer_metrics(adapter); 183 else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS)) 184 ena_sysctl_add_eni_metrics(adapter); 185 186 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO)) 187 ena_sysctl_add_srd_info(adapter); 188 189 ena_sysctl_add_wd(adapter); 190 ena_sysctl_add_stats(adapter); 191 ena_sysctl_add_tuneables(adapter); 192 ena_sysctl_add_irq_affinity(adapter); 193 #ifndef RSS 194 ena_sysctl_add_rss(adapter); 195 #endif 196 } 197 198 static void 199 ena_sysctl_add_wd(struct ena_adapter *adapter) 200 { 201 device_t dev; 202 203 struct sysctl_ctx_list *ctx; 204 struct sysctl_oid *tree; 205 struct sysctl_oid_list *child; 206 207 dev = adapter->pdev; 208 209 ctx = device_get_sysctl_ctx(dev); 210 tree = device_get_sysctl_tree(dev); 211 child = SYSCTL_CHILDREN(tree); 212 213 /* Sysctl calls for Watchdog service */ 214 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN, 215 &adapter->wd_active, 0, "Watchdog is active"); 216 217 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout", 218 CTLFLAG_RWTUN, &adapter->keep_alive_timeout, 219 "Timeout for Keep Alive messages"); 220 221 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout", 222 CTLFLAG_RWTUN, &adapter->missing_tx_timeout, 223 "Timeout for TX completion"); 224 225 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues", 226 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0, 227 "Number of TX queues to check per run"); 228 229 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold", 230 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0, 231 "Max number of timeouted packets"); 232 } 233 234 static void 235 ena_sysctl_add_stats(struct ena_adapter *adapter) 236 { 237 device_t dev; 238 239 struct ena_ring *tx_ring; 240 struct ena_ring *rx_ring; 241 242 struct ena_hw_stats *hw_stats; 243 struct ena_stats_dev *dev_stats; 244 struct ena_stats_tx *tx_stats; 245 struct ena_stats_rx *rx_stats; 246 struct ena_com_stats_admin *admin_stats; 247 248 struct sysctl_ctx_list *ctx; 249 struct sysctl_oid *tree; 250 struct sysctl_oid_list *child; 251 252 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node; 253 struct sysctl_oid *admin_node; 254 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list; 255 struct sysctl_oid_list *admin_list; 256 257 #define QUEUE_NAME_LEN 32 258 char namebuf[QUEUE_NAME_LEN]; 259 int i; 260 261 dev = adapter->pdev; 262 263 ctx = device_get_sysctl_ctx(dev); 264 tree = device_get_sysctl_tree(dev); 265 child = SYSCTL_CHILDREN(tree); 266 267 tx_ring = adapter->tx_ring; 268 rx_ring = adapter->rx_ring; 269 270 hw_stats = &adapter->hw_stats; 271 dev_stats = &adapter->dev_stats; 272 admin_stats = &adapter->ena_dev->admin_queue.stats; 273 274 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD, 275 &dev_stats->wd_expired, "Watchdog expiry count"); 276 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD, 277 &dev_stats->interface_up, "Network interface up count"); 278 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", 279 CTLFLAG_RD, &dev_stats->interface_down, 280 "Network interface down count"); 281 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", 282 CTLFLAG_RD, &dev_stats->admin_q_pause, "Admin queue pauses"); 283 284 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) { 285 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); 286 287 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 288 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 289 queue_list = SYSCTL_CHILDREN(queue_node); 290 291 adapter->que[i].oid = queue_node; 292 293 #ifdef RSS 294 /* Common stats */ 295 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD, 296 &adapter->que[i].cpu, 0, "CPU affinity"); 297 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD, 298 &adapter->que[i].domain, 0, "NUMA domain"); 299 #endif 300 301 /* TX specific stats */ 302 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring", 303 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring"); 304 tx_list = SYSCTL_CHILDREN(tx_node); 305 306 tx_stats = &tx_ring->tx_stats; 307 308 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count", 309 CTLFLAG_RD, &tx_stats->cnt, "Packets sent"); 310 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes", 311 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent"); 312 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 313 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err, 314 "TX buffer preparation failures"); 315 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 316 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err, 317 "DMA mapping failures"); 318 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells", 319 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells"); 320 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 321 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp, 322 "TX completions missed"); 323 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id", 324 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count"); 325 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses", 326 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count"); 327 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 328 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err, 329 "Mbuf collapse failures"); 330 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups", 331 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups"); 332 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops", 333 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops"); 334 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 335 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy, 336 "Header copies for llq transaction"); 337 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, 338 "unmask_interrupt_num", CTLFLAG_RD, 339 &tx_stats->unmask_interrupt_num, 340 "Unmasked interrupt count"); 341 342 /* RX specific stats */ 343 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring", 344 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring"); 345 rx_list = SYSCTL_CHILDREN(rx_node); 346 347 rx_stats = &rx_ring->rx_stats; 348 349 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count", 350 CTLFLAG_RD, &rx_stats->cnt, "Packets received"); 351 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes", 352 CTLFLAG_RD, &rx_stats->bytes, "Bytes received"); 353 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial", 354 CTLFLAG_RD, &rx_stats->refil_partial, 355 "Partial refilled mbufs"); 356 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad", 357 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum"); 358 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 359 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail, 360 "Failed mbuf allocs"); 361 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 362 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail, 363 "Failed jumbo mbuf allocs"); 364 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, 365 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err, 366 "DMA mapping errors"); 367 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num", 368 CTLFLAG_RD, &rx_stats->bad_desc_num, 369 "Bad descriptor count"); 370 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id", 371 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count"); 372 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring", 373 CTLFLAG_RD, &rx_stats->empty_rx_ring, 374 "RX descriptors depletion count"); 375 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good", 376 CTLFLAG_RD, &rx_stats->csum_good, 377 "Valid RX checksum calculations"); 378 } 379 380 /* Stats read from device */ 381 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats", 382 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware"); 383 hw_list = SYSCTL_CHILDREN(hw_node); 384 385 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD, 386 &hw_stats->rx_packets, "Packets received"); 387 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD, 388 &hw_stats->tx_packets, "Packets transmitted"); 389 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, 390 &hw_stats->rx_bytes, "Bytes received"); 391 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, 392 &hw_stats->tx_bytes, "Bytes transmitted"); 393 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD, 394 &hw_stats->rx_drops, "Receive packet drops"); 395 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD, 396 &hw_stats->tx_drops, "Transmit packet drops"); 397 398 /* ENA Admin queue stats */ 399 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats", 400 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics"); 401 admin_list = SYSCTL_CHILDREN(admin_node); 402 403 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD, 404 &admin_stats->aborted_cmd, 0, "Aborted commands"); 405 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD, 406 &admin_stats->submitted_cmd, 0, "Submitted commands"); 407 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD, 408 &admin_stats->completed_cmd, 0, "Completed commands"); 409 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD, 410 &admin_stats->out_of_space, 0, "Queue out of space"); 411 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD, 412 &admin_stats->no_completion, 0, "Commands not completed"); 413 } 414 415 static void 416 ena_sysctl_add_srd_info(struct ena_adapter *adapter) 417 { 418 device_t dev; 419 420 struct sysctl_oid *ena_srd_info; 421 struct sysctl_oid_list *srd_list; 422 423 struct sysctl_ctx_list *ctx; 424 struct sysctl_oid *tree; 425 struct sysctl_oid_list *child; 426 427 struct ena_admin_ena_srd_stats *srd_stats_ptr; 428 struct ena_srd_metrics cur_stat_strings; 429 430 int i; 431 432 dev = adapter->pdev; 433 434 ctx = device_get_sysctl_ctx(dev); 435 tree = device_get_sysctl_tree(dev); 436 child = SYSCTL_CHILDREN(tree); 437 438 ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info", 439 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information"); 440 srd_list = SYSCTL_CHILDREN(ena_srd_info); 441 442 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode", 443 CTLFLAG_RD, &adapter->ena_srd_info.flags, 0, 444 "Describes which ENA-express features are enabled"); 445 446 srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats; 447 448 for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) { 449 cur_stat_strings = ena_srd_stats_strings[i]; 450 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name, 451 CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset, 452 0, cur_stat_strings.description); 453 } 454 } 455 456 static void 457 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter) 458 { 459 device_t dev; 460 struct ena_com_dev *ena_dev; 461 462 struct sysctl_ctx_list *ctx; 463 struct sysctl_oid *tree; 464 struct sysctl_oid_list *child; 465 466 struct sysctl_oid *customer_metric; 467 struct sysctl_oid_list *customer_list; 468 469 int i; 470 471 dev = adapter->pdev; 472 ena_dev = adapter->ena_dev; 473 474 ctx = device_get_sysctl_ctx(dev); 475 tree = device_get_sysctl_tree(dev); 476 child = SYSCTL_CHILDREN(tree); 477 customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics", 478 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics"); 479 customer_list = SYSCTL_CHILDREN(customer_metric); 480 481 for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) { 482 if (ena_com_get_customer_metric_support(ena_dev, i)) { 483 SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name, 484 CTLFLAG_RD, &adapter->customer_metrics_array[i], 0, 485 ena_hw_stats_strings[i].description); 486 } 487 } 488 } 489 490 static void 491 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter) 492 { 493 device_t dev; 494 struct ena_admin_eni_stats *eni_metrics; 495 496 struct sysctl_ctx_list *ctx; 497 struct sysctl_oid *tree; 498 struct sysctl_oid_list *child; 499 500 struct sysctl_oid *eni_node; 501 struct sysctl_oid_list *eni_list; 502 503 dev = adapter->pdev; 504 505 ctx = device_get_sysctl_ctx(dev); 506 tree = device_get_sysctl_tree(dev); 507 child = SYSCTL_CHILDREN(tree); 508 509 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics", 510 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics"); 511 eni_list = SYSCTL_CHILDREN(eni_node); 512 513 eni_metrics = &adapter->eni_metrics; 514 515 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded", 516 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0, 517 "Inbound BW allowance exceeded"); 518 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded", 519 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0, 520 "Outbound BW allowance exceeded"); 521 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded", 522 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0, 523 "PPS allowance exceeded"); 524 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded", 525 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0, 526 "Connection tracking allowance exceeded"); 527 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded", 528 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0, 529 "Linklocal packet rate allowance exceeded"); 530 } 531 532 static void 533 ena_sysctl_add_tuneables(struct ena_adapter *adapter) 534 { 535 device_t dev; 536 537 struct sysctl_ctx_list *ctx; 538 struct sysctl_oid *tree; 539 struct sysctl_oid_list *child; 540 541 dev = adapter->pdev; 542 543 ctx = device_get_sysctl_ctx(dev); 544 tree = device_get_sysctl_tree(dev); 545 child = SYSCTL_CHILDREN(tree); 546 547 /* Tuneable number of buffers in the buf-ring (drbr) */ 548 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size", 549 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 550 ena_sysctl_buf_ring_size, "I", 551 "Size of the Tx buffer ring (drbr)."); 552 553 /* Tuneable number of the Rx ring size */ 554 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size", 555 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 556 ena_sysctl_rx_queue_size, "I", 557 "Size of the Rx ring. The size should be a power of 2."); 558 559 /* Tuneable number of IO queues */ 560 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb", 561 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 562 ena_sysctl_io_queues_nb, "I", "Number of IO queues."); 563 564 /* 565 * Tuneable, which determines how often ENA metrics will be read. 566 * 0 means it's turned off. Maximum allowed value is limited by: 567 * ENA_METRICS_MAX_SAMPLE_INTERVAL. 568 */ 569 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval", 570 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 571 ena_sysctl_metrics_interval, "SU", 572 "Interval in seconds for updating Netword interface metrics. 0 turns off the update."); 573 } 574 575 /* Kernel option RSS prevents manipulation of key hash and indirection table. */ 576 #ifndef RSS 577 static void 578 ena_sysctl_add_rss(struct ena_adapter *adapter) 579 { 580 device_t dev; 581 582 struct sysctl_ctx_list *ctx; 583 struct sysctl_oid *tree; 584 struct sysctl_oid_list *child; 585 586 dev = adapter->pdev; 587 588 ctx = device_get_sysctl_ctx(dev); 589 tree = device_get_sysctl_tree(dev); 590 child = SYSCTL_CHILDREN(tree); 591 592 /* RSS options */ 593 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss", 594 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options."); 595 child = SYSCTL_CHILDREN(tree); 596 597 /* RSS hash key */ 598 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key", 599 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 600 ena_sysctl_rss_key, "A", "RSS key."); 601 602 /* Tuneable RSS indirection table */ 603 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table", 604 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 605 ena_sysctl_rss_indir_table, "A", "RSS indirection table."); 606 607 /* RSS indirection table size */ 608 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size", 609 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0, 610 "RSS indirection table size."); 611 } 612 #endif /* RSS */ 613 614 static void 615 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter) 616 { 617 device_t dev; 618 619 struct sysctl_ctx_list *ctx; 620 struct sysctl_oid *tree; 621 struct sysctl_oid_list *child; 622 623 dev = adapter->pdev; 624 625 ctx = device_get_sysctl_ctx(dev); 626 tree = device_get_sysctl_tree(dev); 627 child = SYSCTL_CHILDREN(tree); 628 629 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity", 630 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity."); 631 child = SYSCTL_CHILDREN(tree); 632 633 /* Add base cpu leaf */ 634 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu", 635 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 636 ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity."); 637 638 /* Add cpu stride leaf */ 639 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride", 640 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0, 641 ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity."); 642 } 643 644 645 /* 646 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes. 647 * 648 * Whether the nodes are registered or unregistered depends on a delta between 649 * the `old` and `new` parameters, representing the number of queues. 650 * 651 * This function is used to hide sysctl attributes for queue nodes which aren't 652 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`). 653 * 654 * NOTE: 655 * All unregistered nodes must be registered again at detach, i.e. by a call to 656 * this function. 657 */ 658 void 659 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new) 660 { 661 struct sysctl_oid *oid; 662 int min, max, i; 663 664 min = MIN(old, new); 665 max = MIN(MAX(old, new), adapter->max_num_io_queues); 666 667 for (i = min; i < max; ++i) { 668 oid = adapter->que[i].oid; 669 670 sysctl_wlock(); 671 if (old > new) 672 sysctl_unregister_oid(oid); 673 else 674 sysctl_register_oid(oid); 675 sysctl_wunlock(); 676 } 677 } 678 679 static int 680 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS) 681 { 682 struct ena_adapter *adapter = arg1; 683 uint32_t val; 684 int error; 685 686 ENA_LOCK_LOCK(); 687 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 688 error = EINVAL; 689 goto unlock; 690 } 691 692 val = 0; 693 error = sysctl_wire_old_buffer(req, sizeof(val)); 694 if (error == 0) { 695 val = adapter->buf_ring_size; 696 error = sysctl_handle_32(oidp, &val, 0, req); 697 } 698 if (error != 0 || req->newptr == NULL) 699 goto unlock; 700 701 if (!powerof2(val) || val == 0) { 702 ena_log(adapter->pdev, ERR, 703 "Requested new Tx buffer ring size (%u) is not a power of 2\n", 704 val); 705 error = EINVAL; 706 goto unlock; 707 } 708 709 if (val != adapter->buf_ring_size) { 710 ena_log(adapter->pdev, INFO, 711 "Requested new Tx buffer ring size: %d. Old size: %d\n", 712 val, adapter->buf_ring_size); 713 714 error = ena_update_buf_ring_size(adapter, val); 715 } else { 716 ena_log(adapter->pdev, ERR, 717 "New Tx buffer ring size is the same as already used: %u\n", 718 adapter->buf_ring_size); 719 } 720 721 unlock: 722 ENA_LOCK_UNLOCK(); 723 724 return (error); 725 } 726 727 static int 728 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS) 729 { 730 struct ena_adapter *adapter = arg1; 731 uint32_t val; 732 int error; 733 734 ENA_LOCK_LOCK(); 735 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 736 error = EINVAL; 737 goto unlock; 738 } 739 740 val = 0; 741 error = sysctl_wire_old_buffer(req, sizeof(val)); 742 if (error == 0) { 743 val = adapter->requested_rx_ring_size; 744 error = sysctl_handle_32(oidp, &val, 0, req); 745 } 746 if (error != 0 || req->newptr == NULL) 747 goto unlock; 748 749 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) { 750 ena_log(adapter->pdev, ERR, 751 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n", 752 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size); 753 error = EINVAL; 754 goto unlock; 755 } 756 757 /* Check if the parameter is power of 2 */ 758 if (!powerof2(val)) { 759 ena_log(adapter->pdev, ERR, 760 "Requested new Rx queue size (%u) is not a power of 2\n", 761 val); 762 error = EINVAL; 763 goto unlock; 764 } 765 766 if (val != adapter->requested_rx_ring_size) { 767 ena_log(adapter->pdev, INFO, 768 "Requested new Rx queue size: %u. Old size: %u\n", val, 769 adapter->requested_rx_ring_size); 770 771 error = ena_update_queue_size(adapter, 772 adapter->requested_tx_ring_size, val); 773 } else { 774 ena_log(adapter->pdev, ERR, 775 "New Rx queue size is the same as already used: %u\n", 776 adapter->requested_rx_ring_size); 777 } 778 779 unlock: 780 ENA_LOCK_UNLOCK(); 781 782 return (error); 783 } 784 785 /* 786 * Change number of effectively used IO queues adapter->num_io_queues 787 */ 788 static int 789 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS) 790 { 791 struct ena_adapter *adapter = arg1; 792 uint32_t old_num_queues, tmp = 0; 793 int error; 794 795 ENA_LOCK_LOCK(); 796 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 797 error = EINVAL; 798 goto unlock; 799 } 800 801 error = sysctl_wire_old_buffer(req, sizeof(tmp)); 802 if (error == 0) { 803 tmp = adapter->num_io_queues; 804 error = sysctl_handle_int(oidp, &tmp, 0, req); 805 } 806 if (error != 0 || req->newptr == NULL) 807 goto unlock; 808 809 if (tmp == 0) { 810 ena_log(adapter->pdev, ERR, 811 "Requested number of IO queues is zero\n"); 812 error = EINVAL; 813 goto unlock; 814 } 815 816 /* 817 * The adapter::max_num_io_queues is the HW capability. The system 818 * resources availability may potentially be a tighter limit. Therefore 819 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs` 820 * always holds true, while the `adapter::msix_vecs` is variable across 821 * device reset (`ena_destroy_device()` + `ena_restore_device()`). 822 */ 823 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) { 824 ena_log(adapter->pdev, ERR, 825 "Requested number of IO queues is higher than maximum allowed (%u)\n", 826 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC); 827 error = EINVAL; 828 goto unlock; 829 } 830 if (tmp == adapter->num_io_queues) { 831 ena_log(adapter->pdev, ERR, 832 "Requested number of IO queues is equal to current value " 833 "(%u)\n", 834 adapter->num_io_queues); 835 } else { 836 ena_log(adapter->pdev, INFO, 837 "Requested new number of IO queues: %u, current value: " 838 "%u\n", 839 tmp, adapter->num_io_queues); 840 841 old_num_queues = adapter->num_io_queues; 842 error = ena_update_io_queue_nb(adapter, tmp); 843 if (error != 0) 844 return (error); 845 846 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp); 847 } 848 849 unlock: 850 ENA_LOCK_UNLOCK(); 851 852 return (error); 853 } 854 855 static int 856 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS) 857 { 858 struct ena_adapter *adapter = arg1; 859 uint16_t interval; 860 int error; 861 862 ENA_LOCK_LOCK(); 863 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 864 error = EINVAL; 865 goto unlock; 866 } 867 868 error = sysctl_wire_old_buffer(req, sizeof(interval)); 869 if (error == 0) { 870 interval = adapter->metrics_sample_interval; 871 error = sysctl_handle_16(oidp, &interval, 0, req); 872 } 873 if (error != 0 || req->newptr == NULL) 874 goto unlock; 875 876 if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) { 877 ena_log(adapter->pdev, ERR, 878 "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n", 879 ENA_METRICS_MAX_SAMPLE_INTERVAL); 880 error = EINVAL; 881 goto unlock; 882 } 883 884 if (interval == 0) { 885 ena_log(adapter->pdev, INFO, 886 "ENA metrics update is now turned off\n"); 887 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics)); 888 } else { 889 ena_log(adapter->pdev, INFO, 890 "ENA metrics update interval is set to: %" PRIu16 891 " seconds\n", 892 interval); 893 } 894 895 adapter->metrics_sample_interval = interval; 896 897 unlock: 898 ENA_LOCK_UNLOCK(); 899 900 return (0); 901 } 902 903 static int 904 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS) 905 { 906 struct ena_adapter *adapter = arg1; 907 int irq_base_cpu = 0; 908 int error; 909 910 ENA_LOCK_LOCK(); 911 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 912 error = ENODEV; 913 goto unlock; 914 } 915 916 error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu)); 917 if (error == 0) { 918 irq_base_cpu = adapter->irq_cpu_base; 919 error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req); 920 } 921 if (error != 0 || req->newptr == NULL) 922 goto unlock; 923 924 if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) { 925 ena_log(adapter->pdev, ERR, 926 "Requested base CPU is less than zero.\n"); 927 error = EINVAL; 928 goto unlock; 929 } 930 931 if (irq_base_cpu > mp_ncpus) { 932 ena_log(adapter->pdev, INFO, 933 "Requested base CPU is larger than the number of available CPUs. \n"); 934 error = EINVAL; 935 goto unlock; 936 937 } 938 939 if (irq_base_cpu == adapter->irq_cpu_base) { 940 ena_log(adapter->pdev, INFO, 941 "Requested IRQ base CPU is equal to current value " 942 "(%d)\n", 943 adapter->irq_cpu_base); 944 goto unlock; 945 } 946 947 ena_log(adapter->pdev, INFO, 948 "Requested new IRQ base CPU: %d, current value: %d\n", 949 irq_base_cpu, adapter->irq_cpu_base); 950 951 error = ena_update_base_cpu(adapter, irq_base_cpu); 952 953 unlock: 954 ENA_LOCK_UNLOCK(); 955 956 return (error); 957 } 958 959 static int 960 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS) 961 { 962 struct ena_adapter *adapter = arg1; 963 int32_t irq_cpu_stride = 0; 964 int error; 965 966 ENA_LOCK_LOCK(); 967 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 968 error = ENODEV; 969 goto unlock; 970 } 971 972 error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride)); 973 if (error == 0) { 974 irq_cpu_stride = adapter->irq_cpu_stride; 975 error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req); 976 } 977 if (error != 0 || req->newptr == NULL) 978 goto unlock; 979 980 if (irq_cpu_stride < 0) { 981 ena_log(adapter->pdev, ERR, 982 "Requested IRQ stride is less than zero.\n"); 983 error = EINVAL; 984 goto unlock; 985 } 986 987 if (irq_cpu_stride > mp_ncpus) { 988 ena_log(adapter->pdev, INFO, 989 "Warning: Requested IRQ stride is larger than the number of available CPUs.\n"); 990 } 991 992 if (irq_cpu_stride == adapter->irq_cpu_stride) { 993 ena_log(adapter->pdev, INFO, 994 "Requested IRQ CPU stride is equal to current value " 995 "(%u)\n", 996 adapter->irq_cpu_stride); 997 goto unlock; 998 } 999 1000 ena_log(adapter->pdev, INFO, 1001 "Requested new IRQ CPU stride: %u, current value: %u\n", 1002 irq_cpu_stride, adapter->irq_cpu_stride); 1003 1004 error = ena_update_cpu_stride(adapter, irq_cpu_stride); 1005 if (error != 0) 1006 goto unlock; 1007 1008 unlock: 1009 ENA_LOCK_UNLOCK(); 1010 1011 return (error); 1012 } 1013 1014 #ifndef RSS 1015 /* 1016 * Change the Receive Side Scaling hash key. 1017 */ 1018 static int 1019 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS) 1020 { 1021 struct ena_adapter *adapter = arg1; 1022 struct ena_com_dev *ena_dev = adapter->ena_dev; 1023 enum ena_admin_hash_functions ena_func; 1024 char msg[ENA_HASH_KEY_MSG_SIZE]; 1025 char elem[3] = { 0 }; 1026 char *endp; 1027 u8 rss_key[ENA_HASH_KEY_SIZE]; 1028 int error, i; 1029 1030 ENA_LOCK_LOCK(); 1031 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1032 error = EINVAL; 1033 goto unlock; 1034 } 1035 1036 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1037 error = ENOTSUP; 1038 goto unlock; 1039 } 1040 1041 error = sysctl_wire_old_buffer(req, sizeof(msg)); 1042 if (error != 0) 1043 goto unlock; 1044 1045 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func); 1046 if (error != 0) { 1047 device_printf(adapter->pdev, "Cannot get hash function\n"); 1048 goto unlock; 1049 } 1050 1051 if (ena_func != ENA_ADMIN_TOEPLITZ) { 1052 error = EINVAL; 1053 device_printf(adapter->pdev, "Unsupported hash algorithm\n"); 1054 goto unlock; 1055 } 1056 1057 error = ena_rss_get_hash_key(ena_dev, rss_key); 1058 if (error != 0) { 1059 device_printf(adapter->pdev, "Cannot get hash key\n"); 1060 goto unlock; 1061 } 1062 1063 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) 1064 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]); 1065 1066 error = sysctl_handle_string(oidp, msg, sizeof(msg), req); 1067 if (error != 0 || req->newptr == NULL) 1068 goto unlock; 1069 1070 if (strlen(msg) != sizeof(msg) - 1) { 1071 error = EINVAL; 1072 device_printf(adapter->pdev, "Invalid key size\n"); 1073 goto unlock; 1074 } 1075 1076 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) { 1077 strncpy(elem, &msg[i * 2], 2); 1078 rss_key[i] = strtol(elem, &endp, 16); 1079 1080 /* Both hex nibbles in the string must be valid to continue. */ 1081 if (endp == elem || *endp != '\0' || rss_key[i] < 0) { 1082 error = EINVAL; 1083 device_printf(adapter->pdev, 1084 "Invalid key hex value: '%c'\n", *endp); 1085 goto unlock; 1086 } 1087 } 1088 1089 error = ena_rss_set_hash(ena_dev, rss_key); 1090 if (error != 0) 1091 device_printf(adapter->pdev, "Cannot fill hash key\n"); 1092 1093 unlock: 1094 ENA_LOCK_UNLOCK(); 1095 1096 return (error); 1097 } 1098 1099 /* 1100 * Change the Receive Side Scaling indirection table. 1101 * 1102 * The sysctl entry string consists of one or more `x:y` keypairs, where 1103 * x stands for the table index and y for its new value. 1104 * Table indices that don't need to be updated can be omitted from the string 1105 * and will retain their existing values. If an index is entered more than once, 1106 * the last value is used. 1107 * 1108 * Example: 1109 * To update two selected indices in the RSS indirection table, e.g. setting 1110 * index 0 to queue 5 and then index 5 to queue 0, the below command should be 1111 * used: 1112 * sysctl dev.ena.0.rss.indir_table="0:5 5:0" 1113 */ 1114 static int 1115 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS) 1116 { 1117 int num_queues, error; 1118 struct ena_adapter *adapter = arg1; 1119 struct ena_indir *indir; 1120 char *msg, *buf, *endp; 1121 uint32_t idx, value; 1122 1123 ENA_LOCK_LOCK(); 1124 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) { 1125 error = EINVAL; 1126 goto unlock; 1127 } 1128 1129 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) { 1130 error = ENOTSUP; 1131 goto unlock; 1132 } 1133 1134 indir = adapter->rss_indir; 1135 msg = indir->sysctl_buf; 1136 1137 if (unlikely(indir == NULL)) { 1138 error = ENOTSUP; 1139 goto unlock; 1140 } 1141 1142 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req); 1143 if (error != 0 || req->newptr == NULL) 1144 goto unlock; 1145 1146 num_queues = adapter->num_io_queues; 1147 1148 /* 1149 * This sysctl expects msg to be a list of `x:y` record pairs, 1150 * where x is the indirection table index and y is its value. 1151 */ 1152 for (buf = msg; *buf != '\0'; buf = endp) { 1153 idx = strtol(buf, &endp, 10); 1154 1155 if (endp == buf || idx < 0) { 1156 device_printf(adapter->pdev, "Invalid index: %s\n", 1157 buf); 1158 error = EINVAL; 1159 break; 1160 } 1161 1162 if (idx >= ENA_RX_RSS_TABLE_SIZE) { 1163 device_printf(adapter->pdev, "Index %d out of range\n", 1164 idx); 1165 error = ERANGE; 1166 break; 1167 } 1168 1169 buf = endp; 1170 1171 if (*buf++ != ':') { 1172 device_printf(adapter->pdev, "Missing ':' separator\n"); 1173 error = EINVAL; 1174 break; 1175 } 1176 1177 value = strtol(buf, &endp, 10); 1178 1179 if (endp == buf || value < 0) { 1180 device_printf(adapter->pdev, "Invalid value: %s\n", 1181 buf); 1182 error = EINVAL; 1183 break; 1184 } 1185 1186 if (value >= num_queues) { 1187 device_printf(adapter->pdev, "Value %d out of range\n", 1188 value); 1189 error = ERANGE; 1190 break; 1191 } 1192 1193 indir->table[idx] = value; 1194 } 1195 1196 if (error != 0) /* Reload indirection table with last good data. */ 1197 ena_rss_indir_get(adapter, indir->table); 1198 1199 /* At this point msg has been clobbered by sysctl_handle_string. */ 1200 ena_rss_copy_indir_buf(msg, indir->table); 1201 1202 if (error == 0) 1203 error = ena_rss_indir_set(adapter, indir->table); 1204 1205 unlock: 1206 ENA_LOCK_UNLOCK(); 1207 1208 return (error); 1209 } 1210 #endif /* RSS */ 1211