xref: /freebsd/sys/dev/ena/ena_sysctl.c (revision 41c5a4a2f888765b513efb13befe32fa084ad7cb)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include "opt_rss.h"
33 
34 #include "ena_rss.h"
35 #include "ena_sysctl.h"
36 
/* Builders which attach the individual groups of sysctl nodes. */
static void ena_sysctl_add_wd(struct ena_adapter *adapter);
static void ena_sysctl_add_stats(struct ena_adapter *adapter);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void ena_sysctl_add_customer_metrics(struct ena_adapter *adapter);
static void ena_sysctl_add_srd_info(struct ena_adapter *adapter);
static void ena_sysctl_add_tuneables(struct ena_adapter *adapter);
static void ena_sysctl_add_irq_affinity(struct ena_adapter *adapter);

/* Handlers of the procedure-backed sysctl leaves. */
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS);

/*
 * The kernel option RSS prevents manipulation of the hash key and of the
 * indirection table, so the RSS nodes exist only when it is not set.
 */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *adapter);
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
58 
/* The metrics sampling interval may be at most one hour (in seconds). */
#define ENA_METRICS_MAX_SAMPLE_INTERVAL	3600
#define ENA_HASH_KEY_MSG_SIZE		(ENA_HASH_KEY_SIZE * 2 + 1)

/* Capacity of the name and description buffers of the metric tables. */
#define SYSCTL_GSTRING_LEN		128

/* Initializer of a metric entry; both arguments are stringified. */
#define ENA_METRIC_ENI_ENTRY(stat, desc)				\
	{								\
		.name = #stat,						\
		.description = #desc,					\
	}

/*
 * Initializer of a statistic entry. Besides the stringified name and
 * description it records the position of `stat` inside
 * `struct ena_admin_<stat_type>`, expressed in u64-sized slots.
 */
#define ENA_STAT_ENTRY(stat, desc, stat_type)				\
	{								\
		.name = #stat,						\
		.description = #desc,					\
		.stat_offset =						\
		    offsetof(struct ena_admin_##stat_type, stat) /	\
		    sizeof(u64),					\
	}

/* ENA_STAT_ENTRY specialised for `struct ena_admin_ena_srd_stats`. */
#define ENA_STAT_ENA_SRD_ENTRY(stat, desc)				\
	ENA_STAT_ENTRY(stat, desc, ena_srd_stats)
78 
79 struct ena_hw_metrics {
80         char name[SYSCTL_GSTRING_LEN];
81         char description[SYSCTL_GSTRING_LEN];
82 };
83 
84 struct ena_srd_metrics {
85         char name[SYSCTL_GSTRING_LEN];
86         char description[SYSCTL_GSTRING_LEN];
87         int stat_offset;
88 };
89 
90 static const struct ena_srd_metrics ena_srd_stats_strings[] = {
91         ENA_STAT_ENA_SRD_ENTRY(
92 	    ena_srd_tx_pkts, Number of packets transmitted over ENA SRD),
93         ENA_STAT_ENA_SRD_ENTRY(
94 	    ena_srd_eligible_tx_pkts, Number of packets transmitted or could
95 	    have been transmitted over ENA SRD),
96         ENA_STAT_ENA_SRD_ENTRY(
97 	    ena_srd_rx_pkts, Number of packets received over ENA SRD),
98         ENA_STAT_ENA_SRD_ENTRY(
99 	    ena_srd_resource_utilization, Percentage of the ENA SRD resources
100 	    that are in use),
101 };
102 
103 static const struct ena_hw_metrics ena_hw_stats_strings[] = {
104         ENA_METRIC_ENI_ENTRY(
105 	    bw_in_allowance_exceeded, Inbound BW allowance exceeded),
106         ENA_METRIC_ENI_ENTRY(
107 	    bw_out_allowance_exceeded, Outbound BW allowance exceeded),
108         ENA_METRIC_ENI_ENTRY(
109 	    pps_allowance_exceeded, PPS allowance exceeded),
110         ENA_METRIC_ENI_ENTRY(
111 	    conntrack_allowance_exceeded, Connection tracking allowance exceeded),
112         ENA_METRIC_ENI_ENTRY(
113 	    linklocal_allowance_exceeded, Linklocal packet rate allowance),
114         ENA_METRIC_ENI_ENTRY(
115 	    conntrack_allowance_available, Number of available conntracks),
116 };
117 
#ifndef ARRAY_SIZE
/*
 * Number of elements of a true array (must not be used on a pointer). The
 * argument is parenthesised so that expressions such as casts or dereferences
 * are indexed as a whole.
 */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Element counts of the metric description tables. */
#define ENA_CUSTOMER_METRICS_ARRAY_SIZE      ARRAY_SIZE(ena_hw_stats_strings)
#define ENA_SRD_METRICS_ARRAY_SIZE           ARRAY_SIZE(ena_srd_stats_strings)
124 
125 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
126     "ENA driver parameters");
127 
128 /*
129  * Logging level for changing verbosity of the output
130  */
131 int ena_log_level = ENA_INFO;
132 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
133     "Logging level indicating verbosity of the logs");
134 
135 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
136     ENA_DRV_MODULE_VERSION, "ENA driver version");
137 
138 /*
139  * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
140  * Using 9k mbufs in low memory conditions might cause allocation to take a lot
141  * of time and lead to the OS instability as it needs to look for the contiguous
142  * pages.
143  * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
144  * the network performance is the priority, the 9k mbufs can be used.
145  */
146 int ena_enable_9k_mbufs = 0;
147 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
148     &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
149 
150 /*
151  * Force the driver to use large or regular LLQ (Low Latency Queue) header size.
152  * Defaults to ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT. This option may be
153  * important for platforms, which often handle packet headers on Tx with total
154  * header size greater than 96B, as it may reduce the latency.
155  * It also reduces the maximum Tx queue size by half, so it may cause more Tx
156  * packet drops.
157  */
158 int ena_force_large_llq_header = ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT;
159 SYSCTL_INT(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
160     &ena_force_large_llq_header, 0,
161     "Change default LLQ entry size received from the device");
162 
163 int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
164 
165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter)
166 {
167 	int rc = 0;
168 
169 	adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE),
170 	    M_DEVBUF, M_NOWAIT | M_ZERO);
171 	if (unlikely(adapter->customer_metrics_array == NULL))
172 		rc = ENOMEM;
173 
174 	return rc;
175 }
176 void
177 ena_sysctl_add_nodes(struct ena_adapter *adapter)
178 {
179 	struct ena_com_dev *dev = adapter->ena_dev;
180 
181 	if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS))
182 		ena_sysctl_add_customer_metrics(adapter);
183 	else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS))
184 		ena_sysctl_add_eni_metrics(adapter);
185 
186 	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
187 		ena_sysctl_add_srd_info(adapter);
188 
189 	ena_sysctl_add_wd(adapter);
190 	ena_sysctl_add_stats(adapter);
191 	ena_sysctl_add_tuneables(adapter);
192 	ena_sysctl_add_irq_affinity(adapter);
193 #ifndef RSS
194 	ena_sysctl_add_rss(adapter);
195 #endif
196 }
197 
198 static void
199 ena_sysctl_add_wd(struct ena_adapter *adapter)
200 {
201 	device_t dev;
202 
203 	struct sysctl_ctx_list *ctx;
204 	struct sysctl_oid *tree;
205 	struct sysctl_oid_list *child;
206 
207 	dev = adapter->pdev;
208 
209 	ctx = device_get_sysctl_ctx(dev);
210 	tree = device_get_sysctl_tree(dev);
211 	child = SYSCTL_CHILDREN(tree);
212 
213 	/* Sysctl calls for Watchdog service */
214 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
215 	    &adapter->wd_active, 0, "Watchdog is active");
216 
217 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
218 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
219 	    "Timeout for Keep Alive messages");
220 
221 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
222 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
223 	    "Timeout for TX completion");
224 
225 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
226 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
227 	    "Number of TX queues to check per run");
228 
229 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
230 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
231 	    "Max number of timeouted packets");
232 }
233 
234 static void
235 ena_sysctl_add_stats(struct ena_adapter *adapter)
236 {
237 	device_t dev;
238 
239 	struct ena_ring *tx_ring;
240 	struct ena_ring *rx_ring;
241 
242 	struct ena_hw_stats *hw_stats;
243 	struct ena_stats_dev *dev_stats;
244 	struct ena_stats_tx *tx_stats;
245 	struct ena_stats_rx *rx_stats;
246 	struct ena_com_stats_admin *admin_stats;
247 
248 	struct sysctl_ctx_list *ctx;
249 	struct sysctl_oid *tree;
250 	struct sysctl_oid_list *child;
251 
252 	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
253 	struct sysctl_oid *admin_node;
254 	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
255 	struct sysctl_oid_list *admin_list;
256 
257 #define QUEUE_NAME_LEN 32
258 	char namebuf[QUEUE_NAME_LEN];
259 	int i;
260 
261 	dev = adapter->pdev;
262 
263 	ctx = device_get_sysctl_ctx(dev);
264 	tree = device_get_sysctl_tree(dev);
265 	child = SYSCTL_CHILDREN(tree);
266 
267 	tx_ring = adapter->tx_ring;
268 	rx_ring = adapter->rx_ring;
269 
270 	hw_stats = &adapter->hw_stats;
271 	dev_stats = &adapter->dev_stats;
272 	admin_stats = &adapter->ena_dev->admin_queue.stats;
273 
274 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
275 	    &dev_stats->wd_expired, "Watchdog expiry count");
276 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
277 	    &dev_stats->interface_up, "Network interface up count");
278 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD,
279 	    &dev_stats->interface_down, "Network interface down count");
280 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD,
281 	    &dev_stats->admin_q_pause, "Admin queue pauses");
282 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "os_trigger", CTLFLAG_RD,
283 	    &dev_stats->os_trigger, "OS trigger count");
284 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_tx_cmpl", CTLFLAG_RD,
285 	    &dev_stats->missing_tx_cmpl, "Missing TX completions resets count");
286 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_req_id", CTLFLAG_RD,
287 	    &dev_stats->bad_rx_req_id, "Bad RX req id count");
288 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_tx_req_id", CTLFLAG_RD,
289 	    &dev_stats->bad_tx_req_id, "Bad TX req id count");
290 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_desc_num", CTLFLAG_RD,
291 	    &dev_stats->bad_rx_desc_num, "Bad RX descriptors number count");
292 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "invalid_state", CTLFLAG_RD,
293 	    &dev_stats->invalid_state, "Driver invalid state count");
294 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD,
295 	    &dev_stats->missing_intr, "Missing interrupt count");
296 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD,
297 	    &dev_stats->tx_desc_malformed, "TX descriptors malformed count");
298 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
299 	    &dev_stats->rx_desc_malformed, "RX descriptors malformed count");
300 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
301 	    &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
302 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
303 	    &dev_stats->admin_to, "Admin queue timeouts count");
304 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD,
305 	    &dev_stats->device_request_reset, "Device reset requests count");
306 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
307 	    &dev_stats->total_resets, "Total resets count");
308 
309 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
310 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
311 
312 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
313 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
314 		queue_list = SYSCTL_CHILDREN(queue_node);
315 
316 		adapter->que[i].oid = queue_node;
317 
318 #ifdef RSS
319 		/* Common stats */
320 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
321 		    &adapter->que[i].cpu, 0, "CPU affinity");
322 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
323 		    &adapter->que[i].domain, 0, "NUMA domain");
324 #endif
325 
326 		/* TX specific stats */
327 		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
328 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
329 		tx_list = SYSCTL_CHILDREN(tx_node);
330 
331 		tx_stats = &tx_ring->tx_stats;
332 
333 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
334 		    CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
335 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
336 		    CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
337 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
338 		    "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
339 		    "TX buffer preparation failures");
340 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
341 		    "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
342 		    "DMA mapping failures");
343 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
344 		    CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
345 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
346 		    "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
347 		    "TX completions missed");
348 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
349 		    CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
350 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
351 		    CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
352 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
353 		    "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
354 		    "Mbuf collapse failures");
355 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
356 		    CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
357 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
358 		    CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
359 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
360 		    "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
361 		    "Header copies for llq transaction");
362 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
363 		    "unmask_interrupt_num", CTLFLAG_RD,
364 		    &tx_stats->unmask_interrupt_num,
365 		    "Unmasked interrupt count");
366 
367 		/* RX specific stats */
368 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
369 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
370 		rx_list = SYSCTL_CHILDREN(rx_node);
371 
372 		rx_stats = &rx_ring->rx_stats;
373 
374 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
375 		    CTLFLAG_RD, &rx_stats->cnt, "Packets received");
376 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
377 		    CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
378 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
379 		    CTLFLAG_RD, &rx_stats->refil_partial,
380 		    "Partial refilled mbufs");
381 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
382 		    CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
383 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
384 		    "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
385 		    "Failed mbuf allocs");
386 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
387 		    "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
388 		    "Failed jumbo mbuf allocs");
389 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
390 		    "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
391 		    "DMA mapping errors");
392 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
393 		    CTLFLAG_RD, &rx_stats->bad_desc_num,
394 		    "Bad descriptor count");
395 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
396 		    CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
397 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
398 		    CTLFLAG_RD, &rx_stats->empty_rx_ring,
399 		    "RX descriptors depletion count");
400 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
401 		    CTLFLAG_RD, &rx_stats->csum_good,
402 		    "Valid RX checksum calculations");
403 	}
404 
405 	/* Stats read from device */
406 	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
407 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
408 	hw_list = SYSCTL_CHILDREN(hw_node);
409 
410 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
411 	    &hw_stats->rx_packets, "Packets received");
412 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
413 	    &hw_stats->tx_packets, "Packets transmitted");
414 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
415 	    &hw_stats->rx_bytes, "Bytes received");
416 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
417 	    &hw_stats->tx_bytes, "Bytes transmitted");
418 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
419 	    &hw_stats->rx_drops, "Receive packet drops");
420 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
421 	    &hw_stats->tx_drops, "Transmit packet drops");
422 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_overruns", CTLFLAG_RD,
423 	    &hw_stats->rx_overruns, "Receive overruns");
424 
425 	/* ENA Admin queue stats */
426 	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
427 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
428 	admin_list = SYSCTL_CHILDREN(admin_node);
429 
430 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
431 	    &admin_stats->aborted_cmd, 0, "Aborted commands");
432 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
433 	    &admin_stats->submitted_cmd, 0, "Submitted commands");
434 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
435 	    &admin_stats->completed_cmd, 0, "Completed commands");
436 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
437 	    &admin_stats->out_of_space, 0, "Queue out of space");
438 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
439 	    &admin_stats->no_completion, 0, "Commands not completed");
440 }
441 
442 static void
443 ena_sysctl_add_srd_info(struct ena_adapter *adapter)
444 {
445 	device_t dev;
446 
447 	struct sysctl_oid *ena_srd_info;
448 	struct sysctl_oid_list *srd_list;
449 
450 	struct sysctl_ctx_list *ctx;
451 	struct sysctl_oid *tree;
452 	struct sysctl_oid_list *child;
453 
454 	struct ena_admin_ena_srd_stats *srd_stats_ptr;
455 	struct ena_srd_metrics cur_stat_strings;
456 
457 	int i;
458 
459 	dev = adapter->pdev;
460 
461 	ctx = device_get_sysctl_ctx(dev);
462 	tree = device_get_sysctl_tree(dev);
463 	child = SYSCTL_CHILDREN(tree);
464 
465 	ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info",
466 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information");
467 	srd_list = SYSCTL_CHILDREN(ena_srd_info);
468 
469 	SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode",
470             CTLFLAG_RD, &adapter->ena_srd_info.flags, 0,
471             "Describes which ENA-express features are enabled");
472 
473 	srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats;
474 
475 	for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) {
476 		cur_stat_strings = ena_srd_stats_strings[i];
477 		SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name,
478 		    CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset,
479 		    0, cur_stat_strings.description);
480 	}
481 }
482 
483 static void
484 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter)
485 {
486 	device_t dev;
487 	struct ena_com_dev *ena_dev;
488 
489 	struct sysctl_ctx_list *ctx;
490 	struct sysctl_oid *tree;
491 	struct sysctl_oid_list *child;
492 
493 	struct sysctl_oid *customer_metric;
494 	struct sysctl_oid_list *customer_list;
495 
496 	int i;
497 
498 	dev = adapter->pdev;
499 	ena_dev = adapter->ena_dev;
500 
501 	ctx = device_get_sysctl_ctx(dev);
502 	tree = device_get_sysctl_tree(dev);
503 	child = SYSCTL_CHILDREN(tree);
504 	customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics",
505 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics");
506 	customer_list = SYSCTL_CHILDREN(customer_metric);
507 
508 	for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) {
509 	        if (ena_com_get_customer_metric_support(ena_dev, i)) {
510 	                SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name,
511 	                    CTLFLAG_RD, &adapter->customer_metrics_array[i], 0,
512 	                    ena_hw_stats_strings[i].description);
513 	         }
514 	 }
515 }
516 
517 static void
518 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
519 {
520 	device_t dev;
521 	struct ena_admin_eni_stats *eni_metrics;
522 
523 	struct sysctl_ctx_list *ctx;
524 	struct sysctl_oid *tree;
525 	struct sysctl_oid_list *child;
526 
527 	struct sysctl_oid *eni_node;
528 	struct sysctl_oid_list *eni_list;
529 
530 	dev = adapter->pdev;
531 
532 	ctx = device_get_sysctl_ctx(dev);
533 	tree = device_get_sysctl_tree(dev);
534 	child = SYSCTL_CHILDREN(tree);
535 
536 	eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
537 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
538 	eni_list = SYSCTL_CHILDREN(eni_node);
539 
540 	eni_metrics = &adapter->eni_metrics;
541 
542 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
543 	    CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
544 	    "Inbound BW allowance exceeded");
545 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
546 	    CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
547 	    "Outbound BW allowance exceeded");
548 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
549 	    CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
550 	    "PPS allowance exceeded");
551 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
552 	    CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
553 	    "Connection tracking allowance exceeded");
554 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
555 	    CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
556 	    "Linklocal packet rate allowance exceeded");
557 }
558 
559 static void
560 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
561 {
562 	device_t dev;
563 
564 	struct sysctl_ctx_list *ctx;
565 	struct sysctl_oid *tree;
566 	struct sysctl_oid_list *child;
567 
568 	dev = adapter->pdev;
569 
570 	ctx = device_get_sysctl_ctx(dev);
571 	tree = device_get_sysctl_tree(dev);
572 	child = SYSCTL_CHILDREN(tree);
573 
574 	/* Tuneable number of buffers in the buf-ring (drbr) */
575 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
576 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
577 	    ena_sysctl_buf_ring_size, "I",
578 	    "Size of the Tx buffer ring (drbr).");
579 
580 	/* Tuneable number of the Rx ring size */
581 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
582 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
583 	    ena_sysctl_rx_queue_size, "I",
584 	    "Size of the Rx ring. The size should be a power of 2.");
585 
586 	/* Tuneable number of IO queues */
587 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
588 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
589 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
590 
591 	/*
592 	 * Tuneable, which determines how often ENA metrics will be read.
593 	 * 0 means it's turned off. Maximum allowed value is limited by:
594 	 * ENA_METRICS_MAX_SAMPLE_INTERVAL.
595 	 */
596 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval",
597 	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
598 	    ena_sysctl_metrics_interval, "SU",
599 	    "Interval in seconds for updating Netword interface metrics. 0 turns off the update.");
600 }
601 
602 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
603 #ifndef RSS
604 static void
605 ena_sysctl_add_rss(struct ena_adapter *adapter)
606 {
607 	device_t dev;
608 
609 	struct sysctl_ctx_list *ctx;
610 	struct sysctl_oid *tree;
611 	struct sysctl_oid_list *child;
612 
613 	dev = adapter->pdev;
614 
615 	ctx = device_get_sysctl_ctx(dev);
616 	tree = device_get_sysctl_tree(dev);
617 	child = SYSCTL_CHILDREN(tree);
618 
619 	/* RSS options */
620 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
621 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
622 	child = SYSCTL_CHILDREN(tree);
623 
624 	/* RSS hash key */
625 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
626 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
627 	    ena_sysctl_rss_key, "A", "RSS key.");
628 
629 	/* Tuneable RSS indirection table */
630 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
631 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
632 	    ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
633 
634 	/* RSS indirection table size */
635 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
636 	    CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
637 	    "RSS indirection table size.");
638 }
639 #endif /* RSS */
640 
641 static void
642 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
643 {
644 	device_t dev;
645 
646 	struct sysctl_ctx_list *ctx;
647 	struct sysctl_oid *tree;
648 	struct sysctl_oid_list *child;
649 
650 	dev = adapter->pdev;
651 
652 	ctx = device_get_sysctl_ctx(dev);
653 	tree = device_get_sysctl_tree(dev);
654 	child = SYSCTL_CHILDREN(tree);
655 
656 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity",
657 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity.");
658 	child = SYSCTL_CHILDREN(tree);
659 
660 	/* Add base cpu leaf */
661 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu",
662 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
663 	    ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity.");
664 
665 	/* Add cpu stride leaf */
666 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride",
667 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
668 	    ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity.");
669 }
670 
671 
672 /*
673  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
674  *
675  * Whether the nodes are registered or unregistered depends on a delta between
676  * the `old` and `new` parameters, representing the number of queues.
677  *
678  * This function is used to hide sysctl attributes for queue nodes which aren't
679  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
680  *
681  * NOTE:
682  * All unregistered nodes must be registered again at detach, i.e. by a call to
683  * this function.
684  */
685 void
686 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
687 {
688 	struct sysctl_oid *oid;
689 	int min, max, i;
690 
691 	min = MIN(old, new);
692 	max = MIN(MAX(old, new), adapter->max_num_io_queues);
693 
694 	for (i = min; i < max; ++i) {
695 		oid = adapter->que[i].oid;
696 
697 		sysctl_wlock();
698 		if (old > new)
699 			sysctl_unregister_oid(oid);
700 		else
701 			sysctl_register_oid(oid);
702 		sysctl_wunlock();
703 	}
704 }
705 
706 static int
707 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
708 {
709 	struct ena_adapter *adapter = arg1;
710 	uint32_t val;
711 	int error;
712 
713 	ENA_LOCK_LOCK();
714 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
715 		error = EINVAL;
716 		goto unlock;
717 	}
718 
719 	val = 0;
720 	error = sysctl_wire_old_buffer(req, sizeof(val));
721 	if (error == 0) {
722 		val = adapter->buf_ring_size;
723 		error = sysctl_handle_32(oidp, &val, 0, req);
724 	}
725 	if (error != 0 || req->newptr == NULL)
726 		goto unlock;
727 
728 	if (!powerof2(val) || val == 0) {
729 		ena_log(adapter->pdev, ERR,
730 		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
731 		    val);
732 		error = EINVAL;
733 		goto unlock;
734 	}
735 
736 	if (val != adapter->buf_ring_size) {
737 		ena_log(adapter->pdev, INFO,
738 		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
739 		    val, adapter->buf_ring_size);
740 
741 		error = ena_update_buf_ring_size(adapter, val);
742 	} else {
743 		ena_log(adapter->pdev, ERR,
744 		    "New Tx buffer ring size is the same as already used: %u\n",
745 		    adapter->buf_ring_size);
746 	}
747 
748 unlock:
749 	ENA_LOCK_UNLOCK();
750 
751 	return (error);
752 }
753 
754 static int
755 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
756 {
757 	struct ena_adapter *adapter = arg1;
758 	uint32_t val;
759 	int error;
760 
761 	ENA_LOCK_LOCK();
762 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
763 		error = EINVAL;
764 		goto unlock;
765 	}
766 
767 	val = 0;
768 	error = sysctl_wire_old_buffer(req, sizeof(val));
769 	if (error == 0) {
770 		val = adapter->requested_rx_ring_size;
771 		error = sysctl_handle_32(oidp, &val, 0, req);
772 	}
773 	if (error != 0 || req->newptr == NULL)
774 		goto unlock;
775 
776 	if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
777 		ena_log(adapter->pdev, ERR,
778 		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
779 		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
780 		error = EINVAL;
781 		goto unlock;
782 	}
783 
784 	/* Check if the parameter is power of 2 */
785 	if (!powerof2(val)) {
786 		ena_log(adapter->pdev, ERR,
787 		    "Requested new Rx queue size (%u) is not a power of 2\n",
788 		    val);
789 		error = EINVAL;
790 		goto unlock;
791 	}
792 
793 	if (val != adapter->requested_rx_ring_size) {
794 		ena_log(adapter->pdev, INFO,
795 		    "Requested new Rx queue size: %u. Old size: %u\n", val,
796 		    adapter->requested_rx_ring_size);
797 
798 		error = ena_update_queue_size(adapter,
799 		    adapter->requested_tx_ring_size, val);
800 	} else {
801 		ena_log(adapter->pdev, ERR,
802 		    "New Rx queue size is the same as already used: %u\n",
803 		    adapter->requested_rx_ring_size);
804 	}
805 
806 unlock:
807 	ENA_LOCK_UNLOCK();
808 
809 	return (error);
810 }
811 
812 /*
813  * Change number of effectively used IO queues adapter->num_io_queues
814  */
815 static int
816 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
817 {
818 	struct ena_adapter *adapter = arg1;
819 	uint32_t old_num_queues, tmp = 0;
820 	int error;
821 
822 	ENA_LOCK_LOCK();
823 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
824 		error = EINVAL;
825 		goto unlock;
826 	}
827 
828 	error = sysctl_wire_old_buffer(req, sizeof(tmp));
829 	if (error == 0) {
830 		tmp = adapter->num_io_queues;
831 		error = sysctl_handle_int(oidp, &tmp, 0, req);
832 	}
833 	if (error != 0 || req->newptr == NULL)
834 		goto unlock;
835 
836 	if (tmp == 0) {
837 		ena_log(adapter->pdev, ERR,
838 		    "Requested number of IO queues is zero\n");
839 		error = EINVAL;
840 		goto unlock;
841 	}
842 
843 	/*
844 	 * The adapter::max_num_io_queues is the HW capability. The system
845 	 * resources availability may potentially be a tighter limit. Therefore
846 	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
847 	 * always holds true, while the `adapter::msix_vecs` is variable across
848 	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
849 	 */
850 	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
851 		ena_log(adapter->pdev, ERR,
852 		    "Requested number of IO queues is higher than maximum allowed (%u)\n",
853 		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
854 		error = EINVAL;
855 		goto unlock;
856 	}
857 	if (tmp == adapter->num_io_queues) {
858 		ena_log(adapter->pdev, ERR,
859 		    "Requested number of IO queues is equal to current value "
860 		    "(%u)\n",
861 		    adapter->num_io_queues);
862 	} else {
863 		ena_log(adapter->pdev, INFO,
864 		    "Requested new number of IO queues: %u, current value: "
865 		    "%u\n",
866 		    tmp, adapter->num_io_queues);
867 
868 		old_num_queues = adapter->num_io_queues;
869 		error = ena_update_io_queue_nb(adapter, tmp);
870 		if (error != 0)
871 			return (error);
872 
873 		ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
874 	}
875 
876 unlock:
877 	ENA_LOCK_UNLOCK();
878 
879 	return (error);
880 }
881 
882 static int
883 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)
884 {
885 	struct ena_adapter *adapter = arg1;
886 	uint16_t interval;
887 	int error;
888 
889 	ENA_LOCK_LOCK();
890 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
891 		error = EINVAL;
892 		goto unlock;
893 	}
894 
895 	error = sysctl_wire_old_buffer(req, sizeof(interval));
896 	if (error == 0) {
897 		interval = adapter->metrics_sample_interval;
898 		error = sysctl_handle_16(oidp, &interval, 0, req);
899 	}
900 	if (error != 0 || req->newptr == NULL)
901 		goto unlock;
902 
903 	if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) {
904 		ena_log(adapter->pdev, ERR,
905 		    "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n",
906 		    ENA_METRICS_MAX_SAMPLE_INTERVAL);
907 		error = EINVAL;
908 		goto unlock;
909 	}
910 
911 	if (interval == 0) {
912 		ena_log(adapter->pdev, INFO,
913 		    "ENA metrics update is now turned off\n");
914 		bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
915 	} else {
916 		ena_log(adapter->pdev, INFO,
917 		    "ENA metrics update interval is set to: %" PRIu16
918 		    " seconds\n",
919 		    interval);
920 	}
921 
922 	adapter->metrics_sample_interval = interval;
923 
924 unlock:
925 	ENA_LOCK_UNLOCK();
926 
927 	return (0);
928 }
929 
930 static int
931 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
932 {
933 	struct ena_adapter *adapter = arg1;
934 	int irq_base_cpu = 0;
935 	int error;
936 
937 	ENA_LOCK_LOCK();
938 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
939 		error = ENODEV;
940 		goto unlock;
941 	}
942 
943 	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
944 	if (error == 0) {
945 		irq_base_cpu = adapter->irq_cpu_base;
946 		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
947 	}
948 	if (error != 0 || req->newptr == NULL)
949 		goto unlock;
950 
951 	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
952 		ena_log(adapter->pdev, ERR,
953 		    "Requested base CPU is less than zero.\n");
954 		error = EINVAL;
955 		goto unlock;
956 	}
957 
958 	if (irq_base_cpu > mp_ncpus) {
959 		ena_log(adapter->pdev, INFO,
960 		    "Requested base CPU is larger than the number of available CPUs. \n");
961 		error = EINVAL;
962 		goto unlock;
963 
964 	}
965 
966 	if (irq_base_cpu == adapter->irq_cpu_base) {
967 		ena_log(adapter->pdev, INFO,
968 		    "Requested IRQ base CPU is equal to current value "
969 		    "(%d)\n",
970 		    adapter->irq_cpu_base);
971 		goto unlock;
972 	}
973 
974 	ena_log(adapter->pdev, INFO,
975 	    "Requested new IRQ base CPU: %d, current value: %d\n",
976 	    irq_base_cpu, adapter->irq_cpu_base);
977 
978 	error = ena_update_base_cpu(adapter, irq_base_cpu);
979 
980 unlock:
981 	ENA_LOCK_UNLOCK();
982 
983 	return (error);
984 }
985 
986 static int
987 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
988 {
989 	struct ena_adapter *adapter = arg1;
990 	int32_t irq_cpu_stride = 0;
991 	int error;
992 
993 	ENA_LOCK_LOCK();
994 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
995 		error = ENODEV;
996 		goto unlock;
997 	}
998 
999 	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
1000 	if (error == 0) {
1001 		irq_cpu_stride = adapter->irq_cpu_stride;
1002 		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
1003 	}
1004 	if (error != 0 || req->newptr == NULL)
1005 		goto unlock;
1006 
1007 	if (irq_cpu_stride < 0) {
1008 		ena_log(adapter->pdev, ERR,
1009 		    "Requested IRQ stride is less than zero.\n");
1010 		error = EINVAL;
1011 		goto unlock;
1012 	}
1013 
1014 	if (irq_cpu_stride > mp_ncpus) {
1015 		ena_log(adapter->pdev, INFO,
1016 		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
1017 	}
1018 
1019 	if (irq_cpu_stride == adapter->irq_cpu_stride) {
1020 		ena_log(adapter->pdev, INFO,
1021 		    "Requested IRQ CPU stride is equal to current value "
1022 		    "(%u)\n",
1023 		    adapter->irq_cpu_stride);
1024 		goto unlock;
1025 	}
1026 
1027 	ena_log(adapter->pdev, INFO,
1028 	    "Requested new IRQ CPU stride: %u, current value: %u\n",
1029 	    irq_cpu_stride, adapter->irq_cpu_stride);
1030 
1031 	error = ena_update_cpu_stride(adapter, irq_cpu_stride);
1032 	if (error != 0)
1033 		goto unlock;
1034 
1035 unlock:
1036 	ENA_LOCK_UNLOCK();
1037 
1038 	return (error);
1039 }
1040 
1041 #ifndef RSS
1042 /*
1043  * Change the Receive Side Scaling hash key.
1044  */
1045 static int
1046 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
1047 {
1048 	struct ena_adapter *adapter = arg1;
1049 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1050 	enum ena_admin_hash_functions ena_func;
1051 	char msg[ENA_HASH_KEY_MSG_SIZE];
1052 	char elem[3] = { 0 };
1053 	char *endp;
1054 	u8 rss_key[ENA_HASH_KEY_SIZE];
1055 	int error, i;
1056 
1057 	ENA_LOCK_LOCK();
1058 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1059 		error = EINVAL;
1060 		goto unlock;
1061 	}
1062 
1063 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1064 		error = ENOTSUP;
1065 		goto unlock;
1066 	}
1067 
1068 	error = sysctl_wire_old_buffer(req, sizeof(msg));
1069 	if (error != 0)
1070 		goto unlock;
1071 
1072 	error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
1073 	if (error != 0) {
1074 		device_printf(adapter->pdev, "Cannot get hash function\n");
1075 		goto unlock;
1076 	}
1077 
1078 	if (ena_func != ENA_ADMIN_TOEPLITZ) {
1079 		error = EINVAL;
1080 		device_printf(adapter->pdev, "Unsupported hash algorithm\n");
1081 		goto unlock;
1082 	}
1083 
1084 	error = ena_rss_get_hash_key(ena_dev, rss_key);
1085 	if (error != 0) {
1086 		device_printf(adapter->pdev, "Cannot get hash key\n");
1087 		goto unlock;
1088 	}
1089 
1090 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
1091 		snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
1092 
1093 	error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
1094 	if (error != 0 || req->newptr == NULL)
1095 		goto unlock;
1096 
1097 	if (strlen(msg) != sizeof(msg) - 1) {
1098 		error = EINVAL;
1099 		device_printf(adapter->pdev, "Invalid key size\n");
1100 		goto unlock;
1101 	}
1102 
1103 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
1104 		strncpy(elem, &msg[i * 2], 2);
1105 		rss_key[i] = strtol(elem, &endp, 16);
1106 
1107 		/* Both hex nibbles in the string must be valid to continue. */
1108 		if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
1109 			error = EINVAL;
1110 			device_printf(adapter->pdev,
1111 			    "Invalid key hex value: '%c'\n", *endp);
1112 			goto unlock;
1113 		}
1114 	}
1115 
1116 	error = ena_rss_set_hash(ena_dev, rss_key);
1117 	if (error != 0)
1118 		device_printf(adapter->pdev, "Cannot fill hash key\n");
1119 
1120 unlock:
1121 	ENA_LOCK_UNLOCK();
1122 
1123 	return (error);
1124 }
1125 
1126 /*
1127  * Change the Receive Side Scaling indirection table.
1128  *
1129  * The sysctl entry string consists of one or more `x:y` keypairs, where
1130  * x stands for the table index and y for its new value.
1131  * Table indices that don't need to be updated can be omitted from the string
1132  * and will retain their existing values. If an index is entered more than once,
1133  * the last value is used.
1134  *
1135  * Example:
1136  * To update two selected indices in the RSS indirection table, e.g. setting
1137  * index 0 to queue 5 and then index 5 to queue 0, the below command should be
1138  * used:
1139  *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
1140  */
1141 static int
1142 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
1143 {
1144 	int num_queues, error;
1145 	struct ena_adapter *adapter = arg1;
1146 	struct ena_indir *indir;
1147 	char *msg, *buf, *endp;
1148 	uint32_t idx, value;
1149 
1150 	ENA_LOCK_LOCK();
1151 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1152 		error = EINVAL;
1153 		goto unlock;
1154 	}
1155 
1156 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1157 		error = ENOTSUP;
1158 		goto unlock;
1159 	}
1160 
1161 	indir = adapter->rss_indir;
1162 	msg = indir->sysctl_buf;
1163 
1164 	if (unlikely(indir == NULL)) {
1165 		error = ENOTSUP;
1166 		goto unlock;
1167 	}
1168 
1169 	error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
1170 	if (error != 0 || req->newptr == NULL)
1171 		goto unlock;
1172 
1173 	num_queues = adapter->num_io_queues;
1174 
1175 	/*
1176 	 * This sysctl expects msg to be a list of `x:y` record pairs,
1177 	 * where x is the indirection table index and y is its value.
1178 	 */
1179 	for (buf = msg; *buf != '\0'; buf = endp) {
1180 		idx = strtol(buf, &endp, 10);
1181 
1182 		if (endp == buf || idx < 0) {
1183 			device_printf(adapter->pdev, "Invalid index: %s\n",
1184 			    buf);
1185 			error = EINVAL;
1186 			break;
1187 		}
1188 
1189 		if (idx >= ENA_RX_RSS_TABLE_SIZE) {
1190 			device_printf(adapter->pdev, "Index %d out of range\n",
1191 			    idx);
1192 			error = ERANGE;
1193 			break;
1194 		}
1195 
1196 		buf = endp;
1197 
1198 		if (*buf++ != ':') {
1199 			device_printf(adapter->pdev, "Missing ':' separator\n");
1200 			error = EINVAL;
1201 			break;
1202 		}
1203 
1204 		value = strtol(buf, &endp, 10);
1205 
1206 		if (endp == buf || value < 0) {
1207 			device_printf(adapter->pdev, "Invalid value: %s\n",
1208 			    buf);
1209 			error = EINVAL;
1210 			break;
1211 		}
1212 
1213 		if (value >= num_queues) {
1214 			device_printf(adapter->pdev, "Value %d out of range\n",
1215 			    value);
1216 			error = ERANGE;
1217 			break;
1218 		}
1219 
1220 		indir->table[idx] = value;
1221 	}
1222 
1223 	if (error != 0) /* Reload indirection table with last good data. */
1224 		ena_rss_indir_get(adapter, indir->table);
1225 
1226 	/* At this point msg has been clobbered by sysctl_handle_string. */
1227 	ena_rss_copy_indir_buf(msg, indir->table);
1228 
1229 	if (error == 0)
1230 		error = ena_rss_indir_set(adapter, indir->table);
1231 
1232 unlock:
1233 	ENA_LOCK_UNLOCK();
1234 
1235 	return (error);
1236 }
1237 #endif /* RSS */
1238