xref: /freebsd/sys/dev/ena/ena_sysctl.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include "opt_rss.h"
33 
34 #include "ena_rss.h"
35 #include "ena_sysctl.h"
36 
/* Builders, each attaching one group of nodes to the device sysctl tree. */
static void ena_sysctl_add_wd(struct ena_adapter *adapter);
static void ena_sysctl_add_stats(struct ena_adapter *adapter);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *adapter);
static void ena_sysctl_add_customer_metrics(struct ena_adapter *adapter);
static void ena_sysctl_add_srd_info(struct ena_adapter *adapter);
static void ena_sysctl_add_tuneables(struct ena_adapter *adapter);
static void ena_sysctl_add_irq_affinity(struct ena_adapter *adapter);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *adapter);
#endif

/* Handlers backing the SYSCTL_ADD_PROC() leaves. */
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS);
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
58 
/* Largest accepted metrics sampling interval, in seconds (one hour). */
#define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600
/*
 * Twice the hash key size plus one.
 * NOTE(review): presumably two text characters per key byte plus a
 * terminating NUL - confirm against the RSS key handler.
 */
#define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)

/* Capacity of the name/description buffers in the metric tables below. */
#define SYSCTL_GSTRING_LEN 128

/*
 * Initializer for one struct ena_hw_metrics entry. Both arguments are
 * stringized, so they are written without quotes: `stat` becomes the sysctl
 * leaf name and `desc` its description.
 */
#define ENA_METRIC_ENI_ENTRY(stat, desc) { \
        .name = #stat, \
        .description = #desc, \
}

/*
 * Initializer for one entry carrying a stat offset. `stat` must be a member
 * of struct ena_admin_<stat_type>; its byte offset is divided by sizeof(u64),
 * i.e. stat_offset is an index in u64 units from the start of that structure.
 */
#define ENA_STAT_ENTRY(stat, desc, stat_type) { \
        .name = #stat, \
        .description = #desc, \
        .stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \
}

/* ENA_STAT_ENTRY() specialised for members of struct ena_admin_ena_srd_stats. */
#define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \
	ENA_STAT_ENTRY(stat, desc, ena_srd_stats)
78 
/*
 * Name/description pair describing one metric leaf; filled in by
 * ENA_METRIC_ENI_ENTRY().
 */
struct ena_hw_metrics {
        char name[SYSCTL_GSTRING_LEN];		/* sysctl leaf name */
        char description[SYSCTL_GSTRING_LEN];	/* sysctl description text */
};
83 
/*
 * Name/description pair plus the location of the value for one ENA SRD
 * statistic; filled in by ENA_STAT_ENA_SRD_ENTRY().
 */
struct ena_srd_metrics {
        char name[SYSCTL_GSTRING_LEN];		/* sysctl leaf name */
        char description[SYSCTL_GSTRING_LEN];	/* sysctl description text */
        int stat_offset;			/* index of the stat, in u64 units */
};
89 
/*
 * ENA SRD statistics exported by ena_sysctl_add_srd_info(). Each entry names
 * a member of struct ena_admin_ena_srd_stats; the description is the
 * stringized second macro argument (line breaks inside it collapse to a
 * single space).
 */
static const struct ena_srd_metrics ena_srd_stats_strings[] = {
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_tx_pkts, Number of packets transmitted over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_eligible_tx_pkts, Number of packets transmitted or could
	    have been transmitted over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_rx_pkts, Number of packets received over ENA SRD),
        ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_resource_utilization, Percentage of the ENA SRD resources
	    that are in use),
};
102 
103 static const struct ena_hw_metrics ena_hw_stats_strings[] = {
104         ENA_METRIC_ENI_ENTRY(
105 	    bw_in_allowance_exceeded, Inbound BW allowance exceeded),
106         ENA_METRIC_ENI_ENTRY(
107 	    bw_out_allowance_exceeded, Outbound BW allowance exceeded),
108         ENA_METRIC_ENI_ENTRY(
109 	    pps_allowance_exceeded, PPS allowance exceeded),
110         ENA_METRIC_ENI_ENTRY(
111 	    conntrack_allowance_exceeded, Connection tracking allowance exceeded),
112         ENA_METRIC_ENI_ENTRY(
113 	    linklocal_allowance_exceeded, Linklocal packet rate allowance),
114         ENA_METRIC_ENI_ENTRY(
115 	    conntrack_allowance_available, Number of available conntracks),
116 };
117 
/*
 * Number of elements in a true array (not a pointer). The argument is
 * parenthesized before indexing so that any array-valued expression is safe.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Entry counts of the metric description tables. */
#define ENA_CUSTOMER_METRICS_ARRAY_SIZE      ARRAY_SIZE(ena_hw_stats_strings)
#define ENA_SRD_METRICS_ARRAY_SIZE           ARRAY_SIZE(ena_srd_stats_strings)
124 
/* Root node "hw.ena" holding the driver-wide (not per-device) knobs below. */
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ENA driver parameters");

/*
 * Logging level controlling the verbosity of the driver output.
 * Registered with CTLFLAG_RWTUN (writable, and also settable as a tunable -
 * see sysctl(9)).
 */
int ena_log_level = ENA_INFO;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
    "Logging level indicating verbosity of the logs");

/* Read-only string exposing ENA_DRV_MODULE_VERSION. */
SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
    ENA_DRV_MODULE_VERSION, "ENA driver version");

/*
 * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs
 * instead).
 * Using 9k mbufs in low memory conditions might cause allocations to take a
 * lot of time and lead to OS instability, as the allocator needs to find
 * contiguous pages.
 * However, page size mbufs give slightly lower throughput than 9k mbufs, so
 * if network performance is the priority, 9k mbufs can be used.
 */
int ena_enable_9k_mbufs = 0;
SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");

/*
 * Force the driver to use large or regular LLQ (Low Latency Queue) header
 * size. Defaults to ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT. This option may be
 * important for platforms which often handle packet headers on Tx with a
 * total header size greater than 96B, as it may reduce the latency.
 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
 * packet drops.
 */
int ena_force_large_llq_header = ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT;
SYSCTL_INT(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
    &ena_force_large_llq_header, 0,
    "Change default LLQ entry size received from the device");

/*
 * RSS indirection table size; exported read-only as "rss.indir_table_size"
 * by ena_sysctl_add_rss() when the kernel is built without option RSS.
 */
int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
164 
165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter)
166 {
167 	int rc = 0;
168 
169 	adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE),
170 	    M_DEVBUF, M_NOWAIT | M_ZERO);
171 	if (unlikely(adapter->customer_metrics_array == NULL))
172 		rc = ENOMEM;
173 
174 	return rc;
175 }
176 void
177 ena_sysctl_add_nodes(struct ena_adapter *adapter)
178 {
179 	struct ena_com_dev *dev = adapter->ena_dev;
180 
181 	if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS))
182 		ena_sysctl_add_customer_metrics(adapter);
183 	else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS))
184 		ena_sysctl_add_eni_metrics(adapter);
185 
186 	if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
187 		ena_sysctl_add_srd_info(adapter);
188 
189 	ena_sysctl_add_wd(adapter);
190 	ena_sysctl_add_stats(adapter);
191 	ena_sysctl_add_tuneables(adapter);
192 	ena_sysctl_add_irq_affinity(adapter);
193 #ifndef RSS
194 	ena_sysctl_add_rss(adapter);
195 #endif
196 }
197 
198 static void
199 ena_sysctl_add_wd(struct ena_adapter *adapter)
200 {
201 	device_t dev;
202 
203 	struct sysctl_ctx_list *ctx;
204 	struct sysctl_oid *tree;
205 	struct sysctl_oid_list *child;
206 
207 	dev = adapter->pdev;
208 
209 	ctx = device_get_sysctl_ctx(dev);
210 	tree = device_get_sysctl_tree(dev);
211 	child = SYSCTL_CHILDREN(tree);
212 
213 	/* Sysctl calls for Watchdog service */
214 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
215 	    &adapter->wd_active, 0, "Watchdog is active");
216 
217 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
218 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
219 	    "Timeout for Keep Alive messages");
220 
221 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
222 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
223 	    "Timeout for TX completion");
224 
225 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
226 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
227 	    "Number of TX queues to check per run");
228 
229 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
230 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
231 	    "Max number of timeouted packets");
232 }
233 
234 static void
235 ena_sysctl_add_stats(struct ena_adapter *adapter)
236 {
237 	device_t dev;
238 
239 	struct ena_ring *tx_ring;
240 	struct ena_ring *rx_ring;
241 
242 	struct ena_hw_stats *hw_stats;
243 	struct ena_stats_dev *dev_stats;
244 	struct ena_stats_tx *tx_stats;
245 	struct ena_stats_rx *rx_stats;
246 	struct ena_com_stats_admin *admin_stats;
247 
248 	struct sysctl_ctx_list *ctx;
249 	struct sysctl_oid *tree;
250 	struct sysctl_oid_list *child;
251 
252 	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
253 	struct sysctl_oid *admin_node;
254 	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
255 	struct sysctl_oid_list *admin_list;
256 
257 #define QUEUE_NAME_LEN 32
258 	char namebuf[QUEUE_NAME_LEN];
259 	int i;
260 
261 	dev = adapter->pdev;
262 
263 	ctx = device_get_sysctl_ctx(dev);
264 	tree = device_get_sysctl_tree(dev);
265 	child = SYSCTL_CHILDREN(tree);
266 
267 	tx_ring = adapter->tx_ring;
268 	rx_ring = adapter->rx_ring;
269 
270 	hw_stats = &adapter->hw_stats;
271 	dev_stats = &adapter->dev_stats;
272 	admin_stats = &adapter->ena_dev->admin_queue.stats;
273 
274 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
275 	    &dev_stats->wd_expired, "Watchdog expiry count");
276 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
277 	    &dev_stats->interface_up, "Network interface up count");
278 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD,
279 	    &dev_stats->interface_down, "Network interface down count");
280 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD,
281 	    &dev_stats->admin_q_pause, "Admin queue pauses");
282 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "os_trigger", CTLFLAG_RD,
283 	    &dev_stats->os_trigger, "OS trigger count");
284 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_tx_cmpl", CTLFLAG_RD,
285 	    &dev_stats->missing_tx_cmpl, "Missing TX completions resets count");
286 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_req_id", CTLFLAG_RD,
287 	    &dev_stats->bad_rx_req_id, "Bad RX req id count");
288 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_tx_req_id", CTLFLAG_RD,
289 	    &dev_stats->bad_tx_req_id, "Bad TX req id count");
290 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_desc_num", CTLFLAG_RD,
291 	    &dev_stats->bad_rx_desc_num, "Bad RX descriptors number count");
292 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "invalid_state", CTLFLAG_RD,
293 	    &dev_stats->invalid_state, "Driver invalid state count");
294 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD,
295 	    &dev_stats->missing_intr, "Missing interrupt count");
296 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD,
297 	    &dev_stats->tx_desc_malformed, "TX descriptors malformed count");
298 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
299 	    &dev_stats->rx_desc_malformed, "RX descriptors malformed count");
300 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
301 	    &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
302 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
303 	    &dev_stats->admin_to, "Admin queue timeouts count");
304 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD,
305 	    &dev_stats->device_request_reset, "Device reset requests count");
306 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
307 	    &dev_stats->total_resets, "Total resets count");
308 
309 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
310 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
311 
312 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
313 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
314 		queue_list = SYSCTL_CHILDREN(queue_node);
315 
316 		adapter->que[i].oid = queue_node;
317 
318 #ifdef RSS
319 		/* Common stats */
320 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
321 		    &adapter->que[i].cpu, 0, "CPU affinity");
322 		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
323 		    &adapter->que[i].domain, 0, "NUMA domain");
324 #endif
325 
326 		/* TX specific stats */
327 		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
328 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
329 		tx_list = SYSCTL_CHILDREN(tx_node);
330 
331 		tx_stats = &tx_ring->tx_stats;
332 
333 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
334 		    CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
335 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
336 		    CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
337 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
338 		    "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
339 		    "TX buffer preparation failures");
340 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
341 		    "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
342 		    "DMA mapping failures");
343 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
344 		    CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
345 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
346 		    "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
347 		    "TX completions missed");
348 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
349 		    CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
350 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
351 		    CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
352 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
353 		    "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
354 		    "Mbuf collapse failures");
355 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
356 		    CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
357 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
358 		    CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
359 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
360 		    "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
361 		    "Header copies for llq transaction");
362 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
363 		    "unmask_interrupt_num", CTLFLAG_RD,
364 		    &tx_stats->unmask_interrupt_num,
365 		    "Unmasked interrupt count");
366 
367 		/* RX specific stats */
368 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
369 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
370 		rx_list = SYSCTL_CHILDREN(rx_node);
371 
372 		rx_stats = &rx_ring->rx_stats;
373 
374 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
375 		    CTLFLAG_RD, &rx_stats->cnt, "Packets received");
376 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
377 		    CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
378 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
379 		    CTLFLAG_RD, &rx_stats->refil_partial,
380 		    "Partial refilled mbufs");
381 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
382 		    CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
383 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
384 		    "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
385 		    "Failed mbuf allocs");
386 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
387 		    "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
388 		    "Failed jumbo mbuf allocs");
389 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
390 		    "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
391 		    "DMA mapping errors");
392 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
393 		    CTLFLAG_RD, &rx_stats->bad_desc_num,
394 		    "Bad descriptor count");
395 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
396 		    CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
397 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
398 		    CTLFLAG_RD, &rx_stats->empty_rx_ring,
399 		    "RX descriptors depletion count");
400 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
401 		    CTLFLAG_RD, &rx_stats->csum_good,
402 		    "Valid RX checksum calculations");
403 	}
404 
405 	/* Stats read from device */
406 	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
407 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
408 	hw_list = SYSCTL_CHILDREN(hw_node);
409 
410 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
411 	    &hw_stats->rx_packets, "Packets received");
412 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
413 	    &hw_stats->tx_packets, "Packets transmitted");
414 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
415 	    &hw_stats->rx_bytes, "Bytes received");
416 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
417 	    &hw_stats->tx_bytes, "Bytes transmitted");
418 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
419 	    &hw_stats->rx_drops, "Receive packet drops");
420 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
421 	    &hw_stats->tx_drops, "Transmit packet drops");
422 
423 	/* ENA Admin queue stats */
424 	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
425 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
426 	admin_list = SYSCTL_CHILDREN(admin_node);
427 
428 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
429 	    &admin_stats->aborted_cmd, 0, "Aborted commands");
430 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
431 	    &admin_stats->submitted_cmd, 0, "Submitted commands");
432 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
433 	    &admin_stats->completed_cmd, 0, "Completed commands");
434 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
435 	    &admin_stats->out_of_space, 0, "Queue out of space");
436 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
437 	    &admin_stats->no_completion, 0, "Commands not completed");
438 }
439 
440 static void
441 ena_sysctl_add_srd_info(struct ena_adapter *adapter)
442 {
443 	device_t dev;
444 
445 	struct sysctl_oid *ena_srd_info;
446 	struct sysctl_oid_list *srd_list;
447 
448 	struct sysctl_ctx_list *ctx;
449 	struct sysctl_oid *tree;
450 	struct sysctl_oid_list *child;
451 
452 	struct ena_admin_ena_srd_stats *srd_stats_ptr;
453 	struct ena_srd_metrics cur_stat_strings;
454 
455 	int i;
456 
457 	dev = adapter->pdev;
458 
459 	ctx = device_get_sysctl_ctx(dev);
460 	tree = device_get_sysctl_tree(dev);
461 	child = SYSCTL_CHILDREN(tree);
462 
463 	ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info",
464 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information");
465 	srd_list = SYSCTL_CHILDREN(ena_srd_info);
466 
467 	SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode",
468             CTLFLAG_RD, &adapter->ena_srd_info.flags, 0,
469             "Describes which ENA-express features are enabled");
470 
471 	srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats;
472 
473 	for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) {
474 		cur_stat_strings = ena_srd_stats_strings[i];
475 		SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name,
476 		    CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset,
477 		    0, cur_stat_strings.description);
478 	}
479 }
480 
481 static void
482 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter)
483 {
484 	device_t dev;
485 	struct ena_com_dev *ena_dev;
486 
487 	struct sysctl_ctx_list *ctx;
488 	struct sysctl_oid *tree;
489 	struct sysctl_oid_list *child;
490 
491 	struct sysctl_oid *customer_metric;
492 	struct sysctl_oid_list *customer_list;
493 
494 	int i;
495 
496 	dev = adapter->pdev;
497 	ena_dev = adapter->ena_dev;
498 
499 	ctx = device_get_sysctl_ctx(dev);
500 	tree = device_get_sysctl_tree(dev);
501 	child = SYSCTL_CHILDREN(tree);
502 	customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics",
503 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics");
504 	customer_list = SYSCTL_CHILDREN(customer_metric);
505 
506 	for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) {
507 	        if (ena_com_get_customer_metric_support(ena_dev, i)) {
508 	                SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name,
509 	                    CTLFLAG_RD, &adapter->customer_metrics_array[i], 0,
510 	                    ena_hw_stats_strings[i].description);
511 	         }
512 	 }
513 }
514 
515 static void
516 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
517 {
518 	device_t dev;
519 	struct ena_admin_eni_stats *eni_metrics;
520 
521 	struct sysctl_ctx_list *ctx;
522 	struct sysctl_oid *tree;
523 	struct sysctl_oid_list *child;
524 
525 	struct sysctl_oid *eni_node;
526 	struct sysctl_oid_list *eni_list;
527 
528 	dev = adapter->pdev;
529 
530 	ctx = device_get_sysctl_ctx(dev);
531 	tree = device_get_sysctl_tree(dev);
532 	child = SYSCTL_CHILDREN(tree);
533 
534 	eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
535 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
536 	eni_list = SYSCTL_CHILDREN(eni_node);
537 
538 	eni_metrics = &adapter->eni_metrics;
539 
540 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
541 	    CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
542 	    "Inbound BW allowance exceeded");
543 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
544 	    CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
545 	    "Outbound BW allowance exceeded");
546 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
547 	    CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
548 	    "PPS allowance exceeded");
549 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
550 	    CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
551 	    "Connection tracking allowance exceeded");
552 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
553 	    CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
554 	    "Linklocal packet rate allowance exceeded");
555 }
556 
557 static void
558 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
559 {
560 	device_t dev;
561 
562 	struct sysctl_ctx_list *ctx;
563 	struct sysctl_oid *tree;
564 	struct sysctl_oid_list *child;
565 
566 	dev = adapter->pdev;
567 
568 	ctx = device_get_sysctl_ctx(dev);
569 	tree = device_get_sysctl_tree(dev);
570 	child = SYSCTL_CHILDREN(tree);
571 
572 	/* Tuneable number of buffers in the buf-ring (drbr) */
573 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
574 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
575 	    ena_sysctl_buf_ring_size, "I",
576 	    "Size of the Tx buffer ring (drbr).");
577 
578 	/* Tuneable number of the Rx ring size */
579 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
580 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
581 	    ena_sysctl_rx_queue_size, "I",
582 	    "Size of the Rx ring. The size should be a power of 2.");
583 
584 	/* Tuneable number of IO queues */
585 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
586 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
587 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
588 
589 	/*
590 	 * Tuneable, which determines how often ENA metrics will be read.
591 	 * 0 means it's turned off. Maximum allowed value is limited by:
592 	 * ENA_METRICS_MAX_SAMPLE_INTERVAL.
593 	 */
594 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval",
595 	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
596 	    ena_sysctl_metrics_interval, "SU",
597 	    "Interval in seconds for updating Netword interface metrics. 0 turns off the update.");
598 }
599 
600 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
601 #ifndef RSS
602 static void
603 ena_sysctl_add_rss(struct ena_adapter *adapter)
604 {
605 	device_t dev;
606 
607 	struct sysctl_ctx_list *ctx;
608 	struct sysctl_oid *tree;
609 	struct sysctl_oid_list *child;
610 
611 	dev = adapter->pdev;
612 
613 	ctx = device_get_sysctl_ctx(dev);
614 	tree = device_get_sysctl_tree(dev);
615 	child = SYSCTL_CHILDREN(tree);
616 
617 	/* RSS options */
618 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
619 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
620 	child = SYSCTL_CHILDREN(tree);
621 
622 	/* RSS hash key */
623 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
624 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
625 	    ena_sysctl_rss_key, "A", "RSS key.");
626 
627 	/* Tuneable RSS indirection table */
628 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
629 	    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
630 	    ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
631 
632 	/* RSS indirection table size */
633 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
634 	    CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
635 	    "RSS indirection table size.");
636 }
637 #endif /* RSS */
638 
639 static void
640 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
641 {
642 	device_t dev;
643 
644 	struct sysctl_ctx_list *ctx;
645 	struct sysctl_oid *tree;
646 	struct sysctl_oid_list *child;
647 
648 	dev = adapter->pdev;
649 
650 	ctx = device_get_sysctl_ctx(dev);
651 	tree = device_get_sysctl_tree(dev);
652 	child = SYSCTL_CHILDREN(tree);
653 
654 	tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity",
655 	    CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity.");
656 	child = SYSCTL_CHILDREN(tree);
657 
658 	/* Add base cpu leaf */
659 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu",
660 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
661 	    ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity.");
662 
663 	/* Add cpu stride leaf */
664 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride",
665 	    CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
666 	    ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity.");
667 }
668 
669 
670 /*
671  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
672  *
673  * Whether the nodes are registered or unregistered depends on a delta between
674  * the `old` and `new` parameters, representing the number of queues.
675  *
676  * This function is used to hide sysctl attributes for queue nodes which aren't
677  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
678  *
679  * NOTE:
680  * All unregistered nodes must be registered again at detach, i.e. by a call to
681  * this function.
682  */
683 void
684 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
685 {
686 	struct sysctl_oid *oid;
687 	int min, max, i;
688 
689 	min = MIN(old, new);
690 	max = MIN(MAX(old, new), adapter->max_num_io_queues);
691 
692 	for (i = min; i < max; ++i) {
693 		oid = adapter->que[i].oid;
694 
695 		sysctl_wlock();
696 		if (old > new)
697 			sysctl_unregister_oid(oid);
698 		else
699 			sysctl_register_oid(oid);
700 		sysctl_wunlock();
701 	}
702 }
703 
704 static int
705 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
706 {
707 	struct ena_adapter *adapter = arg1;
708 	uint32_t val;
709 	int error;
710 
711 	ENA_LOCK_LOCK();
712 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
713 		error = EINVAL;
714 		goto unlock;
715 	}
716 
717 	val = 0;
718 	error = sysctl_wire_old_buffer(req, sizeof(val));
719 	if (error == 0) {
720 		val = adapter->buf_ring_size;
721 		error = sysctl_handle_32(oidp, &val, 0, req);
722 	}
723 	if (error != 0 || req->newptr == NULL)
724 		goto unlock;
725 
726 	if (!powerof2(val) || val == 0) {
727 		ena_log(adapter->pdev, ERR,
728 		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
729 		    val);
730 		error = EINVAL;
731 		goto unlock;
732 	}
733 
734 	if (val != adapter->buf_ring_size) {
735 		ena_log(adapter->pdev, INFO,
736 		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
737 		    val, adapter->buf_ring_size);
738 
739 		error = ena_update_buf_ring_size(adapter, val);
740 	} else {
741 		ena_log(adapter->pdev, ERR,
742 		    "New Tx buffer ring size is the same as already used: %u\n",
743 		    adapter->buf_ring_size);
744 	}
745 
746 unlock:
747 	ENA_LOCK_UNLOCK();
748 
749 	return (error);
750 }
751 
752 static int
753 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
754 {
755 	struct ena_adapter *adapter = arg1;
756 	uint32_t val;
757 	int error;
758 
759 	ENA_LOCK_LOCK();
760 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
761 		error = EINVAL;
762 		goto unlock;
763 	}
764 
765 	val = 0;
766 	error = sysctl_wire_old_buffer(req, sizeof(val));
767 	if (error == 0) {
768 		val = adapter->requested_rx_ring_size;
769 		error = sysctl_handle_32(oidp, &val, 0, req);
770 	}
771 	if (error != 0 || req->newptr == NULL)
772 		goto unlock;
773 
774 	if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
775 		ena_log(adapter->pdev, ERR,
776 		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
777 		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
778 		error = EINVAL;
779 		goto unlock;
780 	}
781 
782 	/* Check if the parameter is power of 2 */
783 	if (!powerof2(val)) {
784 		ena_log(adapter->pdev, ERR,
785 		    "Requested new Rx queue size (%u) is not a power of 2\n",
786 		    val);
787 		error = EINVAL;
788 		goto unlock;
789 	}
790 
791 	if (val != adapter->requested_rx_ring_size) {
792 		ena_log(adapter->pdev, INFO,
793 		    "Requested new Rx queue size: %u. Old size: %u\n", val,
794 		    adapter->requested_rx_ring_size);
795 
796 		error = ena_update_queue_size(adapter,
797 		    adapter->requested_tx_ring_size, val);
798 	} else {
799 		ena_log(adapter->pdev, ERR,
800 		    "New Rx queue size is the same as already used: %u\n",
801 		    adapter->requested_rx_ring_size);
802 	}
803 
804 unlock:
805 	ENA_LOCK_UNLOCK();
806 
807 	return (error);
808 }
809 
810 /*
811  * Change number of effectively used IO queues adapter->num_io_queues
812  */
813 static int
814 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
815 {
816 	struct ena_adapter *adapter = arg1;
817 	uint32_t old_num_queues, tmp = 0;
818 	int error;
819 
820 	ENA_LOCK_LOCK();
821 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
822 		error = EINVAL;
823 		goto unlock;
824 	}
825 
826 	error = sysctl_wire_old_buffer(req, sizeof(tmp));
827 	if (error == 0) {
828 		tmp = adapter->num_io_queues;
829 		error = sysctl_handle_int(oidp, &tmp, 0, req);
830 	}
831 	if (error != 0 || req->newptr == NULL)
832 		goto unlock;
833 
834 	if (tmp == 0) {
835 		ena_log(adapter->pdev, ERR,
836 		    "Requested number of IO queues is zero\n");
837 		error = EINVAL;
838 		goto unlock;
839 	}
840 
841 	/*
842 	 * The adapter::max_num_io_queues is the HW capability. The system
843 	 * resources availability may potentially be a tighter limit. Therefore
844 	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
845 	 * always holds true, while the `adapter::msix_vecs` is variable across
846 	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
847 	 */
848 	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
849 		ena_log(adapter->pdev, ERR,
850 		    "Requested number of IO queues is higher than maximum allowed (%u)\n",
851 		    adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
852 		error = EINVAL;
853 		goto unlock;
854 	}
855 	if (tmp == adapter->num_io_queues) {
856 		ena_log(adapter->pdev, ERR,
857 		    "Requested number of IO queues is equal to current value "
858 		    "(%u)\n",
859 		    adapter->num_io_queues);
860 	} else {
861 		ena_log(adapter->pdev, INFO,
862 		    "Requested new number of IO queues: %u, current value: "
863 		    "%u\n",
864 		    tmp, adapter->num_io_queues);
865 
866 		old_num_queues = adapter->num_io_queues;
867 		error = ena_update_io_queue_nb(adapter, tmp);
868 		if (error != 0)
869 			return (error);
870 
871 		ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
872 	}
873 
874 unlock:
875 	ENA_LOCK_UNLOCK();
876 
877 	return (error);
878 }
879 
880 static int
881 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)
882 {
883 	struct ena_adapter *adapter = arg1;
884 	uint16_t interval;
885 	int error;
886 
887 	ENA_LOCK_LOCK();
888 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
889 		error = EINVAL;
890 		goto unlock;
891 	}
892 
893 	error = sysctl_wire_old_buffer(req, sizeof(interval));
894 	if (error == 0) {
895 		interval = adapter->metrics_sample_interval;
896 		error = sysctl_handle_16(oidp, &interval, 0, req);
897 	}
898 	if (error != 0 || req->newptr == NULL)
899 		goto unlock;
900 
901 	if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) {
902 		ena_log(adapter->pdev, ERR,
903 		    "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n",
904 		    ENA_METRICS_MAX_SAMPLE_INTERVAL);
905 		error = EINVAL;
906 		goto unlock;
907 	}
908 
909 	if (interval == 0) {
910 		ena_log(adapter->pdev, INFO,
911 		    "ENA metrics update is now turned off\n");
912 		bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
913 	} else {
914 		ena_log(adapter->pdev, INFO,
915 		    "ENA metrics update interval is set to: %" PRIu16
916 		    " seconds\n",
917 		    interval);
918 	}
919 
920 	adapter->metrics_sample_interval = interval;
921 
922 unlock:
923 	ENA_LOCK_UNLOCK();
924 
925 	return (0);
926 }
927 
928 static int
929 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
930 {
931 	struct ena_adapter *adapter = arg1;
932 	int irq_base_cpu = 0;
933 	int error;
934 
935 	ENA_LOCK_LOCK();
936 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
937 		error = ENODEV;
938 		goto unlock;
939 	}
940 
941 	error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
942 	if (error == 0) {
943 		irq_base_cpu = adapter->irq_cpu_base;
944 		error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
945 	}
946 	if (error != 0 || req->newptr == NULL)
947 		goto unlock;
948 
949 	if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
950 		ena_log(adapter->pdev, ERR,
951 		    "Requested base CPU is less than zero.\n");
952 		error = EINVAL;
953 		goto unlock;
954 	}
955 
956 	if (irq_base_cpu > mp_ncpus) {
957 		ena_log(adapter->pdev, INFO,
958 		    "Requested base CPU is larger than the number of available CPUs. \n");
959 		error = EINVAL;
960 		goto unlock;
961 
962 	}
963 
964 	if (irq_base_cpu == adapter->irq_cpu_base) {
965 		ena_log(adapter->pdev, INFO,
966 		    "Requested IRQ base CPU is equal to current value "
967 		    "(%d)\n",
968 		    adapter->irq_cpu_base);
969 		goto unlock;
970 	}
971 
972 	ena_log(adapter->pdev, INFO,
973 	    "Requested new IRQ base CPU: %d, current value: %d\n",
974 	    irq_base_cpu, adapter->irq_cpu_base);
975 
976 	error = ena_update_base_cpu(adapter, irq_base_cpu);
977 
978 unlock:
979 	ENA_LOCK_UNLOCK();
980 
981 	return (error);
982 }
983 
984 static int
985 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
986 {
987 	struct ena_adapter *adapter = arg1;
988 	int32_t irq_cpu_stride = 0;
989 	int error;
990 
991 	ENA_LOCK_LOCK();
992 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
993 		error = ENODEV;
994 		goto unlock;
995 	}
996 
997 	error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
998 	if (error == 0) {
999 		irq_cpu_stride = adapter->irq_cpu_stride;
1000 		error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
1001 	}
1002 	if (error != 0 || req->newptr == NULL)
1003 		goto unlock;
1004 
1005 	if (irq_cpu_stride < 0) {
1006 		ena_log(adapter->pdev, ERR,
1007 		    "Requested IRQ stride is less than zero.\n");
1008 		error = EINVAL;
1009 		goto unlock;
1010 	}
1011 
1012 	if (irq_cpu_stride > mp_ncpus) {
1013 		ena_log(adapter->pdev, INFO,
1014 		    "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
1015 	}
1016 
1017 	if (irq_cpu_stride == adapter->irq_cpu_stride) {
1018 		ena_log(adapter->pdev, INFO,
1019 		    "Requested IRQ CPU stride is equal to current value "
1020 		    "(%u)\n",
1021 		    adapter->irq_cpu_stride);
1022 		goto unlock;
1023 	}
1024 
1025 	ena_log(adapter->pdev, INFO,
1026 	    "Requested new IRQ CPU stride: %u, current value: %u\n",
1027 	    irq_cpu_stride, adapter->irq_cpu_stride);
1028 
1029 	error = ena_update_cpu_stride(adapter, irq_cpu_stride);
1030 	if (error != 0)
1031 		goto unlock;
1032 
1033 unlock:
1034 	ENA_LOCK_UNLOCK();
1035 
1036 	return (error);
1037 }
1038 
1039 #ifndef RSS
1040 /*
1041  * Change the Receive Side Scaling hash key.
1042  */
1043 static int
1044 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
1045 {
1046 	struct ena_adapter *adapter = arg1;
1047 	struct ena_com_dev *ena_dev = adapter->ena_dev;
1048 	enum ena_admin_hash_functions ena_func;
1049 	char msg[ENA_HASH_KEY_MSG_SIZE];
1050 	char elem[3] = { 0 };
1051 	char *endp;
1052 	u8 rss_key[ENA_HASH_KEY_SIZE];
1053 	int error, i;
1054 
1055 	ENA_LOCK_LOCK();
1056 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1057 		error = EINVAL;
1058 		goto unlock;
1059 	}
1060 
1061 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1062 		error = ENOTSUP;
1063 		goto unlock;
1064 	}
1065 
1066 	error = sysctl_wire_old_buffer(req, sizeof(msg));
1067 	if (error != 0)
1068 		goto unlock;
1069 
1070 	error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
1071 	if (error != 0) {
1072 		device_printf(adapter->pdev, "Cannot get hash function\n");
1073 		goto unlock;
1074 	}
1075 
1076 	if (ena_func != ENA_ADMIN_TOEPLITZ) {
1077 		error = EINVAL;
1078 		device_printf(adapter->pdev, "Unsupported hash algorithm\n");
1079 		goto unlock;
1080 	}
1081 
1082 	error = ena_rss_get_hash_key(ena_dev, rss_key);
1083 	if (error != 0) {
1084 		device_printf(adapter->pdev, "Cannot get hash key\n");
1085 		goto unlock;
1086 	}
1087 
1088 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
1089 		snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
1090 
1091 	error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
1092 	if (error != 0 || req->newptr == NULL)
1093 		goto unlock;
1094 
1095 	if (strlen(msg) != sizeof(msg) - 1) {
1096 		error = EINVAL;
1097 		device_printf(adapter->pdev, "Invalid key size\n");
1098 		goto unlock;
1099 	}
1100 
1101 	for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
1102 		strncpy(elem, &msg[i * 2], 2);
1103 		rss_key[i] = strtol(elem, &endp, 16);
1104 
1105 		/* Both hex nibbles in the string must be valid to continue. */
1106 		if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
1107 			error = EINVAL;
1108 			device_printf(adapter->pdev,
1109 			    "Invalid key hex value: '%c'\n", *endp);
1110 			goto unlock;
1111 		}
1112 	}
1113 
1114 	error = ena_rss_set_hash(ena_dev, rss_key);
1115 	if (error != 0)
1116 		device_printf(adapter->pdev, "Cannot fill hash key\n");
1117 
1118 unlock:
1119 	ENA_LOCK_UNLOCK();
1120 
1121 	return (error);
1122 }
1123 
1124 /*
1125  * Change the Receive Side Scaling indirection table.
1126  *
1127  * The sysctl entry string consists of one or more `x:y` keypairs, where
1128  * x stands for the table index and y for its new value.
1129  * Table indices that don't need to be updated can be omitted from the string
1130  * and will retain their existing values. If an index is entered more than once,
1131  * the last value is used.
1132  *
1133  * Example:
1134  * To update two selected indices in the RSS indirection table, e.g. setting
1135  * index 0 to queue 5 and then index 5 to queue 0, the below command should be
1136  * used:
1137  *   sysctl dev.ena.0.rss.indir_table="0:5 5:0"
1138  */
1139 static int
1140 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
1141 {
1142 	int num_queues, error;
1143 	struct ena_adapter *adapter = arg1;
1144 	struct ena_indir *indir;
1145 	char *msg, *buf, *endp;
1146 	uint32_t idx, value;
1147 
1148 	ENA_LOCK_LOCK();
1149 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1150 		error = EINVAL;
1151 		goto unlock;
1152 	}
1153 
1154 	if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1155 		error = ENOTSUP;
1156 		goto unlock;
1157 	}
1158 
1159 	indir = adapter->rss_indir;
1160 	msg = indir->sysctl_buf;
1161 
1162 	if (unlikely(indir == NULL)) {
1163 		error = ENOTSUP;
1164 		goto unlock;
1165 	}
1166 
1167 	error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
1168 	if (error != 0 || req->newptr == NULL)
1169 		goto unlock;
1170 
1171 	num_queues = adapter->num_io_queues;
1172 
1173 	/*
1174 	 * This sysctl expects msg to be a list of `x:y` record pairs,
1175 	 * where x is the indirection table index and y is its value.
1176 	 */
1177 	for (buf = msg; *buf != '\0'; buf = endp) {
1178 		idx = strtol(buf, &endp, 10);
1179 
1180 		if (endp == buf || idx < 0) {
1181 			device_printf(adapter->pdev, "Invalid index: %s\n",
1182 			    buf);
1183 			error = EINVAL;
1184 			break;
1185 		}
1186 
1187 		if (idx >= ENA_RX_RSS_TABLE_SIZE) {
1188 			device_printf(adapter->pdev, "Index %d out of range\n",
1189 			    idx);
1190 			error = ERANGE;
1191 			break;
1192 		}
1193 
1194 		buf = endp;
1195 
1196 		if (*buf++ != ':') {
1197 			device_printf(adapter->pdev, "Missing ':' separator\n");
1198 			error = EINVAL;
1199 			break;
1200 		}
1201 
1202 		value = strtol(buf, &endp, 10);
1203 
1204 		if (endp == buf || value < 0) {
1205 			device_printf(adapter->pdev, "Invalid value: %s\n",
1206 			    buf);
1207 			error = EINVAL;
1208 			break;
1209 		}
1210 
1211 		if (value >= num_queues) {
1212 			device_printf(adapter->pdev, "Value %d out of range\n",
1213 			    value);
1214 			error = ERANGE;
1215 			break;
1216 		}
1217 
1218 		indir->table[idx] = value;
1219 	}
1220 
1221 	if (error != 0) /* Reload indirection table with last good data. */
1222 		ena_rss_indir_get(adapter, indir->table);
1223 
1224 	/* At this point msg has been clobbered by sysctl_handle_string. */
1225 	ena_rss_copy_indir_buf(msg, indir->table);
1226 
1227 	if (error == 0)
1228 		error = ena_rss_indir_set(adapter, indir->table);
1229 
1230 unlock:
1231 	ENA_LOCK_UNLOCK();
1232 
1233 	return (error);
1234 }
1235 #endif /* RSS */
1236