xref: /freebsd/sys/dev/ena/ena_sysctl.c (revision ae7e8a02e6e93455e026036132c4d053b2c12ad9)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2015-2021 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 #include <sys/param.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include "ena_sysctl.h"
35 
/* Helpers registering each group of per-adapter sysctls. */
static void	ena_sysctl_add_wd(struct ena_adapter *);
static void	ena_sysctl_add_stats(struct ena_adapter *);
static void	ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void	ena_sysctl_add_tuneables(struct ena_adapter *);
/* Handlers backing the SYSCTL_ADD_PROC entries registered by the helpers. */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS);

/* Limit the maximum ENI metrics sample interval to one hour (in seconds). */
#define ENI_METRICS_MAX_SAMPLE_INTERVAL 3600

/* Root node for the driver-wide (not per-adapter) knobs: hw.ena.* */
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ENA driver parameters");

/*
 * Logging level controlling the verbosity of the driver's output.
 * Defaults to ENA_INFO.
 */
int ena_log_level = ENA_INFO;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
    &ena_log_level, 0, "Logging level indicating verbosity of the logs");

/* Read-only string exposing DRV_MODULE_VERSION. */
SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
    DRV_MODULE_VERSION, "ENA driver version");

/*
 * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs
 * instead).
 * Allocating 9k mbufs in low memory conditions might take a long time and
 * lead to OS instability, as the allocator needs to find contiguous pages.
 * However, page size mbufs give slightly lower throughput than 9k mbufs, so
 * if network performance is the priority, 9k mbufs can be used.
 */
int ena_enable_9k_mbufs = 0;
SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
72 
73 /*
74  * Force the driver to use large LLQ (Low Latency Queue) header. Defaults to
75  * false. This option may be important for platforms, which often handle packet
76  * headers on Tx with total header size greater than 96B, as it may
77  * reduce the latency.
78  * It also reduces the maximum Tx queue size by half, so it may cause more Tx
79  * packet drops.
80  */
81 bool ena_force_large_llq_header = false;
82 SYSCTL_BOOL(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
83     &ena_force_large_llq_header, 0,
84     "Increases maximum supported header size in LLQ mode to 224 bytes, while reducing the maximum Tx queue size by half.\n");
85 
86 void
87 ena_sysctl_add_nodes(struct ena_adapter *adapter)
88 {
89 	ena_sysctl_add_wd(adapter);
90 	ena_sysctl_add_stats(adapter);
91 	ena_sysctl_add_eni_metrics(adapter);
92 	ena_sysctl_add_tuneables(adapter);
93 }
94 
95 static void
96 ena_sysctl_add_wd(struct ena_adapter *adapter)
97 {
98 	device_t dev;
99 
100 	struct sysctl_ctx_list *ctx;
101 	struct sysctl_oid *tree;
102 	struct sysctl_oid_list *child;
103 
104 	dev = adapter->pdev;
105 
106 	ctx = device_get_sysctl_ctx(dev);
107 	tree = device_get_sysctl_tree(dev);
108 	child = SYSCTL_CHILDREN(tree);
109 
110 	/* Sysctl calls for Watchdog service */
111 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
112 	    CTLFLAG_RWTUN, &adapter->wd_active, 0,
113 	    "Watchdog is active");
114 
115 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
116 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
117 	    "Timeout for Keep Alive messages");
118 
119 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
120 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
121 	    "Timeout for TX completion");
122 
123 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
124 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
125 	    "Number of TX queues to check per run");
126 
127 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
128 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
129 	    "Max number of timeouted packets");
130 }
131 
132 static void
133 ena_sysctl_add_stats(struct ena_adapter *adapter)
134 {
135 	device_t dev;
136 
137 	struct ena_ring *tx_ring;
138 	struct ena_ring *rx_ring;
139 
140 	struct ena_hw_stats *hw_stats;
141 	struct ena_stats_dev *dev_stats;
142 	struct ena_stats_tx *tx_stats;
143 	struct ena_stats_rx *rx_stats;
144 	struct ena_com_stats_admin *admin_stats;
145 
146 	struct sysctl_ctx_list *ctx;
147 	struct sysctl_oid *tree;
148 	struct sysctl_oid_list *child;
149 
150 	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
151 	struct sysctl_oid *admin_node;
152 	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
153 	struct sysctl_oid_list *admin_list;
154 
155 #define QUEUE_NAME_LEN 32
156 	char namebuf[QUEUE_NAME_LEN];
157 	int i;
158 
159 	dev = adapter->pdev;
160 
161 	ctx = device_get_sysctl_ctx(dev);
162 	tree = device_get_sysctl_tree(dev);
163 	child = SYSCTL_CHILDREN(tree);
164 
165 	tx_ring = adapter->tx_ring;
166 	rx_ring = adapter->rx_ring;
167 
168 	hw_stats = &adapter->hw_stats;
169 	dev_stats = &adapter->dev_stats;
170 	admin_stats = &adapter->ena_dev->admin_queue.stats;
171 
172 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
173 	    CTLFLAG_RD, &dev_stats->wd_expired,
174 	    "Watchdog expiry count");
175 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
176 	    CTLFLAG_RD, &dev_stats->interface_up,
177 	    "Network interface up count");
178 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
179 	    CTLFLAG_RD, &dev_stats->interface_down,
180 	    "Network interface down count");
181 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
182 	    CTLFLAG_RD, &dev_stats->admin_q_pause,
183 	    "Admin queue pauses");
184 
185 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
186 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
187 
188 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
189 		    namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
190 		queue_list = SYSCTL_CHILDREN(queue_node);
191 
192 		adapter->que[i].oid = queue_node;
193 
194 		/* TX specific stats */
195 		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
196 		    "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
197 		tx_list = SYSCTL_CHILDREN(tx_node);
198 
199 		tx_stats = &tx_ring->tx_stats;
200 
201 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
202 		    "count", CTLFLAG_RD,
203 		    &tx_stats->cnt, "Packets sent");
204 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
205 		    "bytes", CTLFLAG_RD,
206 		    &tx_stats->bytes, "Bytes sent");
207 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
208 		    "prepare_ctx_err", CTLFLAG_RD,
209 		    &tx_stats->prepare_ctx_err,
210 		    "TX buffer preparation failures");
211 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
212 		    "dma_mapping_err", CTLFLAG_RD,
213 		    &tx_stats->dma_mapping_err, "DMA mapping failures");
214 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
215 		    "doorbells", CTLFLAG_RD,
216 		    &tx_stats->doorbells, "Queue doorbells");
217 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
218 		    "missing_tx_comp", CTLFLAG_RD,
219 		    &tx_stats->missing_tx_comp, "TX completions missed");
220 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
221 		    "bad_req_id", CTLFLAG_RD,
222 		    &tx_stats->bad_req_id, "Bad request id count");
223 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
224 		        "mbuf_collapses", CTLFLAG_RD,
225 		        &tx_stats->collapse,
226 		        "Mbuf collapse count");
227 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
228 		        "mbuf_collapse_err", CTLFLAG_RD,
229 		        &tx_stats->collapse_err,
230 		        "Mbuf collapse failures");
231 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
232 		    "queue_wakeups", CTLFLAG_RD,
233 		    &tx_stats->queue_wakeup, "Queue wakeups");
234 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
235 		    "queue_stops", CTLFLAG_RD,
236 		    &tx_stats->queue_stop, "Queue stops");
237 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
238 		    "llq_buffer_copy", CTLFLAG_RD,
239 		    &tx_stats->llq_buffer_copy,
240 		    "Header copies for llq transaction");
241 
242 		/* RX specific stats */
243 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
244 		    "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
245 		rx_list = SYSCTL_CHILDREN(rx_node);
246 
247 		rx_stats = &rx_ring->rx_stats;
248 
249 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
250 		    "count", CTLFLAG_RD,
251 		    &rx_stats->cnt, "Packets received");
252 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
253 		    "bytes", CTLFLAG_RD,
254 		    &rx_stats->bytes, "Bytes received");
255 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
256 		    "refil_partial", CTLFLAG_RD,
257 		    &rx_stats->refil_partial, "Partial refilled mbufs");
258 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
259 		    "bad_csum", CTLFLAG_RD,
260 		    &rx_stats->bad_csum, "Bad RX checksum");
261 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
262 		    "mbuf_alloc_fail", CTLFLAG_RD,
263 		    &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
264 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
265 		    "mjum_alloc_fail", CTLFLAG_RD,
266 		    &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
267 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
268 		    "dma_mapping_err", CTLFLAG_RD,
269 		    &rx_stats->dma_mapping_err, "DMA mapping errors");
270 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
271 		    "bad_desc_num", CTLFLAG_RD,
272 		    &rx_stats->bad_desc_num, "Bad descriptor count");
273 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
274 		    "bad_req_id", CTLFLAG_RD,
275 		    &rx_stats->bad_req_id, "Bad request id count");
276 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
277 		    "empty_rx_ring", CTLFLAG_RD,
278 		    &rx_stats->empty_rx_ring, "RX descriptors depletion count");
279 	}
280 
281 	/* Stats read from device */
282 	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
283 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
284 	hw_list = SYSCTL_CHILDREN(hw_node);
285 
286 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
287 	    &hw_stats->rx_packets, "Packets received");
288 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
289 	    &hw_stats->tx_packets, "Packets transmitted");
290 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
291 	    &hw_stats->rx_bytes, "Bytes received");
292 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
293 	    &hw_stats->tx_bytes, "Bytes transmitted");
294 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
295 	    &hw_stats->rx_drops, "Receive packet drops");
296 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
297 	    &hw_stats->tx_drops, "Transmit packet drops");
298 
299 	/* ENA Admin queue stats */
300 	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
301 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
302 	admin_list = SYSCTL_CHILDREN(admin_node);
303 
304 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
305 	    &admin_stats->aborted_cmd, 0, "Aborted commands");
306 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
307 	    &admin_stats->submitted_cmd, 0, "Submitted commands");
308 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
309 	    &admin_stats->completed_cmd, 0, "Completed commands");
310 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
311 	    &admin_stats->out_of_space, 0, "Queue out of space");
312 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
313 	    &admin_stats->no_completion, 0, "Commands not completed");
314 }
315 
316 static void
317 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
318 {
319 	device_t dev;
320 	struct ena_admin_eni_stats *eni_metrics;
321 
322 	struct sysctl_ctx_list *ctx;
323 	struct sysctl_oid *tree;
324 	struct sysctl_oid_list *child;
325 
326 	struct sysctl_oid *eni_node;
327 	struct sysctl_oid_list *eni_list;
328 
329 	dev = adapter->pdev;
330 
331 	ctx = device_get_sysctl_ctx(dev);
332 	tree = device_get_sysctl_tree(dev);
333 	child = SYSCTL_CHILDREN(tree);
334 
335 	eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
336 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
337 	eni_list = SYSCTL_CHILDREN(eni_node);
338 
339 	eni_metrics = &adapter->eni_metrics;
340 
341 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
342 	    CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
343 	    "Inbound BW allowance exceeded");
344 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
345 	    CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
346 	    "Outbound BW allowance exceeded");
347 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
348 	    CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
349 	    "PPS allowance exceeded");
350 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
351 	    CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
352 	    "Connection tracking allowance exceeded");
353 	SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
354 	    CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
355 	    "Linklocal packet rate allowance exceeded");
356 
357 	/*
358 	 * Tuneable, which determines how often ENI metrics will be read.
359 	 * 0 means it's turned off. Maximum allowed value is limited by:
360 	 * ENI_METRICS_MAX_SAMPLE_INTERVAL.
361 	 */
362 	SYSCTL_ADD_PROC(ctx, eni_list, OID_AUTO, "sample_interval",
363 	    CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
364 	    ena_sysctl_eni_metrics_interval, "SU",
365 	    "Interval in seconds for updating ENI emetrics. 0 turns off the update.");
366 }
367 
368 static void
369 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
370 {
371 	device_t dev;
372 
373 	struct sysctl_ctx_list *ctx;
374 	struct sysctl_oid *tree;
375 	struct sysctl_oid_list *child;
376 
377 	dev = adapter->pdev;
378 
379 	ctx = device_get_sysctl_ctx(dev);
380 	tree = device_get_sysctl_tree(dev);
381 	child = SYSCTL_CHILDREN(tree);
382 
383 	/* Tuneable number of buffers in the buf-ring (drbr) */
384 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
385 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
386 	    ena_sysctl_buf_ring_size, "I",
387 	    "Size of the Tx buffer ring (drbr).");
388 
389 	/* Tuneable number of the Rx ring size */
390 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
391 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
392 	    ena_sysctl_rx_queue_size, "I",
393 	    "Size of the Rx ring. The size should be a power of 2.");
394 
395 	/* Tuneable number of IO queues */
396 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
397 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
398 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
399 }
400 
401 
402 /*
403  * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
404  *
405  * Whether the nodes are registered or unregistered depends on a delta between
406  * the `old` and `new` parameters, representing the number of queues.
407  *
408  * This function is used to hide sysctl attributes for queue nodes which aren't
409  * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
410  *
411  * NOTE:
412  * All unregistered nodes must be registered again at detach, i.e. by a call to
413  * this function.
414  */
415 void
416 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
417 {
418 	device_t dev;
419 	struct sysctl_oid *oid;
420 	int min, max, i;
421 
422 	dev = adapter->pdev;
423 	min = MIN(old, new);
424 	max = MIN(MAX(old, new), adapter->max_num_io_queues);
425 
426 	for (i = min; i < max; ++i) {
427 		oid = adapter->que[i].oid;
428 
429 		sysctl_wlock();
430 		if (old > new)
431 			sysctl_unregister_oid(oid);
432 		else
433 			sysctl_register_oid(oid);
434 		sysctl_wunlock();
435 	}
436 }
437 
438 static int
439 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
440 {
441 	struct ena_adapter *adapter = arg1;
442 	uint32_t val;
443 	int error;
444 
445 	val = 0;
446 	error = sysctl_wire_old_buffer(req, sizeof(val));
447 	if (error == 0) {
448 		val = adapter->buf_ring_size;
449 		error = sysctl_handle_32(oidp, &val, 0, req);
450 	}
451 	if (error != 0 || req->newptr == NULL)
452 		return (error);
453 
454 	if (!powerof2(val) || val == 0) {
455 		ena_log(adapter->pdev, ERR,
456 		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
457 		    val);
458 		return (EINVAL);
459 	}
460 
461 	if (val != adapter->buf_ring_size) {
462 		ena_log(adapter->pdev, INFO,
463 		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
464 		    val, adapter->buf_ring_size);
465 
466 		error = ena_update_buf_ring_size(adapter, val);
467 	} else {
468 		ena_log(adapter->pdev, ERR,
469 		    "New Tx buffer ring size is the same as already used: %u\n",
470 		    adapter->buf_ring_size);
471 	}
472 
473 	return (error);
474 }
475 
476 static int
477 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
478 {
479 	struct ena_adapter *adapter = arg1;
480 	uint32_t val;
481 	int error;
482 
483 	val = 0;
484 	error = sysctl_wire_old_buffer(req, sizeof(val));
485 	if (error == 0) {
486 		val = adapter->requested_rx_ring_size;
487 		error = sysctl_handle_32(oidp, &val, 0, req);
488 	}
489 	if (error != 0 || req->newptr == NULL)
490 		return (error);
491 
492 	if  (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
493 		ena_log(adapter->pdev, ERR,
494 		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
495 		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
496 		return (EINVAL);
497 	}
498 
499 	/* Check if the parameter is power of 2 */
500 	if (!powerof2(val)) {
501 		ena_log(adapter->pdev, ERR,
502 		    "Requested new Rx queue size (%u) is not a power of 2\n",
503 		    val);
504 		return (EINVAL);
505 	}
506 
507 	if (val != adapter->requested_rx_ring_size) {
508 		ena_log(adapter->pdev, INFO,
509 		    "Requested new Rx queue size: %u. Old size: %u\n",
510 		    val, adapter->requested_rx_ring_size);
511 
512 		error = ena_update_queue_size(adapter,
513 		    adapter->requested_tx_ring_size, val);
514 	} else {
515 		ena_log(adapter->pdev, ERR,
516 		    "New Rx queue size is the same as already used: %u\n",
517 		    adapter->requested_rx_ring_size);
518 	}
519 
520 	return (error);
521 }
522 
523 /*
524  * Change number of effectively used IO queues adapter->num_io_queues
525  */
526 static int
527 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
528 {
529 	struct ena_adapter *adapter = arg1;
530 	uint32_t old_num_queues, tmp = 0;
531 	int error;
532 
533 	error = sysctl_wire_old_buffer(req, sizeof(tmp));
534 	if (error == 0) {
535 		tmp = adapter->num_io_queues;
536 		error = sysctl_handle_int(oidp, &tmp, 0, req);
537 	}
538 	if (error != 0 || req->newptr == NULL)
539 		return (error);
540 
541 	if (tmp == 0) {
542 		ena_log(adapter->pdev, ERR,
543 		    "Requested number of IO queues is zero\n");
544 		return (EINVAL);
545 	}
546 
547 	/*
548 	 * The adapter::max_num_io_queues is the HW capability. The system
549 	 * resources availability may potentially be a tighter limit. Therefore
550 	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
551 	 * always holds true, while the `adapter::msix_vecs` is variable across
552 	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
553 	 */
554 	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
555 		ena_log(adapter->pdev, ERR,
556 		    "Requested number of IO queues is higher than maximum "
557 		    "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
558 		return (EINVAL);
559 	}
560 	if (tmp == adapter->num_io_queues) {
561 		ena_log(adapter->pdev, ERR,
562 		    "Requested number of IO queues is equal to current value "
563 		    "(%u)\n", adapter->num_io_queues);
564 	} else {
565 		ena_log(adapter->pdev, INFO,
566 		    "Requested new number of IO queues: %u, current value: "
567 		    "%u\n", tmp, adapter->num_io_queues);
568 
569 		old_num_queues = adapter->num_io_queues;
570 		error = ena_update_io_queue_nb(adapter, tmp);
571 		if (error != 0)
572 			return (error);
573 
574 		ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
575 	}
576 
577 	return (error);
578 }
579 
580 static int
581 ena_sysctl_eni_metrics_interval(SYSCTL_HANDLER_ARGS)
582 {
583 	struct ena_adapter *adapter = arg1;
584 	uint16_t interval;
585 	int error;
586 
587 	error = sysctl_wire_old_buffer(req, sizeof(interval));
588 	if (error == 0) {
589 		interval = adapter->eni_metrics_sample_interval;
590 		error = sysctl_handle_16(oidp, &interval, 0, req);
591 	}
592 	if (error != 0 || req->newptr == NULL)
593 		return (error);
594 
595 	if (interval > ENI_METRICS_MAX_SAMPLE_INTERVAL) {
596 		ena_log(adapter->pdev, ERR,
597 		    "ENI metrics update interval is out of range - maximum allowed value: %d seconds\n",
598 		    ENI_METRICS_MAX_SAMPLE_INTERVAL);
599 		return (EINVAL);
600 	}
601 
602 	if (interval == 0) {
603 		ena_log(adapter->pdev, INFO,
604 		    "ENI metrics update is now turned off\n");
605 		bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
606 	} else {
607 		ena_log(adapter->pdev, INFO,
608 		    "ENI metrics update interval is set to: %"PRIu16" seconds\n",
609 		    interval);
610 	}
611 
612 	adapter->eni_metrics_sample_interval = interval;
613 
614 	return (0);
615 }
616