xref: /freebsd/sys/dev/ena/ena_sysctl.c (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1 /*-
2  * BSD LICENSE
3  *
4  * Copyright (c) 2015-2020 Amazon.com, Inc. or its affiliates.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include "ena_sysctl.h"
34 
/* Builders that populate the per-adapter sysctl tree. */
static void	ena_sysctl_add_wd(struct ena_adapter *);
static void	ena_sysctl_add_stats(struct ena_adapter *);
static void	ena_sysctl_add_tuneables(struct ena_adapter *);

/* Handlers behind the read/write OIDs created by ena_sysctl_add_tuneables(). */
static int	ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int	ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
41 
42 static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
43     "ENA driver parameters");
44 
45 /*
46  * Logging level for changing verbosity of the output
47  */
48 int ena_log_level = ENA_ALERT | ENA_WARNING;
49 SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN,
50     &ena_log_level, 0, "Logging level indicating verbosity of the logs");
51 
52 SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
53     DRV_MODULE_VERSION, "ENA driver version");
54 
55 /*
56  * Use 9k mbufs for the Rx buffers. Default to 0 (use page size mbufs instead).
57  * Using 9k mbufs in low memory conditions might cause allocation to take a lot
58  * of time and lead to the OS instability as it needs to look for the contiguous
59  * pages.
60  * However, page size mbufs has a bit smaller throughput than 9k mbufs, so if
61  * the network performance is the priority, the 9k mbufs can be used.
62  */
63 int ena_enable_9k_mbufs = 0;
64 SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
65     &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");
66 
/*
 * Create all sysctl OIDs belonging to a single adapter: the watchdog knobs
 * first, then the statistics, and finally the run-time tuneables.
 */
void
ena_sysctl_add_nodes(struct ena_adapter *adapter)
{

	ena_sysctl_add_wd(adapter);
	ena_sysctl_add_stats(adapter);
	ena_sysctl_add_tuneables(adapter);
}
74 
75 static void
76 ena_sysctl_add_wd(struct ena_adapter *adapter)
77 {
78 	device_t dev;
79 
80 	struct sysctl_ctx_list *ctx;
81 	struct sysctl_oid *tree;
82 	struct sysctl_oid_list *child;
83 
84 	dev = adapter->pdev;
85 
86 	ctx = device_get_sysctl_ctx(dev);
87 	tree = device_get_sysctl_tree(dev);
88 	child = SYSCTL_CHILDREN(tree);
89 
90 	/* Sysctl calls for Watchdog service */
91 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active",
92 	    CTLFLAG_RWTUN, &adapter->wd_active, 0,
93 	    "Watchdog is active");
94 
95 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
96 	    CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
97 	    "Timeout for Keep Alive messages");
98 
99 	SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
100 	    CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
101 	    "Timeout for TX completion");
102 
103 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
104 	    CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
105 	    "Number of TX queues to check per run");
106 
107 	SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
108 	    CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
109 	    "Max number of timeouted packets");
110 }
111 
112 static void
113 ena_sysctl_add_stats(struct ena_adapter *adapter)
114 {
115 	device_t dev;
116 
117 	struct ena_ring *tx_ring;
118 	struct ena_ring *rx_ring;
119 
120 	struct ena_hw_stats *hw_stats;
121 	struct ena_stats_dev *dev_stats;
122 	struct ena_stats_tx *tx_stats;
123 	struct ena_stats_rx *rx_stats;
124 	struct ena_com_stats_admin *admin_stats;
125 
126 	struct sysctl_ctx_list *ctx;
127 	struct sysctl_oid *tree;
128 	struct sysctl_oid_list *child;
129 
130 	struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
131 	struct sysctl_oid *admin_node;
132 	struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
133 	struct sysctl_oid_list *admin_list;
134 
135 #define QUEUE_NAME_LEN 32
136 	char namebuf[QUEUE_NAME_LEN];
137 	int i;
138 
139 	dev = adapter->pdev;
140 
141 	ctx = device_get_sysctl_ctx(dev);
142 	tree = device_get_sysctl_tree(dev);
143 	child = SYSCTL_CHILDREN(tree);
144 
145 	tx_ring = adapter->tx_ring;
146 	rx_ring = adapter->rx_ring;
147 
148 	hw_stats = &adapter->hw_stats;
149 	dev_stats = &adapter->dev_stats;
150 	admin_stats = &adapter->ena_dev->admin_queue.stats;
151 
152 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired",
153 	    CTLFLAG_RD, &dev_stats->wd_expired,
154 	    "Watchdog expiry count");
155 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up",
156 	    CTLFLAG_RD, &dev_stats->interface_up,
157 	    "Network interface up count");
158 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down",
159 	    CTLFLAG_RD, &dev_stats->interface_down,
160 	    "Network interface down count");
161 	SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause",
162 	    CTLFLAG_RD, &dev_stats->admin_q_pause,
163 	    "Admin queue pauses");
164 
165 	for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
166 		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
167 
168 		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
169 		    namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
170 		queue_list = SYSCTL_CHILDREN(queue_node);
171 
172 		/* TX specific stats */
173 		tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
174 		    "tx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
175 		tx_list = SYSCTL_CHILDREN(tx_node);
176 
177 		tx_stats = &tx_ring->tx_stats;
178 
179 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
180 		    "count", CTLFLAG_RD,
181 		    &tx_stats->cnt, "Packets sent");
182 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
183 		    "bytes", CTLFLAG_RD,
184 		    &tx_stats->bytes, "Bytes sent");
185 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
186 		    "prepare_ctx_err", CTLFLAG_RD,
187 		    &tx_stats->prepare_ctx_err,
188 		    "TX buffer preparation failures");
189 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
190 		    "dma_mapping_err", CTLFLAG_RD,
191 		    &tx_stats->dma_mapping_err, "DMA mapping failures");
192 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
193 		    "doorbells", CTLFLAG_RD,
194 		    &tx_stats->doorbells, "Queue doorbells");
195 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
196 		    "missing_tx_comp", CTLFLAG_RD,
197 		    &tx_stats->missing_tx_comp, "TX completions missed");
198 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
199 		    "bad_req_id", CTLFLAG_RD,
200 		    &tx_stats->bad_req_id, "Bad request id count");
201 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
202 		        "mbuf_collapses", CTLFLAG_RD,
203 		        &tx_stats->collapse,
204 		        "Mbuf collapse count");
205 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
206 		        "mbuf_collapse_err", CTLFLAG_RD,
207 		        &tx_stats->collapse_err,
208 		        "Mbuf collapse failures");
209 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
210 		    "queue_wakeups", CTLFLAG_RD,
211 		    &tx_stats->queue_wakeup, "Queue wakeups");
212 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
213 		    "queue_stops", CTLFLAG_RD,
214 		    &tx_stats->queue_stop, "Queue stops");
215 		SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
216 		    "llq_buffer_copy", CTLFLAG_RD,
217 		    &tx_stats->llq_buffer_copy,
218 		    "Header copies for llq transaction");
219 
220 		/* RX specific stats */
221 		rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO,
222 		    "rx_ring", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
223 		rx_list = SYSCTL_CHILDREN(rx_node);
224 
225 		rx_stats = &rx_ring->rx_stats;
226 
227 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
228 		    "count", CTLFLAG_RD,
229 		    &rx_stats->cnt, "Packets received");
230 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
231 		    "bytes", CTLFLAG_RD,
232 		    &rx_stats->bytes, "Bytes received");
233 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
234 		    "refil_partial", CTLFLAG_RD,
235 		    &rx_stats->refil_partial, "Partial refilled mbufs");
236 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
237 		    "bad_csum", CTLFLAG_RD,
238 		    &rx_stats->bad_csum, "Bad RX checksum");
239 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
240 		    "mbuf_alloc_fail", CTLFLAG_RD,
241 		    &rx_stats->mbuf_alloc_fail, "Failed mbuf allocs");
242 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
243 		    "mjum_alloc_fail", CTLFLAG_RD,
244 		    &rx_stats->mjum_alloc_fail, "Failed jumbo mbuf allocs");
245 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
246 		    "dma_mapping_err", CTLFLAG_RD,
247 		    &rx_stats->dma_mapping_err, "DMA mapping errors");
248 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
249 		    "bad_desc_num", CTLFLAG_RD,
250 		    &rx_stats->bad_desc_num, "Bad descriptor count");
251 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
252 		    "bad_req_id", CTLFLAG_RD,
253 		    &rx_stats->bad_req_id, "Bad request id count");
254 		SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
255 		    "empty_rx_ring", CTLFLAG_RD,
256 		    &rx_stats->empty_rx_ring, "RX descriptors depletion count");
257 	}
258 
259 	/* Stats read from device */
260 	hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
261 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
262 	hw_list = SYSCTL_CHILDREN(hw_node);
263 
264 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
265 	    &hw_stats->rx_packets, "Packets received");
266 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
267 	    &hw_stats->tx_packets, "Packets transmitted");
268 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
269 	    &hw_stats->rx_bytes, "Bytes received");
270 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
271 	    &hw_stats->tx_bytes, "Bytes transmitted");
272 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
273 	    &hw_stats->rx_drops, "Receive packet drops");
274 	SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
275 	    &hw_stats->tx_drops, "Transmit packet drops");
276 
277 	/* ENA Admin queue stats */
278 	admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
279 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
280 	admin_list = SYSCTL_CHILDREN(admin_node);
281 
282 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
283 	    &admin_stats->aborted_cmd, 0, "Aborted commands");
284 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
285 	    &admin_stats->submitted_cmd, 0, "Submitted commands");
286 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
287 	    &admin_stats->completed_cmd, 0, "Completed commands");
288 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
289 	    &admin_stats->out_of_space, 0, "Queue out of space");
290 	SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
291 	    &admin_stats->no_completion, 0, "Commands not completed");
292 }
293 
294 static void
295 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
296 {
297 	device_t dev;
298 
299 	struct sysctl_ctx_list *ctx;
300 	struct sysctl_oid *tree;
301 	struct sysctl_oid_list *child;
302 
303 	dev = adapter->pdev;
304 
305 	ctx = device_get_sysctl_ctx(dev);
306 	tree = device_get_sysctl_tree(dev);
307 	child = SYSCTL_CHILDREN(tree);
308 
309 	/* Tuneable number of buffers in the buf-ring (drbr) */
310 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
311 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
312 	    ena_sysctl_buf_ring_size, "I",
313 	    "Size of the Tx buffer ring (drbr).");
314 
315 	/* Tuneable number of the Rx ring size */
316 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
317 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
318 	    ena_sysctl_rx_queue_size, "I",
319 	    "Size of the Rx ring. The size should be a power of 2.");
320 
321 	/* Tuneable number of IO queues */
322 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
323 	    CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
324 	    ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
325 }
326 
327 
328 static int
329 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
330 {
331 	struct ena_adapter *adapter = arg1;
332 	uint32_t val;
333 	int error;
334 
335 	val = 0;
336 	error = sysctl_wire_old_buffer(req, sizeof(val));
337 	if (error == 0) {
338 		val = adapter->buf_ring_size;
339 		error = sysctl_handle_int(oidp, &val, 0, req);
340 	}
341 	if (error != 0 || req->newptr == NULL)
342 		return (error);
343 
344 	if (!powerof2(val) || val == 0) {
345 		device_printf(adapter->pdev,
346 		    "Requested new Tx buffer ring size (%u) is not a power of 2\n",
347 		    val);
348 		return (EINVAL);
349 	}
350 
351 	if (val != adapter->buf_ring_size) {
352 		device_printf(adapter->pdev,
353 		    "Requested new Tx buffer ring size: %d. Old size: %d\n",
354 		    val, adapter->buf_ring_size);
355 
356 		error = ena_update_buf_ring_size(adapter, val);
357 	} else {
358 		device_printf(adapter->pdev,
359 		    "New Tx buffer ring size is the same as already used: %u\n",
360 		    adapter->buf_ring_size);
361 	}
362 
363 	return (error);
364 }
365 
366 static int
367 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
368 {
369 	struct ena_adapter *adapter = arg1;
370 	uint32_t val;
371 	int error;
372 
373 	val = 0;
374 	error = sysctl_wire_old_buffer(req, sizeof(val));
375 	if (error == 0) {
376 		val = adapter->requested_rx_ring_size;
377 		error = sysctl_handle_32(oidp, &val, 0, req);
378 	}
379 	if (error != 0 || req->newptr == NULL)
380 		return (error);
381 
382 	if  (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
383 		device_printf(adapter->pdev,
384 		    "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
385 		    val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
386 		return (EINVAL);
387 	}
388 
389 	/* Check if the parameter is power of 2 */
390 	if (!powerof2(val)) {
391 		device_printf(adapter->pdev,
392 		    "Requested new Rx queue size (%u) is not a power of 2\n",
393 		    val);
394 		return (EINVAL);
395 	}
396 
397 	if (val != adapter->requested_rx_ring_size) {
398 		device_printf(adapter->pdev,
399 		    "Requested new Rx queue size: %u. Old size: %u\n",
400 		    val, adapter->requested_rx_ring_size);
401 
402 		error = ena_update_queue_size(adapter,
403 		    adapter->requested_tx_ring_size, val);
404 	} else {
405 		device_printf(adapter->pdev,
406 		    "New Rx queue size is the same as already used: %u\n",
407 		    adapter->requested_rx_ring_size);
408 	}
409 
410 	return (error);
411 }
412 
413 /*
414  * Change number of effectively used IO queues adapter->num_io_queues
415  */
416 static int
417 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
418 {
419 	struct ena_adapter *adapter = arg1;
420 	uint32_t tmp = 0;
421 	int error;
422 
423 	error = sysctl_wire_old_buffer(req, sizeof(tmp));
424 	if (error == 0) {
425 		tmp = adapter->num_io_queues;
426 		error = sysctl_handle_int(oidp, &tmp, 0, req);
427 	}
428 	if (error != 0 || req->newptr == NULL)
429 		return (error);
430 
431 	if (tmp == 0) {
432 		device_printf(adapter->pdev,
433 		    "Requested number of IO queues is zero\n");
434 		return (EINVAL);
435 	}
436 
437 	/*
438 	 * The adapter::max_num_io_queues is the HW capability. The system
439 	 * resources availability may potentially be a tighter limit. Therefore
440 	 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
441 	 * always holds true, while the `adapter::msix_vecs` is variable across
442 	 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
443 	 */
444 	if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
445 		device_printf(adapter->pdev,
446 		    "Requested number of IO queues is higher than maximum "
447 		    "allowed (%u)\n", adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
448 		return (EINVAL);
449 	}
450 	if (tmp == adapter->num_io_queues) {
451 		device_printf(adapter->pdev,
452 		    "Requested number of IO queues is equal to current value "
453 		    "(%u)\n", adapter->num_io_queues);
454 	} else {
455 		device_printf(adapter->pdev,
456 		    "Requested new number of IO queues: %u, current value: "
457 		    "%u\n", tmp, adapter->num_io_queues);
458 
459 		error = ena_update_io_queue_nb(adapter, tmp);
460 	}
461 
462 	return (error);
463 }
464