1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2015-2024 Amazon.com, Inc. or its affiliates.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/param.h>
32 #include "opt_rss.h"
33
34 #include "ena_rss.h"
35 #include "ena_sysctl.h"
36
/* Builders that register groups of sysctl nodes under the adapter's device tree. */
static void ena_sysctl_add_wd(struct ena_adapter *);
static void ena_sysctl_add_stats(struct ena_adapter *);
static void ena_sysctl_add_eni_metrics(struct ena_adapter *);
static void ena_sysctl_add_customer_metrics(struct ena_adapter *);
static void ena_sysctl_add_srd_info(struct ena_adapter *);
static void ena_sysctl_add_tuneables(struct ena_adapter *);
static void ena_sysctl_add_irq_affinity(struct ena_adapter *);
/* Kernel option RSS prevents manipulation of key hash and indirection table. */
#ifndef RSS
static void ena_sysctl_add_rss(struct ena_adapter *);
#endif
/* Handlers installed through SYSCTL_ADD_PROC by the builders above. */
static int ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS);
/* The RSS handlers only exist when the kernel is built without option RSS. */
#ifndef RSS
static int ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS);
static int ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS);
#endif
58
/* Upper bound, in seconds, for the ENA metrics sampling interval (one hour). */
#define ENA_METRICS_MAX_SAMPLE_INTERVAL 3600
/*
 * NOTE(review): presumably two characters per key byte plus a terminating NUL
 * for the textual form of the RSS hash key - confirm against the RSS key
 * handler.
 */
#define ENA_HASH_KEY_MSG_SIZE (ENA_HASH_KEY_SIZE * 2 + 1)

/* Capacity of the name and description buffers in the metric tables below. */
#define SYSCTL_GSTRING_LEN 128

/*
 * Build one struct ena_hw_metrics initializer.  Both arguments are
 * stringified, so `stat` becomes the sysctl leaf name and `desc` its
 * description text.
 */
#define ENA_METRIC_ENI_ENTRY(stat, desc) { \
	.name = #stat, \
	.description = #desc, \
}

/*
 * Build one table initializer carrying a name, a description and the position
 * of field `stat` inside struct ena_admin_<stat_type>, expressed in u64-sized
 * units rather than bytes.
 */
#define ENA_STAT_ENTRY(stat, desc, stat_type) { \
	.name = #stat, \
	.description = #desc, \
	.stat_offset = offsetof(struct ena_admin_##stat_type, stat) / sizeof(u64), \
}

/* ENA_STAT_ENTRY specialised for fields of struct ena_admin_ena_srd_stats. */
#define ENA_STAT_ENA_SRD_ENTRY(stat, desc) \
	ENA_STAT_ENTRY(stat, desc, ena_srd_stats)
78
/* Name/description pair describing one customer metric sysctl leaf. */
struct ena_hw_metrics {
	char name[SYSCTL_GSTRING_LEN];		/* sysctl leaf name */
	char description[SYSCTL_GSTRING_LEN];	/* sysctl description text */
};
83
/* Describes one ENA SRD statistic and where to find it in the stats struct. */
struct ena_srd_metrics {
	char name[SYSCTL_GSTRING_LEN];		/* sysctl leaf name */
	char description[SYSCTL_GSTRING_LEN];	/* sysctl description text */
	int stat_offset;			/* field position in u64 units */
};
89
/*
 * ENA SRD statistics published by ena_sysctl_add_srd_info().  Each entry's
 * stat_offset is used there to index the adapter's SRD stats structure as an
 * array of u64 values.
 */
static const struct ena_srd_metrics ena_srd_stats_strings[] = {
	ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_tx_pkts, Number of packets transmitted over ENA SRD),
	ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_eligible_tx_pkts, Number of packets transmitted or could
	    have been transmitted over ENA SRD),
	ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_rx_pkts, Number of packets received over ENA SRD),
	ENA_STAT_ENA_SRD_ENTRY(
	    ena_srd_resource_utilization, Percentage of the ENA SRD resources
	    that are in use),
};
102
103 static const struct ena_hw_metrics ena_hw_stats_strings[] = {
104 ENA_METRIC_ENI_ENTRY(
105 bw_in_allowance_exceeded, Inbound BW allowance exceeded),
106 ENA_METRIC_ENI_ENTRY(
107 bw_out_allowance_exceeded, Outbound BW allowance exceeded),
108 ENA_METRIC_ENI_ENTRY(
109 pps_allowance_exceeded, PPS allowance exceeded),
110 ENA_METRIC_ENI_ENTRY(
111 conntrack_allowance_exceeded, Connection tracking allowance exceeded),
112 ENA_METRIC_ENI_ENTRY(
113 linklocal_allowance_exceeded, Linklocal packet rate allowance),
114 ENA_METRIC_ENI_ENTRY(
115 conntrack_allowance_available, Number of available conntracks),
116 };
117
/*
 * Number of elements in a true array (not a pointer).  Only defined here when
 * no earlier header supplied it.  The argument is parenthesized so that
 * expression arguments are indexed as a whole.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#endif

/* Entry counts of the metric description tables. */
#define ENA_CUSTOMER_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_hw_stats_strings)
#define ENA_SRD_METRICS_ARRAY_SIZE ARRAY_SIZE(ena_srd_stats_strings)
124
/* Parent node for the driver-wide (not per-device) ENA sysctls declared below. */
static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "ENA driver parameters");

/*
 * Logging level controlling the verbosity of the driver output.
 */
int ena_log_level = ENA_INFO;
SYSCTL_INT(_hw_ena, OID_AUTO, log_level, CTLFLAG_RWTUN, &ena_log_level, 0,
    "Logging level indicating verbosity of the logs");

/* Read-only string reporting the driver version. */
SYSCTL_CONST_STRING(_hw_ena, OID_AUTO, driver_version, CTLFLAG_RD,
    ENA_DRV_MODULE_VERSION, "ENA driver version");

/*
 * Use 9k mbufs for the Rx buffers. Defaults to 0 (use page size mbufs instead).
 * Using 9k mbufs in low memory conditions might cause allocations to take a
 * long time and lead to OS instability, because contiguous pages have to be
 * found.
 * However, page size mbufs have slightly lower throughput than 9k mbufs, so if
 * network performance is the priority, the 9k mbufs can be used.
 */
int ena_enable_9k_mbufs = 0;
SYSCTL_INT(_hw_ena, OID_AUTO, enable_9k_mbufs, CTLFLAG_RDTUN,
    &ena_enable_9k_mbufs, 0, "Use 9 kB mbufs for Rx descriptors");

/*
 * Force the driver to use large or regular LLQ (Low Latency Queue) header size.
 * Defaults to ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT. This option may be
 * important for platforms that often handle Tx packets whose total header
 * size is greater than 96B, as it may reduce the latency.
 * It also reduces the maximum Tx queue size by half, so it may cause more Tx
 * packet drops.
 */
int ena_force_large_llq_header = ENA_LLQ_HEADER_SIZE_POLICY_DEFAULT;
SYSCTL_INT(_hw_ena, OID_AUTO, force_large_llq_header, CTLFLAG_RDTUN,
    &ena_force_large_llq_header, 0,
    "Change default LLQ entry size received from the device");

/*
 * Size of the RSS indirection table. When the kernel is built without option
 * RSS, this value is exported read-only as the "indir_table_size" leaf.
 */
int ena_rss_table_size = ENA_RX_RSS_TABLE_SIZE;
164
ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter * adapter)165 int ena_sysctl_allocate_customer_metrics_buffer(struct ena_adapter *adapter)
166 {
167 int rc = 0;
168
169 adapter->customer_metrics_array = malloc((sizeof(u64) * ENA_CUSTOMER_METRICS_ARRAY_SIZE),
170 M_DEVBUF, M_NOWAIT | M_ZERO);
171 if (unlikely(adapter->customer_metrics_array == NULL))
172 rc = ENOMEM;
173
174 return rc;
175 }
176 void
ena_sysctl_add_nodes(struct ena_adapter * adapter)177 ena_sysctl_add_nodes(struct ena_adapter *adapter)
178 {
179 struct ena_com_dev *dev = adapter->ena_dev;
180
181 if (ena_com_get_cap(dev, ENA_ADMIN_CUSTOMER_METRICS))
182 ena_sysctl_add_customer_metrics(adapter);
183 else if (ena_com_get_cap(dev, ENA_ADMIN_ENI_STATS))
184 ena_sysctl_add_eni_metrics(adapter);
185
186 if (ena_com_get_cap(adapter->ena_dev, ENA_ADMIN_ENA_SRD_INFO))
187 ena_sysctl_add_srd_info(adapter);
188
189 ena_sysctl_add_wd(adapter);
190 ena_sysctl_add_stats(adapter);
191 ena_sysctl_add_tuneables(adapter);
192 ena_sysctl_add_irq_affinity(adapter);
193 #ifndef RSS
194 ena_sysctl_add_rss(adapter);
195 #endif
196 }
197
198 static void
ena_sysctl_add_wd(struct ena_adapter * adapter)199 ena_sysctl_add_wd(struct ena_adapter *adapter)
200 {
201 device_t dev;
202
203 struct sysctl_ctx_list *ctx;
204 struct sysctl_oid *tree;
205 struct sysctl_oid_list *child;
206
207 dev = adapter->pdev;
208
209 ctx = device_get_sysctl_ctx(dev);
210 tree = device_get_sysctl_tree(dev);
211 child = SYSCTL_CHILDREN(tree);
212
213 /* Sysctl calls for Watchdog service */
214 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "wd_active", CTLFLAG_RWTUN,
215 &adapter->wd_active, 0, "Watchdog is active");
216
217 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "keep_alive_timeout",
218 CTLFLAG_RWTUN, &adapter->keep_alive_timeout,
219 "Timeout for Keep Alive messages");
220
221 SYSCTL_ADD_QUAD(ctx, child, OID_AUTO, "missing_tx_timeout",
222 CTLFLAG_RWTUN, &adapter->missing_tx_timeout,
223 "Timeout for TX completion");
224
225 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_max_queues",
226 CTLFLAG_RWTUN, &adapter->missing_tx_max_queues, 0,
227 "Number of TX queues to check per run");
228
229 SYSCTL_ADD_U32(ctx, child, OID_AUTO, "missing_tx_threshold",
230 CTLFLAG_RWTUN, &adapter->missing_tx_threshold, 0,
231 "Max number of timeouted packets");
232 }
233
234 static void
ena_sysctl_add_stats(struct ena_adapter * adapter)235 ena_sysctl_add_stats(struct ena_adapter *adapter)
236 {
237 device_t dev;
238
239 struct ena_ring *tx_ring;
240 struct ena_ring *rx_ring;
241
242 struct ena_hw_stats *hw_stats;
243 struct ena_stats_dev *dev_stats;
244 struct ena_stats_tx *tx_stats;
245 struct ena_stats_rx *rx_stats;
246 struct ena_com_stats_admin *admin_stats;
247
248 struct sysctl_ctx_list *ctx;
249 struct sysctl_oid *tree;
250 struct sysctl_oid_list *child;
251
252 struct sysctl_oid *queue_node, *tx_node, *rx_node, *hw_node;
253 struct sysctl_oid *admin_node;
254 struct sysctl_oid_list *queue_list, *tx_list, *rx_list, *hw_list;
255 struct sysctl_oid_list *admin_list;
256
257 #define QUEUE_NAME_LEN 32
258 char namebuf[QUEUE_NAME_LEN];
259 int i;
260
261 dev = adapter->pdev;
262
263 ctx = device_get_sysctl_ctx(dev);
264 tree = device_get_sysctl_tree(dev);
265 child = SYSCTL_CHILDREN(tree);
266
267 tx_ring = adapter->tx_ring;
268 rx_ring = adapter->rx_ring;
269
270 hw_stats = &adapter->hw_stats;
271 dev_stats = &adapter->dev_stats;
272 admin_stats = &adapter->ena_dev->admin_queue.stats;
273
274 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "wd_expired", CTLFLAG_RD,
275 &dev_stats->wd_expired, "Watchdog expiry count");
276 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_up", CTLFLAG_RD,
277 &dev_stats->interface_up, "Network interface up count");
278 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "interface_down", CTLFLAG_RD,
279 &dev_stats->interface_down, "Network interface down count");
280 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_q_pause", CTLFLAG_RD,
281 &dev_stats->admin_q_pause, "Admin queue pauses");
282 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "os_trigger", CTLFLAG_RD,
283 &dev_stats->os_trigger, "OS trigger count");
284 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_tx_cmpl", CTLFLAG_RD,
285 &dev_stats->missing_tx_cmpl, "Missing TX completions resets count");
286 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_req_id", CTLFLAG_RD,
287 &dev_stats->bad_rx_req_id, "Bad RX req id count");
288 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_tx_req_id", CTLFLAG_RD,
289 &dev_stats->bad_tx_req_id, "Bad TX req id count");
290 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "bad_rx_desc_num", CTLFLAG_RD,
291 &dev_stats->bad_rx_desc_num, "Bad RX descriptors number count");
292 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "invalid_state", CTLFLAG_RD,
293 &dev_stats->invalid_state, "Driver invalid state count");
294 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_intr", CTLFLAG_RD,
295 &dev_stats->missing_intr, "Missing interrupt count");
296 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "tx_desc_malformed", CTLFLAG_RD,
297 &dev_stats->tx_desc_malformed, "TX descriptors malformed count");
298 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "rx_desc_malformed", CTLFLAG_RD,
299 &dev_stats->rx_desc_malformed, "RX descriptors malformed count");
300 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "missing_admin_interrupt", CTLFLAG_RD,
301 &dev_stats->missing_admin_interrupt, "Missing admin interrupts count");
302 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "admin_to", CTLFLAG_RD,
303 &dev_stats->admin_to, "Admin queue timeouts count");
304 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "device_request_reset", CTLFLAG_RD,
305 &dev_stats->device_request_reset, "Device reset requests count");
306 SYSCTL_ADD_COUNTER_U64(ctx, child, OID_AUTO, "total_resets", CTLFLAG_RD,
307 &dev_stats->total_resets, "Total resets count");
308
309 for (i = 0; i < adapter->num_io_queues; ++i, ++tx_ring, ++rx_ring) {
310 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
311
312 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
313 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name");
314 queue_list = SYSCTL_CHILDREN(queue_node);
315
316 adapter->que[i].oid = queue_node;
317
318 #ifdef RSS
319 /* Common stats */
320 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "cpu", CTLFLAG_RD,
321 &adapter->que[i].cpu, 0, "CPU affinity");
322 SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "domain", CTLFLAG_RD,
323 &adapter->que[i].domain, 0, "NUMA domain");
324 #endif
325
326 /* TX specific stats */
327 tx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "tx_ring",
328 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "TX ring");
329 tx_list = SYSCTL_CHILDREN(tx_node);
330
331 tx_stats = &tx_ring->tx_stats;
332
333 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "count",
334 CTLFLAG_RD, &tx_stats->cnt, "Packets sent");
335 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bytes",
336 CTLFLAG_RD, &tx_stats->bytes, "Bytes sent");
337 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
338 "prepare_ctx_err", CTLFLAG_RD, &tx_stats->prepare_ctx_err,
339 "TX buffer preparation failures");
340 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
341 "dma_mapping_err", CTLFLAG_RD, &tx_stats->dma_mapping_err,
342 "DMA mapping failures");
343 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "doorbells",
344 CTLFLAG_RD, &tx_stats->doorbells, "Queue doorbells");
345 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
346 "missing_tx_comp", CTLFLAG_RD, &tx_stats->missing_tx_comp,
347 "TX completions missed");
348 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "bad_req_id",
349 CTLFLAG_RD, &tx_stats->bad_req_id, "Bad request id count");
350 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "mbuf_collapses",
351 CTLFLAG_RD, &tx_stats->collapse, "Mbuf collapse count");
352 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
353 "mbuf_collapse_err", CTLFLAG_RD, &tx_stats->collapse_err,
354 "Mbuf collapse failures");
355 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_wakeups",
356 CTLFLAG_RD, &tx_stats->queue_wakeup, "Queue wakeups");
357 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO, "queue_stops",
358 CTLFLAG_RD, &tx_stats->queue_stop, "Queue stops");
359 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
360 "llq_buffer_copy", CTLFLAG_RD, &tx_stats->llq_buffer_copy,
361 "Header copies for llq transaction");
362 SYSCTL_ADD_COUNTER_U64(ctx, tx_list, OID_AUTO,
363 "unmask_interrupt_num", CTLFLAG_RD,
364 &tx_stats->unmask_interrupt_num,
365 "Unmasked interrupt count");
366
367 /* RX specific stats */
368 rx_node = SYSCTL_ADD_NODE(ctx, queue_list, OID_AUTO, "rx_ring",
369 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "RX ring");
370 rx_list = SYSCTL_CHILDREN(rx_node);
371
372 rx_stats = &rx_ring->rx_stats;
373
374 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "count",
375 CTLFLAG_RD, &rx_stats->cnt, "Packets received");
376 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bytes",
377 CTLFLAG_RD, &rx_stats->bytes, "Bytes received");
378 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "refil_partial",
379 CTLFLAG_RD, &rx_stats->refil_partial,
380 "Partial refilled mbufs");
381 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_bad",
382 CTLFLAG_RD, &rx_stats->csum_bad, "Bad RX checksum");
383 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
384 "mbuf_alloc_fail", CTLFLAG_RD, &rx_stats->mbuf_alloc_fail,
385 "Failed mbuf allocs");
386 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
387 "mjum_alloc_fail", CTLFLAG_RD, &rx_stats->mjum_alloc_fail,
388 "Failed jumbo mbuf allocs");
389 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO,
390 "dma_mapping_err", CTLFLAG_RD, &rx_stats->dma_mapping_err,
391 "DMA mapping errors");
392 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_desc_num",
393 CTLFLAG_RD, &rx_stats->bad_desc_num,
394 "Bad descriptor count");
395 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "bad_req_id",
396 CTLFLAG_RD, &rx_stats->bad_req_id, "Bad request id count");
397 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "empty_rx_ring",
398 CTLFLAG_RD, &rx_stats->empty_rx_ring,
399 "RX descriptors depletion count");
400 SYSCTL_ADD_COUNTER_U64(ctx, rx_list, OID_AUTO, "csum_good",
401 CTLFLAG_RD, &rx_stats->csum_good,
402 "Valid RX checksum calculations");
403 }
404
405 /* Stats read from device */
406 hw_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "hw_stats",
407 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Statistics from hardware");
408 hw_list = SYSCTL_CHILDREN(hw_node);
409
410 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_packets", CTLFLAG_RD,
411 &hw_stats->rx_packets, "Packets received");
412 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_packets", CTLFLAG_RD,
413 &hw_stats->tx_packets, "Packets transmitted");
414 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_bytes", CTLFLAG_RD,
415 &hw_stats->rx_bytes, "Bytes received");
416 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_bytes", CTLFLAG_RD,
417 &hw_stats->tx_bytes, "Bytes transmitted");
418 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_drops", CTLFLAG_RD,
419 &hw_stats->rx_drops, "Receive packet drops");
420 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "tx_drops", CTLFLAG_RD,
421 &hw_stats->tx_drops, "Transmit packet drops");
422 SYSCTL_ADD_COUNTER_U64(ctx, hw_list, OID_AUTO, "rx_overruns", CTLFLAG_RD,
423 &hw_stats->rx_overruns, "Receive overruns");
424
425 /* ENA Admin queue stats */
426 admin_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "admin_stats",
427 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA Admin Queue statistics");
428 admin_list = SYSCTL_CHILDREN(admin_node);
429
430 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "aborted_cmd", CTLFLAG_RD,
431 &admin_stats->aborted_cmd, 0, "Aborted commands");
432 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "sumbitted_cmd", CTLFLAG_RD,
433 &admin_stats->submitted_cmd, 0, "Submitted commands");
434 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "completed_cmd", CTLFLAG_RD,
435 &admin_stats->completed_cmd, 0, "Completed commands");
436 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "out_of_space", CTLFLAG_RD,
437 &admin_stats->out_of_space, 0, "Queue out of space");
438 SYSCTL_ADD_U64(ctx, admin_list, OID_AUTO, "no_completion", CTLFLAG_RD,
439 &admin_stats->no_completion, 0, "Commands not completed");
440 }
441
442 static void
ena_sysctl_add_srd_info(struct ena_adapter * adapter)443 ena_sysctl_add_srd_info(struct ena_adapter *adapter)
444 {
445 device_t dev;
446
447 struct sysctl_oid *ena_srd_info;
448 struct sysctl_oid_list *srd_list;
449
450 struct sysctl_ctx_list *ctx;
451 struct sysctl_oid *tree;
452 struct sysctl_oid_list *child;
453
454 struct ena_admin_ena_srd_stats *srd_stats_ptr;
455 struct ena_srd_metrics cur_stat_strings;
456
457 int i;
458
459 dev = adapter->pdev;
460
461 ctx = device_get_sysctl_ctx(dev);
462 tree = device_get_sysctl_tree(dev);
463 child = SYSCTL_CHILDREN(tree);
464
465 ena_srd_info = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "ena_srd_info",
466 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's SRD information");
467 srd_list = SYSCTL_CHILDREN(ena_srd_info);
468
469 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, "ena_srd_mode",
470 CTLFLAG_RD, &adapter->ena_srd_info.flags, 0,
471 "Describes which ENA-express features are enabled");
472
473 srd_stats_ptr = &adapter->ena_srd_info.ena_srd_stats;
474
475 for (i = 0 ; i < ENA_SRD_METRICS_ARRAY_SIZE; i++) {
476 cur_stat_strings = ena_srd_stats_strings[i];
477 SYSCTL_ADD_U64(ctx, srd_list, OID_AUTO, cur_stat_strings.name,
478 CTLFLAG_RD, (u64 *)srd_stats_ptr + cur_stat_strings.stat_offset,
479 0, cur_stat_strings.description);
480 }
481 }
482
483 static void
ena_sysctl_add_customer_metrics(struct ena_adapter * adapter)484 ena_sysctl_add_customer_metrics(struct ena_adapter *adapter)
485 {
486 device_t dev;
487 struct ena_com_dev *ena_dev;
488
489 struct sysctl_ctx_list *ctx;
490 struct sysctl_oid *tree;
491 struct sysctl_oid_list *child;
492
493 struct sysctl_oid *customer_metric;
494 struct sysctl_oid_list *customer_list;
495
496 int i;
497
498 dev = adapter->pdev;
499 ena_dev = adapter->ena_dev;
500
501 ctx = device_get_sysctl_ctx(dev);
502 tree = device_get_sysctl_tree(dev);
503 child = SYSCTL_CHILDREN(tree);
504 customer_metric = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "customer_metrics",
505 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's customer metrics");
506 customer_list = SYSCTL_CHILDREN(customer_metric);
507
508 for (i = 0; i < ENA_CUSTOMER_METRICS_ARRAY_SIZE; i++) {
509 if (ena_com_get_customer_metric_support(ena_dev, i)) {
510 SYSCTL_ADD_U64(ctx, customer_list, OID_AUTO, ena_hw_stats_strings[i].name,
511 CTLFLAG_RD, &adapter->customer_metrics_array[i], 0,
512 ena_hw_stats_strings[i].description);
513 }
514 }
515 }
516
517 static void
ena_sysctl_add_eni_metrics(struct ena_adapter * adapter)518 ena_sysctl_add_eni_metrics(struct ena_adapter *adapter)
519 {
520 device_t dev;
521 struct ena_admin_eni_stats *eni_metrics;
522
523 struct sysctl_ctx_list *ctx;
524 struct sysctl_oid *tree;
525 struct sysctl_oid_list *child;
526
527 struct sysctl_oid *eni_node;
528 struct sysctl_oid_list *eni_list;
529
530 dev = adapter->pdev;
531
532 ctx = device_get_sysctl_ctx(dev);
533 tree = device_get_sysctl_tree(dev);
534 child = SYSCTL_CHILDREN(tree);
535
536 eni_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "eni_metrics",
537 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "ENA's ENI metrics");
538 eni_list = SYSCTL_CHILDREN(eni_node);
539
540 eni_metrics = &adapter->eni_metrics;
541
542 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_in_allowance_exceeded",
543 CTLFLAG_RD, &eni_metrics->bw_in_allowance_exceeded, 0,
544 "Inbound BW allowance exceeded");
545 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "bw_out_allowance_exceeded",
546 CTLFLAG_RD, &eni_metrics->bw_out_allowance_exceeded, 0,
547 "Outbound BW allowance exceeded");
548 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "pps_allowance_exceeded",
549 CTLFLAG_RD, &eni_metrics->pps_allowance_exceeded, 0,
550 "PPS allowance exceeded");
551 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "conntrack_allowance_exceeded",
552 CTLFLAG_RD, &eni_metrics->conntrack_allowance_exceeded, 0,
553 "Connection tracking allowance exceeded");
554 SYSCTL_ADD_U64(ctx, eni_list, OID_AUTO, "linklocal_allowance_exceeded",
555 CTLFLAG_RD, &eni_metrics->linklocal_allowance_exceeded, 0,
556 "Linklocal packet rate allowance exceeded");
557 }
558
559 static void
ena_sysctl_add_tuneables(struct ena_adapter * adapter)560 ena_sysctl_add_tuneables(struct ena_adapter *adapter)
561 {
562 device_t dev;
563
564 struct sysctl_ctx_list *ctx;
565 struct sysctl_oid *tree;
566 struct sysctl_oid_list *child;
567
568 dev = adapter->pdev;
569
570 ctx = device_get_sysctl_ctx(dev);
571 tree = device_get_sysctl_tree(dev);
572 child = SYSCTL_CHILDREN(tree);
573
574 /* Tuneable number of buffers in the buf-ring (drbr) */
575 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "buf_ring_size",
576 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
577 ena_sysctl_buf_ring_size, "I",
578 "Size of the Tx buffer ring (drbr).");
579
580 /* Tuneable number of the Rx ring size */
581 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_queue_size",
582 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
583 ena_sysctl_rx_queue_size, "I",
584 "Size of the Rx ring. The size should be a power of 2.");
585
586 /* Tuneable number of IO queues */
587 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "io_queues_nb",
588 CTLTYPE_U32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
589 ena_sysctl_io_queues_nb, "I", "Number of IO queues.");
590
591 /*
592 * Tuneable, which determines how often ENA metrics will be read.
593 * 0 means it's turned off. Maximum allowed value is limited by:
594 * ENA_METRICS_MAX_SAMPLE_INTERVAL.
595 */
596 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "stats_sample_interval",
597 CTLTYPE_U16 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
598 ena_sysctl_metrics_interval, "SU",
599 "Interval in seconds for updating Netword interface metrics. 0 turns off the update.");
600 }
601
602 /* Kernel option RSS prevents manipulation of key hash and indirection table. */
603 #ifndef RSS
604 static void
ena_sysctl_add_rss(struct ena_adapter * adapter)605 ena_sysctl_add_rss(struct ena_adapter *adapter)
606 {
607 device_t dev;
608
609 struct sysctl_ctx_list *ctx;
610 struct sysctl_oid *tree;
611 struct sysctl_oid_list *child;
612
613 dev = adapter->pdev;
614
615 ctx = device_get_sysctl_ctx(dev);
616 tree = device_get_sysctl_tree(dev);
617 child = SYSCTL_CHILDREN(tree);
618
619 /* RSS options */
620 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rss",
621 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Receive Side Scaling options.");
622 child = SYSCTL_CHILDREN(tree);
623
624 /* RSS hash key */
625 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "key",
626 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
627 ena_sysctl_rss_key, "A", "RSS key.");
628
629 /* Tuneable RSS indirection table */
630 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "indir_table",
631 CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
632 ena_sysctl_rss_indir_table, "A", "RSS indirection table.");
633
634 /* RSS indirection table size */
635 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "indir_table_size",
636 CTLFLAG_RD | CTLFLAG_MPSAFE, &ena_rss_table_size, 0,
637 "RSS indirection table size.");
638 }
639 #endif /* RSS */
640
641 static void
ena_sysctl_add_irq_affinity(struct ena_adapter * adapter)642 ena_sysctl_add_irq_affinity(struct ena_adapter *adapter)
643 {
644 device_t dev;
645
646 struct sysctl_ctx_list *ctx;
647 struct sysctl_oid *tree;
648 struct sysctl_oid_list *child;
649
650 dev = adapter->pdev;
651
652 ctx = device_get_sysctl_ctx(dev);
653 tree = device_get_sysctl_tree(dev);
654 child = SYSCTL_CHILDREN(tree);
655
656 tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "irq_affinity",
657 CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, "Decide base CPU and stride for irqs affinity.");
658 child = SYSCTL_CHILDREN(tree);
659
660 /* Add base cpu leaf */
661 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "base_cpu",
662 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
663 ena_sysctl_irq_base_cpu, "I", "Base cpu index for setting irq affinity.");
664
665 /* Add cpu stride leaf */
666 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "cpu_stride",
667 CTLTYPE_S32 | CTLFLAG_RW | CTLFLAG_MPSAFE, adapter, 0,
668 ena_sysctl_irq_cpu_stride, "I", "Distance between irqs when setting affinity.");
669 }
670
671
672 /*
673 * ena_sysctl_update_queue_node_nb - Register/unregister sysctl queue nodes.
674 *
675 * Whether the nodes are registered or unregistered depends on a delta between
676 * the `old` and `new` parameters, representing the number of queues.
677 *
678 * This function is used to hide sysctl attributes for queue nodes which aren't
679 * currently used by the HW (e.g. after a call to `ena_sysctl_io_queues_nb`).
680 *
681 * NOTE:
682 * All unregistered nodes must be registered again at detach, i.e. by a call to
683 * this function.
684 */
685 void
ena_sysctl_update_queue_node_nb(struct ena_adapter * adapter,int old,int new)686 ena_sysctl_update_queue_node_nb(struct ena_adapter *adapter, int old, int new)
687 {
688 struct sysctl_oid *oid;
689 int min, max, i;
690
691 min = MIN(old, new);
692 max = MIN(MAX(old, new), adapter->max_num_io_queues);
693
694 for (i = min; i < max; ++i) {
695 oid = adapter->que[i].oid;
696
697 sysctl_wlock();
698 if (old > new)
699 sysctl_unregister_oid(oid);
700 else
701 sysctl_register_oid(oid);
702 sysctl_wunlock();
703 }
704 }
705
706 static int
ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)707 ena_sysctl_buf_ring_size(SYSCTL_HANDLER_ARGS)
708 {
709 struct ena_adapter *adapter = arg1;
710 uint32_t val;
711 int error;
712
713 ENA_LOCK_LOCK();
714 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
715 error = EINVAL;
716 goto unlock;
717 }
718
719 val = 0;
720 error = sysctl_wire_old_buffer(req, sizeof(val));
721 if (error == 0) {
722 val = adapter->buf_ring_size;
723 error = sysctl_handle_32(oidp, &val, 0, req);
724 }
725 if (error != 0 || req->newptr == NULL)
726 goto unlock;
727
728 if (!powerof2(val) || val == 0) {
729 ena_log(adapter->pdev, ERR,
730 "Requested new Tx buffer ring size (%u) is not a power of 2\n",
731 val);
732 error = EINVAL;
733 goto unlock;
734 }
735
736 if (val != adapter->buf_ring_size) {
737 ena_log(adapter->pdev, INFO,
738 "Requested new Tx buffer ring size: %d. Old size: %d\n",
739 val, adapter->buf_ring_size);
740
741 error = ena_update_buf_ring_size(adapter, val);
742 } else {
743 ena_log(adapter->pdev, ERR,
744 "New Tx buffer ring size is the same as already used: %u\n",
745 adapter->buf_ring_size);
746 }
747
748 unlock:
749 ENA_LOCK_UNLOCK();
750
751 return (error);
752 }
753
754 static int
ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)755 ena_sysctl_rx_queue_size(SYSCTL_HANDLER_ARGS)
756 {
757 struct ena_adapter *adapter = arg1;
758 uint32_t val;
759 int error;
760
761 ENA_LOCK_LOCK();
762 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
763 error = EINVAL;
764 goto unlock;
765 }
766
767 val = 0;
768 error = sysctl_wire_old_buffer(req, sizeof(val));
769 if (error == 0) {
770 val = adapter->requested_rx_ring_size;
771 error = sysctl_handle_32(oidp, &val, 0, req);
772 }
773 if (error != 0 || req->newptr == NULL)
774 goto unlock;
775
776 if (val < ENA_MIN_RING_SIZE || val > adapter->max_rx_ring_size) {
777 ena_log(adapter->pdev, ERR,
778 "Requested new Rx queue size (%u) is out of range: [%u, %u]\n",
779 val, ENA_MIN_RING_SIZE, adapter->max_rx_ring_size);
780 error = EINVAL;
781 goto unlock;
782 }
783
784 /* Check if the parameter is power of 2 */
785 if (!powerof2(val)) {
786 ena_log(adapter->pdev, ERR,
787 "Requested new Rx queue size (%u) is not a power of 2\n",
788 val);
789 error = EINVAL;
790 goto unlock;
791 }
792
793 if (val != adapter->requested_rx_ring_size) {
794 ena_log(adapter->pdev, INFO,
795 "Requested new Rx queue size: %u. Old size: %u\n", val,
796 adapter->requested_rx_ring_size);
797
798 error = ena_update_queue_size(adapter,
799 adapter->requested_tx_ring_size, val);
800 } else {
801 ena_log(adapter->pdev, ERR,
802 "New Rx queue size is the same as already used: %u\n",
803 adapter->requested_rx_ring_size);
804 }
805
806 unlock:
807 ENA_LOCK_UNLOCK();
808
809 return (error);
810 }
811
812 /*
813 * Change number of effectively used IO queues adapter->num_io_queues
814 */
815 static int
ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)816 ena_sysctl_io_queues_nb(SYSCTL_HANDLER_ARGS)
817 {
818 struct ena_adapter *adapter = arg1;
819 uint32_t old_num_queues, tmp = 0;
820 int error;
821
822 ENA_LOCK_LOCK();
823 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
824 error = EINVAL;
825 goto unlock;
826 }
827
828 error = sysctl_wire_old_buffer(req, sizeof(tmp));
829 if (error == 0) {
830 tmp = adapter->num_io_queues;
831 error = sysctl_handle_int(oidp, &tmp, 0, req);
832 }
833 if (error != 0 || req->newptr == NULL)
834 goto unlock;
835
836 if (tmp == 0) {
837 ena_log(adapter->pdev, ERR,
838 "Requested number of IO queues is zero\n");
839 error = EINVAL;
840 goto unlock;
841 }
842
843 /*
844 * The adapter::max_num_io_queues is the HW capability. The system
845 * resources availability may potentially be a tighter limit. Therefore
846 * the relation `adapter::max_num_io_queues >= adapter::msix_vecs`
847 * always holds true, while the `adapter::msix_vecs` is variable across
848 * device reset (`ena_destroy_device()` + `ena_restore_device()`).
849 */
850 if (tmp > (adapter->msix_vecs - ENA_ADMIN_MSIX_VEC)) {
851 ena_log(adapter->pdev, ERR,
852 "Requested number of IO queues is higher than maximum allowed (%u)\n",
853 adapter->msix_vecs - ENA_ADMIN_MSIX_VEC);
854 error = EINVAL;
855 goto unlock;
856 }
857 if (tmp == adapter->num_io_queues) {
858 ena_log(adapter->pdev, ERR,
859 "Requested number of IO queues is equal to current value "
860 "(%u)\n",
861 adapter->num_io_queues);
862 } else {
863 ena_log(adapter->pdev, INFO,
864 "Requested new number of IO queues: %u, current value: "
865 "%u\n",
866 tmp, adapter->num_io_queues);
867
868 old_num_queues = adapter->num_io_queues;
869 error = ena_update_io_queue_nb(adapter, tmp);
870 if (error != 0)
871 return (error);
872
873 ena_sysctl_update_queue_node_nb(adapter, old_num_queues, tmp);
874 }
875
876 unlock:
877 ENA_LOCK_UNLOCK();
878
879 return (error);
880 }
881
882 static int
ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)883 ena_sysctl_metrics_interval(SYSCTL_HANDLER_ARGS)
884 {
885 struct ena_adapter *adapter = arg1;
886 uint16_t interval;
887 int error;
888
889 ENA_LOCK_LOCK();
890 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
891 error = EINVAL;
892 goto unlock;
893 }
894
895 error = sysctl_wire_old_buffer(req, sizeof(interval));
896 if (error == 0) {
897 interval = adapter->metrics_sample_interval;
898 error = sysctl_handle_16(oidp, &interval, 0, req);
899 }
900 if (error != 0 || req->newptr == NULL)
901 goto unlock;
902
903 if (interval > ENA_METRICS_MAX_SAMPLE_INTERVAL) {
904 ena_log(adapter->pdev, ERR,
905 "ENA metrics update interval is out of range - maximum allowed value: %d seconds\n",
906 ENA_METRICS_MAX_SAMPLE_INTERVAL);
907 error = EINVAL;
908 goto unlock;
909 }
910
911 if (interval == 0) {
912 ena_log(adapter->pdev, INFO,
913 "ENA metrics update is now turned off\n");
914 bzero(&adapter->eni_metrics, sizeof(adapter->eni_metrics));
915 } else {
916 ena_log(adapter->pdev, INFO,
917 "ENA metrics update interval is set to: %" PRIu16
918 " seconds\n",
919 interval);
920 }
921
922 adapter->metrics_sample_interval = interval;
923
924 unlock:
925 ENA_LOCK_UNLOCK();
926
927 return (0);
928 }
929
930 static int
ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)931 ena_sysctl_irq_base_cpu(SYSCTL_HANDLER_ARGS)
932 {
933 struct ena_adapter *adapter = arg1;
934 int irq_base_cpu = 0;
935 int error;
936
937 ENA_LOCK_LOCK();
938 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
939 error = ENODEV;
940 goto unlock;
941 }
942
943 error = sysctl_wire_old_buffer(req, sizeof(irq_base_cpu));
944 if (error == 0) {
945 irq_base_cpu = adapter->irq_cpu_base;
946 error = sysctl_handle_int(oidp, &irq_base_cpu, 0, req);
947 }
948 if (error != 0 || req->newptr == NULL)
949 goto unlock;
950
951 if (irq_base_cpu <= ENA_BASE_CPU_UNSPECIFIED) {
952 ena_log(adapter->pdev, ERR,
953 "Requested base CPU is less than zero.\n");
954 error = EINVAL;
955 goto unlock;
956 }
957
958 if (irq_base_cpu > mp_ncpus) {
959 ena_log(adapter->pdev, INFO,
960 "Requested base CPU is larger than the number of available CPUs. \n");
961 error = EINVAL;
962 goto unlock;
963
964 }
965
966 if (irq_base_cpu == adapter->irq_cpu_base) {
967 ena_log(adapter->pdev, INFO,
968 "Requested IRQ base CPU is equal to current value "
969 "(%d)\n",
970 adapter->irq_cpu_base);
971 goto unlock;
972 }
973
974 ena_log(adapter->pdev, INFO,
975 "Requested new IRQ base CPU: %d, current value: %d\n",
976 irq_base_cpu, adapter->irq_cpu_base);
977
978 error = ena_update_base_cpu(adapter, irq_base_cpu);
979
980 unlock:
981 ENA_LOCK_UNLOCK();
982
983 return (error);
984 }
985
986 static int
ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)987 ena_sysctl_irq_cpu_stride(SYSCTL_HANDLER_ARGS)
988 {
989 struct ena_adapter *adapter = arg1;
990 int32_t irq_cpu_stride = 0;
991 int error;
992
993 ENA_LOCK_LOCK();
994 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
995 error = ENODEV;
996 goto unlock;
997 }
998
999 error = sysctl_wire_old_buffer(req, sizeof(irq_cpu_stride));
1000 if (error == 0) {
1001 irq_cpu_stride = adapter->irq_cpu_stride;
1002 error = sysctl_handle_int(oidp, &irq_cpu_stride, 0, req);
1003 }
1004 if (error != 0 || req->newptr == NULL)
1005 goto unlock;
1006
1007 if (irq_cpu_stride < 0) {
1008 ena_log(adapter->pdev, ERR,
1009 "Requested IRQ stride is less than zero.\n");
1010 error = EINVAL;
1011 goto unlock;
1012 }
1013
1014 if (irq_cpu_stride > mp_ncpus) {
1015 ena_log(adapter->pdev, INFO,
1016 "Warning: Requested IRQ stride is larger than the number of available CPUs.\n");
1017 }
1018
1019 if (irq_cpu_stride == adapter->irq_cpu_stride) {
1020 ena_log(adapter->pdev, INFO,
1021 "Requested IRQ CPU stride is equal to current value "
1022 "(%u)\n",
1023 adapter->irq_cpu_stride);
1024 goto unlock;
1025 }
1026
1027 ena_log(adapter->pdev, INFO,
1028 "Requested new IRQ CPU stride: %u, current value: %u\n",
1029 irq_cpu_stride, adapter->irq_cpu_stride);
1030
1031 error = ena_update_cpu_stride(adapter, irq_cpu_stride);
1032 if (error != 0)
1033 goto unlock;
1034
1035 unlock:
1036 ENA_LOCK_UNLOCK();
1037
1038 return (error);
1039 }
1040
1041 #ifndef RSS
1042 /*
1043 * Change the Receive Side Scaling hash key.
1044 */
1045 static int
ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)1046 ena_sysctl_rss_key(SYSCTL_HANDLER_ARGS)
1047 {
1048 struct ena_adapter *adapter = arg1;
1049 struct ena_com_dev *ena_dev = adapter->ena_dev;
1050 enum ena_admin_hash_functions ena_func;
1051 char msg[ENA_HASH_KEY_MSG_SIZE];
1052 char elem[3] = { 0 };
1053 char *endp;
1054 u8 rss_key[ENA_HASH_KEY_SIZE];
1055 int error, i;
1056
1057 ENA_LOCK_LOCK();
1058 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1059 error = EINVAL;
1060 goto unlock;
1061 }
1062
1063 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1064 error = ENOTSUP;
1065 goto unlock;
1066 }
1067
1068 error = sysctl_wire_old_buffer(req, sizeof(msg));
1069 if (error != 0)
1070 goto unlock;
1071
1072 error = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
1073 if (error != 0) {
1074 device_printf(adapter->pdev, "Cannot get hash function\n");
1075 goto unlock;
1076 }
1077
1078 if (ena_func != ENA_ADMIN_TOEPLITZ) {
1079 error = EINVAL;
1080 device_printf(adapter->pdev, "Unsupported hash algorithm\n");
1081 goto unlock;
1082 }
1083
1084 error = ena_rss_get_hash_key(ena_dev, rss_key);
1085 if (error != 0) {
1086 device_printf(adapter->pdev, "Cannot get hash key\n");
1087 goto unlock;
1088 }
1089
1090 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i)
1091 snprintf(&msg[i * 2], 3, "%02x", rss_key[i]);
1092
1093 error = sysctl_handle_string(oidp, msg, sizeof(msg), req);
1094 if (error != 0 || req->newptr == NULL)
1095 goto unlock;
1096
1097 if (strlen(msg) != sizeof(msg) - 1) {
1098 error = EINVAL;
1099 device_printf(adapter->pdev, "Invalid key size\n");
1100 goto unlock;
1101 }
1102
1103 for (i = 0; i < ENA_HASH_KEY_SIZE; ++i) {
1104 strncpy(elem, &msg[i * 2], 2);
1105 rss_key[i] = strtol(elem, &endp, 16);
1106
1107 /* Both hex nibbles in the string must be valid to continue. */
1108 if (endp == elem || *endp != '\0' || rss_key[i] < 0) {
1109 error = EINVAL;
1110 device_printf(adapter->pdev,
1111 "Invalid key hex value: '%c'\n", *endp);
1112 goto unlock;
1113 }
1114 }
1115
1116 error = ena_rss_set_hash(ena_dev, rss_key);
1117 if (error != 0)
1118 device_printf(adapter->pdev, "Cannot fill hash key\n");
1119
1120 unlock:
1121 ENA_LOCK_UNLOCK();
1122
1123 return (error);
1124 }
1125
1126 /*
1127 * Change the Receive Side Scaling indirection table.
1128 *
1129 * The sysctl entry string consists of one or more `x:y` keypairs, where
1130 * x stands for the table index and y for its new value.
1131 * Table indices that don't need to be updated can be omitted from the string
1132 * and will retain their existing values. If an index is entered more than once,
1133 * the last value is used.
1134 *
1135 * Example:
1136 * To update two selected indices in the RSS indirection table, e.g. setting
1137 * index 0 to queue 5 and then index 5 to queue 0, the below command should be
1138 * used:
1139 * sysctl dev.ena.0.rss.indir_table="0:5 5:0"
1140 */
1141 static int
ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)1142 ena_sysctl_rss_indir_table(SYSCTL_HANDLER_ARGS)
1143 {
1144 int num_queues, error;
1145 struct ena_adapter *adapter = arg1;
1146 struct ena_indir *indir;
1147 char *msg, *buf, *endp;
1148 uint32_t idx, value;
1149
1150 ENA_LOCK_LOCK();
1151 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_DEVICE_RUNNING, adapter))) {
1152 error = EINVAL;
1153 goto unlock;
1154 }
1155
1156 if (unlikely(!ENA_FLAG_ISSET(ENA_FLAG_RSS_ACTIVE, adapter))) {
1157 error = ENOTSUP;
1158 goto unlock;
1159 }
1160
1161 indir = adapter->rss_indir;
1162 msg = indir->sysctl_buf;
1163
1164 if (unlikely(indir == NULL)) {
1165 error = ENOTSUP;
1166 goto unlock;
1167 }
1168
1169 error = sysctl_handle_string(oidp, msg, sizeof(indir->sysctl_buf), req);
1170 if (error != 0 || req->newptr == NULL)
1171 goto unlock;
1172
1173 num_queues = adapter->num_io_queues;
1174
1175 /*
1176 * This sysctl expects msg to be a list of `x:y` record pairs,
1177 * where x is the indirection table index and y is its value.
1178 */
1179 for (buf = msg; *buf != '\0'; buf = endp) {
1180 idx = strtol(buf, &endp, 10);
1181
1182 if (endp == buf || idx < 0) {
1183 device_printf(adapter->pdev, "Invalid index: %s\n",
1184 buf);
1185 error = EINVAL;
1186 break;
1187 }
1188
1189 if (idx >= ENA_RX_RSS_TABLE_SIZE) {
1190 device_printf(adapter->pdev, "Index %d out of range\n",
1191 idx);
1192 error = ERANGE;
1193 break;
1194 }
1195
1196 buf = endp;
1197
1198 if (*buf++ != ':') {
1199 device_printf(adapter->pdev, "Missing ':' separator\n");
1200 error = EINVAL;
1201 break;
1202 }
1203
1204 value = strtol(buf, &endp, 10);
1205
1206 if (endp == buf || value < 0) {
1207 device_printf(adapter->pdev, "Invalid value: %s\n",
1208 buf);
1209 error = EINVAL;
1210 break;
1211 }
1212
1213 if (value >= num_queues) {
1214 device_printf(adapter->pdev, "Value %d out of range\n",
1215 value);
1216 error = ERANGE;
1217 break;
1218 }
1219
1220 indir->table[idx] = value;
1221 }
1222
1223 if (error != 0) /* Reload indirection table with last good data. */
1224 ena_rss_indir_get(adapter, indir->table);
1225
1226 /* At this point msg has been clobbered by sysctl_handle_string. */
1227 ena_rss_copy_indir_buf(msg, indir->table);
1228
1229 if (error == 0)
1230 error = ena_rss_indir_set(adapter, indir->table);
1231
1232 unlock:
1233 ENA_LOCK_UNLOCK();
1234
1235 return (error);
1236 }
1237 #endif /* RSS */
1238