1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3 * Copyright 2015-2020 Amazon.com, Inc. or its affiliates. All rights reserved.
4 */
5
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7
8 #include <linux/ethtool.h>
9 #include <linux/kernel.h>
10 #include <linux/module.h>
11 #include <linux/numa.h>
12 #include <linux/pci.h>
13 #include <linux/utsname.h>
14 #include <linux/version.h>
15 #include <linux/vmalloc.h>
16 #include <net/ip.h>
17
18 #include "ena_netdev.h"
19 #include "ena_pci_id_tbl.h"
20 #include "ena_xdp.h"
21
22 #include "ena_phc.h"
23
24 #include "ena_devlink.h"
25
26 #include "ena_debugfs.h"
27
28 MODULE_AUTHOR("Amazon.com, Inc. or its affiliates");
29 MODULE_DESCRIPTION(DEVICE_NAME);
30 MODULE_LICENSE("GPL");
31
32 /* Time in jiffies before concluding the transmitter is hung. */
33 #define TX_TIMEOUT (5 * HZ)
34
35 #define ENA_MAX_RINGS min_t(unsigned int, ENA_MAX_NUM_IO_QUEUES, num_possible_cpus())
36
37 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \
38 NETIF_MSG_IFDOWN | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR)
39
40 static struct ena_aenq_handlers aenq_handlers;
41
42 static struct workqueue_struct *ena_wq;
43
44 MODULE_DEVICE_TABLE(pci, ena_pci_tbl);
45
46 static int ena_rss_init_default(struct ena_adapter *adapter);
47 static void check_for_admin_com_state(struct ena_adapter *adapter);
48
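/* ena_tx_timeout - netdev watchdog callback (.ndo_tx_timeout)
 * @dev: network interface device structure
 * @txqueue: index of the stalled TX queue
 *
 * Called when a TX queue has been stopped for longer than the watchdog
 * timeout. Logs the stall details and schedules a device reset unless a
 * reset is already pending.
 */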
49 static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
50 {
51 enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_OS_NETDEV_WD;
52 struct ena_adapter *adapter = netdev_priv(dev);
53 unsigned int time_since_last_napi, threshold;
54 struct ena_ring *tx_ring;
55 int napi_scheduled;
56
57 if (txqueue >= adapter->num_io_queues) {
58 netdev_err(dev, "TX timeout on invalid queue %u\n", txqueue);
59 goto schedule_reset;
60 }
61
62 threshold = jiffies_to_usecs(dev->watchdog_timeo);
63 tx_ring = &adapter->tx_ring[txqueue];
64
65 time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
66 napi_scheduled = !!(tx_ring->napi->state & NAPIF_STATE_SCHED);
67
68 netdev_err(dev,
69 "TX q %d is paused for too long (threshold %u). Time since last napi %u usec. napi scheduled: %d\n",
70 txqueue,
71 threshold,
72 time_since_last_napi,
73 napi_scheduled);
74
75 if (threshold < time_since_last_napi && napi_scheduled) {
76 netdev_err(dev,
77 "napi handler hasn't been called for a long time but is scheduled\n");
78 reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
79 }
80 schedule_reset:
81 /* Change the state of the device to trigger reset
82 * Check that a reset isn't already triggered or in progress
83 */
84 if (test_and_set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
85 return;
86
87 ena_reset_device(adapter, reset_reason);
88 ena_increase_stat(&adapter->dev_stats.tx_timeout, 1, &adapter->syncp);
89 }
90
91 static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu)
92 {
93 int i;
94
95 for (i = 0; i < adapter->num_io_queues; i++)
96 adapter->rx_ring[i].mtu = mtu;
97 }
98
99 static int ena_change_mtu(struct net_device *dev, int new_mtu)
100 {
101 struct ena_adapter *adapter = netdev_priv(dev);
102 int ret;
103
104 ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
105 if (!ret) {
106 netif_dbg(adapter, drv, dev, "Set MTU to %d\n", new_mtu);
107 update_rx_ring_mtu(adapter, new_mtu);
108 WRITE_ONCE(dev->mtu, new_mtu);
109 } else {
110 netif_err(adapter, drv, dev, "Failed to set MTU to %d\n",
111 new_mtu);
112 }
113
114 return ret;
115 }
116
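/* ena_xmit_common - TX path logic shared by the netdev and XDP xmit paths
 * @adapter: network interface private structure
 * @ring: TX ring the packet is queued on
 * @tx_info: software context of the packet being sent
 * @ena_tx_ctx: descriptor context handed to the ena_com layer
 * @next_to_use: producer index used for this packet
 * @bytes: packet length, used for the byte counters
 *
 * Writes the doorbell if the LLQ burst limit was reached, hands the
 * packet's descriptors to the device and updates ring statistics and
 * bookkeeping. Returns 0 on success, negative value on failure.
 */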
117 int ena_xmit_common(struct ena_adapter *adapter,
118 struct ena_ring *ring,
119 struct ena_tx_buffer *tx_info,
120 struct ena_com_tx_ctx *ena_tx_ctx,
121 u16 next_to_use,
122 u32 bytes)
123 {
124 int rc, nb_hw_desc;
125
126 if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
127 ena_tx_ctx))) {
128 netif_dbg(adapter, tx_queued, adapter->netdev,
129 "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
130 ring->qid);
131 ena_ring_tx_doorbell(ring);
132 }
133
134 /* prepare the packet's descriptors for the DMA engine */
135 rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
136 &nb_hw_desc);
137
138 /* In case there isn't enough space in the queue for the packet,
139 * we simply drop it. All other failure reasons of
140 * ena_com_prepare_tx() are fatal and therefore require a device reset.
141 */
142 if (unlikely(rc)) {
143 netif_err(adapter, tx_queued, adapter->netdev,
144 "Failed to prepare tx bufs\n");
145 ena_increase_stat(&ring->tx_stats.prepare_ctx_err, 1, &ring->syncp);
146 if (rc != -ENOMEM)
147 ena_reset_device(adapter, ENA_REGS_RESET_DRIVER_INVALID_STATE);
148 return rc;
149 }
150
151 u64_stats_update_begin(&ring->syncp);
152 ring->tx_stats.cnt++;
153 ring->tx_stats.bytes += bytes;
154 u64_stats_update_end(&ring->syncp);
155
156 tx_info->tx_descs = nb_hw_desc;
157 tx_info->total_tx_size = bytes;
158 tx_info->last_jiffies = jiffies;
159 tx_info->print_once = 0;
160
161 ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
162 ring->ring_size);
163 return 0;
164 }
165
166 static void ena_init_io_rings_common(struct ena_adapter *adapter,
167 struct ena_ring *ring, u16 qid)
168 {
169 ring->qid = qid;
170 ring->pdev = adapter->pdev;
171 ring->dev = &adapter->pdev->dev;
172 ring->netdev = adapter->netdev;
173 ring->napi = &adapter->ena_napi[qid].napi;
174 ring->adapter = adapter;
175 ring->ena_dev = adapter->ena_dev;
176 ring->per_napi_packets = 0;
177 ring->cpu = 0;
178 ring->numa_node = 0;
179 ring->no_interrupt_event_cnt = 0;
180 u64_stats_init(&ring->syncp);
181 }
182
183 void ena_init_io_rings(struct ena_adapter *adapter,
184 int first_index, int count)
185 {
186 struct ena_com_dev *ena_dev;
187 struct ena_ring *txr, *rxr;
188 int i;
189
190 ena_dev = adapter->ena_dev;
191
192 for (i = first_index; i < first_index + count; i++) {
193 txr = &adapter->tx_ring[i];
194 rxr = &adapter->rx_ring[i];
195
196 /* TX common ring state */
197 ena_init_io_rings_common(adapter, txr, i);
198
199 /* TX specific ring state */
200 txr->ring_size = adapter->requested_tx_ring_size;
201 txr->tx_max_header_size = ena_dev->tx_max_header_size;
202 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
203 txr->sgl_size = adapter->max_tx_sgl_size;
204 txr->smoothed_interval =
205 ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
206 txr->disable_meta_caching = adapter->disable_meta_caching;
207 spin_lock_init(&txr->xdp_tx_lock);
208
209 /* Don't init RX queues for xdp queues */
210 if (!ENA_IS_XDP_INDEX(adapter, i)) {
211 /* RX common ring state */
212 ena_init_io_rings_common(adapter, rxr, i);
213
214 /* RX specific ring state */
215 rxr->ring_size = adapter->requested_rx_ring_size;
216 rxr->rx_copybreak = adapter->rx_copybreak;
217 rxr->sgl_size = adapter->max_rx_sgl_size;
218 rxr->smoothed_interval =
219 ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
220 rxr->empty_rx_queue = 0;
221 rxr->rx_headroom = NET_SKB_PAD;
222 adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
223 rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues];
224 }
225 }
226 }
227
228 /* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors)
229 * @adapter: network interface device structure
230 * @qid: queue index
231 *
232 * Return 0 on success, negative on failure
233 */
234 static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
235 {
236 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
237 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
238 int size, i, node;
239
240 if (tx_ring->tx_buffer_info) {
241 netif_err(adapter, ifup,
242 adapter->netdev, "tx_buffer_info is not NULL");
243 return -EEXIST;
244 }
245
246 size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
247 node = cpu_to_node(ena_irq->cpu);
248
249 tx_ring->tx_buffer_info = vzalloc_node(size, node);
250 if (!tx_ring->tx_buffer_info) {
251 tx_ring->tx_buffer_info = vzalloc(size);
252 if (!tx_ring->tx_buffer_info)
253 goto err_tx_buffer_info;
254 }
255
256 size = sizeof(u16) * tx_ring->ring_size;
257 tx_ring->free_ids = vzalloc_node(size, node);
258 if (!tx_ring->free_ids) {
259 tx_ring->free_ids = vzalloc(size);
260 if (!tx_ring->free_ids)
261 goto err_tx_free_ids;
262 }
263
264 size = tx_ring->tx_max_header_size;
265 tx_ring->push_buf_intermediate_buf = vzalloc_node(size, node);
266 if (!tx_ring->push_buf_intermediate_buf) {
267 tx_ring->push_buf_intermediate_buf = vzalloc(size);
268 if (!tx_ring->push_buf_intermediate_buf)
269 goto err_push_buf_intermediate_buf;
270 }
271
272 /* Req id ring for TX out of order completions */
273 for (i = 0; i < tx_ring->ring_size; i++)
274 tx_ring->free_ids[i] = i;
275
276 /* Reset tx statistics */
277 memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats));
278
279 tx_ring->next_to_use = 0;
280 tx_ring->next_to_clean = 0;
281 tx_ring->cpu = ena_irq->cpu;
282 tx_ring->numa_node = node;
283 return 0;
284
285 err_push_buf_intermediate_buf:
286 vfree(tx_ring->free_ids);
287 tx_ring->free_ids = NULL;
288 err_tx_free_ids:
289 vfree(tx_ring->tx_buffer_info);
290 tx_ring->tx_buffer_info = NULL;
291 err_tx_buffer_info:
292 return -ENOMEM;
293 }
294
295 /* ena_free_tx_resources - Free I/O Tx Resources per Queue
296 * @adapter: network interface device structure
297 * @qid: queue index
298 *
299 * Free all transmit software resources
300 */
301 static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
302 {
303 struct ena_ring *tx_ring = &adapter->tx_ring[qid];
304
305 vfree(tx_ring->tx_buffer_info);
306 tx_ring->tx_buffer_info = NULL;
307
308 vfree(tx_ring->free_ids);
309 tx_ring->free_ids = NULL;
310
311 vfree(tx_ring->push_buf_intermediate_buf);
312 tx_ring->push_buf_intermediate_buf = NULL;
313 }
314
315 int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
316 int first_index, int count)
317 {
318 int i, rc = 0;
319
320 for (i = first_index; i < first_index + count; i++) {
321 rc = ena_setup_tx_resources(adapter, i);
322 if (rc)
323 goto err_setup_tx;
324 }
325
326 return 0;
327
328 err_setup_tx:
329
330 netif_err(adapter, ifup, adapter->netdev,
331 "Tx queue %d: allocation failed\n", i);
332
333 /* rewind the index freeing the rings as we go */
334 while (first_index < i--)
335 ena_free_tx_resources(adapter, i);
336 return rc;
337 }
338
339 void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
340 int first_index, int count)
341 {
342 int i;
343
344 for (i = first_index; i < first_index + count; i++)
345 ena_free_tx_resources(adapter, i);
346 }
347
348 /* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
349 * @adapter: board private structure
350 *
351 * Free all transmit software resources
352 */
353 void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
354 {
355 ena_free_all_io_tx_resources_in_range(adapter,
356 0,
357 adapter->xdp_num_queues +
358 adapter->num_io_queues);
359 }
360
361 /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors)
362 * @adapter: network interface device structure
363 * @qid: queue index
364 *
365 * Returns 0 on success, negative on failure
366 */
367 static int ena_setup_rx_resources(struct ena_adapter *adapter,
368 u32 qid)
369 {
370 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
371 struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)];
372 int size, node, i;
373
374 if (rx_ring->rx_buffer_info) {
375 netif_err(adapter, ifup, adapter->netdev,
376 "rx_buffer_info is not NULL");
377 return -EEXIST;
378 }
379
380 /* Allocate an extra element so that in the rx path
381 * we can always prefetch rx_info + 1
382 */
383 size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1);
384 node = cpu_to_node(ena_irq->cpu);
385
386 rx_ring->rx_buffer_info = vzalloc_node(size, node);
387 if (!rx_ring->rx_buffer_info) {
388 rx_ring->rx_buffer_info = vzalloc(size);
389 if (!rx_ring->rx_buffer_info)
390 return -ENOMEM;
391 }
392
393 size = sizeof(u16) * rx_ring->ring_size;
394 rx_ring->free_ids = vzalloc_node(size, node);
395 if (!rx_ring->free_ids) {
396 rx_ring->free_ids = vzalloc(size);
397 if (!rx_ring->free_ids) {
398 vfree(rx_ring->rx_buffer_info);
399 rx_ring->rx_buffer_info = NULL;
400 return -ENOMEM;
401 }
402 }
403
404 /* Req id ring for receiving RX pkts out of order */
405 for (i = 0; i < rx_ring->ring_size; i++)
406 rx_ring->free_ids[i] = i;
407
408 /* Reset rx statistics */
409 memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats));
410
411 rx_ring->next_to_clean = 0;
412 rx_ring->next_to_use = 0;
413 rx_ring->cpu = ena_irq->cpu;
414 rx_ring->numa_node = node;
415
416 return 0;
417 }
418
419 /* ena_free_rx_resources - Free I/O Rx Resources
420 * @adapter: network interface device structure
421 * @qid: queue index
422 *
423 * Free all receive software resources
424 */
425 static void ena_free_rx_resources(struct ena_adapter *adapter,
426 u32 qid)
427 {
428 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
429
430 vfree(rx_ring->rx_buffer_info);
431 rx_ring->rx_buffer_info = NULL;
432
433 vfree(rx_ring->free_ids);
434 rx_ring->free_ids = NULL;
435 }
436
437 /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues
438 * @adapter: board private structure
439 *
440 * Return 0 on success, negative on failure
441 */
442 static int ena_setup_all_rx_resources(struct ena_adapter *adapter)
443 {
444 int i, rc = 0;
445
446 for (i = 0; i < adapter->num_io_queues; i++) {
447 rc = ena_setup_rx_resources(adapter, i);
448 if (rc)
449 goto err_setup_rx;
450 }
451
452 return 0;
453
454 err_setup_rx:
455
456 netif_err(adapter, ifup, adapter->netdev,
457 "Rx queue %d: allocation failed\n", i);
458
459 /* rewind the index freeing the rings as we go */
460 while (i--)
461 ena_free_rx_resources(adapter, i);
462 return rc;
463 }
464
465 /* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues
466 * @adapter: board private structure
467 *
468 * Free all receive software resources
469 */
470 static void ena_free_all_io_rx_resources(struct ena_adapter *adapter)
471 {
472 int i;
473
474 for (i = 0; i < adapter->num_io_queues; i++)
475 ena_free_rx_resources(adapter, i);
476 }
477
478 static struct page *ena_alloc_map_page(struct ena_ring *rx_ring,
479 dma_addr_t *dma)
480 {
481 struct page *page;
482
483 /* This allocates the page on the NUMA node of the CPU the
484 * code is currently running on.
485 */
486 page = dev_alloc_page();
487 if (!page) {
488 ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp);
489 return ERR_PTR(-ENOSPC);
490 }
491
492 /* To enable NIC-side port-mirroring, AKA SPAN port,
493 * we make the buffer readable from the NIC as well
494 */
495 *dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE,
496 DMA_BIDIRECTIONAL);
497 if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) {
498 ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1,
499 &rx_ring->syncp);
500 __free_page(page);
501 return ERR_PTR(-EIO);
502 }
503
504 return page;
505 }
506
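/* ena_alloc_rx_buffer - allocate and map a page for an RX descriptor
 * @rx_ring: RX ring the buffer belongs to
 * @rx_info: software context to attach the page to
 *
 * Keeps the page already attached to @rx_info when present, otherwise
 * allocates a new page, DMA-maps it and sets up the ena_buf address and
 * length, leaving room for headroom and the skb_shared_info tailroom.
 */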
507 static int ena_alloc_rx_buffer(struct ena_ring *rx_ring,
508 struct ena_rx_buffer *rx_info)
509 {
510 int headroom = rx_ring->rx_headroom;
511 struct ena_com_buf *ena_buf;
512 struct page *page;
513 dma_addr_t dma;
514 int tailroom;
515
516 /* restore page offset value in case it has been changed by device */
517 rx_info->buf_offset = headroom;
518
519 /* if the previously allocated page is still unused, keep it */
520 if (unlikely(rx_info->page))
521 return 0;
522
523 /* We handle DMA here */
524 page = ena_alloc_map_page(rx_ring, &dma);
525 if (IS_ERR(page))
526 return PTR_ERR(page);
527
528 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
529 "Allocate page %p, rx_info %p\n", page, rx_info);
530
531 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
532
533 rx_info->page = page;
534 rx_info->dma_addr = dma;
535 rx_info->page_offset = 0;
536 ena_buf = &rx_info->ena_buf;
537 ena_buf->paddr = dma + headroom;
538 ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
539
540 return 0;
541 }
542
543 static void ena_unmap_rx_buff_attrs(struct ena_ring *rx_ring,
544 struct ena_rx_buffer *rx_info,
545 unsigned long attrs)
546 {
547 dma_unmap_page_attrs(rx_ring->dev, rx_info->dma_addr, ENA_PAGE_SIZE, DMA_BIDIRECTIONAL,
548 attrs);
549 }
550
551 static void ena_free_rx_page(struct ena_ring *rx_ring,
552 struct ena_rx_buffer *rx_info)
553 {
554 struct page *page = rx_info->page;
555
556 if (unlikely(!page)) {
557 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
558 "Trying to free unallocated buffer\n");
559 return;
560 }
561
562 ena_unmap_rx_buff_attrs(rx_ring, rx_info, 0);
563
564 __free_page(page);
565 rx_info->page = NULL;
566 }
567
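/* ena_refill_rx_bufs - post up to @num new RX buffers to the device
 * @rx_ring: RX ring to refill
 * @num: number of buffers to post
 *
 * Allocates buffers, adds them to the submission queue and rings the
 * doorbell. Returns the number of buffers actually posted, which may be
 * smaller than @num if an allocation or submission fails.
 */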
568 static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
569 {
570 u16 next_to_use, req_id;
571 u32 i;
572 int rc;
573
574 next_to_use = rx_ring->next_to_use;
575
576 for (i = 0; i < num; i++) {
577 struct ena_rx_buffer *rx_info;
578
579 req_id = rx_ring->free_ids[next_to_use];
580
581 rx_info = &rx_ring->rx_buffer_info[req_id];
582
583 rc = ena_alloc_rx_buffer(rx_ring, rx_info);
584 if (unlikely(rc < 0)) {
585 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
586 "Failed to allocate buffer for rx queue %d\n",
587 rx_ring->qid);
588 break;
589 }
590 rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq,
591 &rx_info->ena_buf,
592 req_id);
593 if (unlikely(rc)) {
594 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
595 "Failed to add buffer for rx queue %d\n",
596 rx_ring->qid);
597 break;
598 }
599 next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use,
600 rx_ring->ring_size);
601 }
602
603 if (unlikely(i < num)) {
604 ena_increase_stat(&rx_ring->rx_stats.refil_partial, 1,
605 &rx_ring->syncp);
606 netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
607 "Refilled rx qid %d with only %d buffers (from %d)\n",
608 rx_ring->qid, i, num);
609 }
610
611 /* ena_com_write_sq_doorbell issues a wmb() */
612 if (likely(i))
613 ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
614
615 rx_ring->next_to_use = next_to_use;
616
617 return i;
618 }
619
620 static void ena_free_rx_bufs(struct ena_adapter *adapter,
621 u32 qid)
622 {
623 struct ena_ring *rx_ring = &adapter->rx_ring[qid];
624 u32 i;
625
626 for (i = 0; i < rx_ring->ring_size; i++) {
627 struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i];
628
629 if (rx_info->page)
630 ena_free_rx_page(rx_ring, rx_info);
631 }
632 }
633
634 /* ena_refill_all_rx_bufs - allocate all queues Rx buffers
635 * @adapter: board private structure
636 */
637 static void ena_refill_all_rx_bufs(struct ena_adapter *adapter)
638 {
639 struct ena_ring *rx_ring;
640 int i, rc, bufs_num;
641
642 for (i = 0; i < adapter->num_io_queues; i++) {
643 rx_ring = &adapter->rx_ring[i];
644 bufs_num = rx_ring->ring_size - 1;
645 rc = ena_refill_rx_bufs(rx_ring, bufs_num);
646
647 if (unlikely(rc != bufs_num))
648 netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev,
649 "Refilling Queue %d failed. allocated %d buffers from: %d\n",
650 i, rc, bufs_num);
651 }
652 }
653
654 static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
655 {
656 int i;
657
658 for (i = 0; i < adapter->num_io_queues; i++)
659 ena_free_rx_bufs(adapter, i);
660 }
661
662 void ena_unmap_tx_buff(struct ena_ring *tx_ring,
663 struct ena_tx_buffer *tx_info)
664 {
665 struct ena_com_buf *ena_buf;
666 u32 cnt;
667 int i;
668
669 ena_buf = tx_info->bufs;
670 cnt = tx_info->num_of_bufs;
671
672 if (unlikely(!cnt))
673 return;
674
675 if (tx_info->map_linear_data) {
676 dma_unmap_single(tx_ring->dev,
677 dma_unmap_addr(ena_buf, paddr),
678 dma_unmap_len(ena_buf, len),
679 DMA_TO_DEVICE);
680 ena_buf++;
681 cnt--;
682 }
683
684 /* unmap remaining mapped pages */
685 for (i = 0; i < cnt; i++) {
686 dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr),
687 dma_unmap_len(ena_buf, len), DMA_TO_DEVICE);
688 ena_buf++;
689 }
690 }
691
692 /* ena_free_tx_bufs - Free Tx Buffers per Queue
693 * @tx_ring: TX ring whose buffers are to be freed
694 */
695 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
696 {
697 bool print_once = true;
698 bool is_xdp_ring;
699 u32 i;
700
701 is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
702
703 for (i = 0; i < tx_ring->ring_size; i++) {
704 struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
705
706 if (!tx_info->skb)
707 continue;
708
709 if (print_once) {
710 netif_notice(tx_ring->adapter, ifdown, tx_ring->netdev,
711 "Free uncompleted tx skb qid %d idx 0x%x\n",
712 tx_ring->qid, i);
713 print_once = false;
714 } else {
715 netif_dbg(tx_ring->adapter, ifdown, tx_ring->netdev,
716 "Free uncompleted tx skb qid %d idx 0x%x\n",
717 tx_ring->qid, i);
718 }
719
720 ena_unmap_tx_buff(tx_ring, tx_info);
721
722 if (is_xdp_ring)
723 xdp_return_frame(tx_info->xdpf);
724 else
725 dev_kfree_skb_any(tx_info->skb);
726 }
727
728 if (!is_xdp_ring)
729 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
730 tx_ring->qid));
731 }
732
733 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
734 {
735 struct ena_ring *tx_ring;
736 int i;
737
738 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
739 tx_ring = &adapter->tx_ring[i];
740 ena_free_tx_bufs(tx_ring);
741 }
742 }
743
744 static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
745 {
746 u16 ena_qid;
747 int i;
748
749 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
750 ena_qid = ENA_IO_TXQ_IDX(i);
751 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
752 }
753 }
754
755 static void ena_destroy_all_rx_queues(struct ena_adapter *adapter)
756 {
757 u16 ena_qid;
758 int i;
759
760 for (i = 0; i < adapter->num_io_queues; i++) {
761 ena_qid = ENA_IO_RXQ_IDX(i);
762 cancel_work_sync(&adapter->ena_napi[i].dim.work);
763 ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
764 ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
765 }
766 }
767
768 static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
769 {
770 ena_destroy_all_tx_queues(adapter);
771 ena_destroy_all_rx_queues(adapter);
772 }
773
774 int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
775 struct ena_tx_buffer *tx_info, bool is_xdp)
776 {
777 if (tx_info)
778 netif_err(ring->adapter,
779 tx_done,
780 ring->netdev,
781 "tx_info doesn't have valid %s. qid %u req_id %u",
782 is_xdp ? "xdp frame" : "skb", ring->qid, req_id);
783 else
784 netif_err(ring->adapter,
785 tx_done,
786 ring->netdev,
787 "Invalid req_id %u in qid %u\n",
788 req_id, ring->qid);
789
790 ena_increase_stat(&ring->tx_stats.bad_req_id, 1, &ring->syncp);
791 ena_reset_device(ring->adapter, ENA_REGS_RESET_INV_TX_REQ_ID);
792
793 return -EFAULT;
794 }
795
796 static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
797 {
798 struct ena_tx_buffer *tx_info;
799
800 tx_info = &tx_ring->tx_buffer_info[req_id];
801 if (likely(tx_info->skb))
802 return 0;
803
804 return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
805 }
806
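/* ena_clean_tx_irq - reclaim completed TX descriptors
 * @tx_ring: TX ring to clean
 * @budget: maximum number of packets to process
 *
 * Unmaps and frees skbs whose transmission completed, returns their
 * descriptors to the free list and wakes the netdev queue if enough
 * space became available. Returns the number of completed packets.
 */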
807 static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
808 {
809 struct netdev_queue *txq;
810 bool above_thresh;
811 u32 tx_bytes = 0;
812 u32 total_done = 0;
813 u16 next_to_clean;
814 u16 req_id;
815 int tx_pkts = 0;
816 int rc;
817
818 next_to_clean = tx_ring->next_to_clean;
819 txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid);
820
821 while (tx_pkts < budget) {
822 struct ena_tx_buffer *tx_info;
823 struct sk_buff *skb;
824
825 rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq,
826 &req_id);
827 if (rc) {
828 if (unlikely(rc == -EINVAL))
829 handle_invalid_req_id(tx_ring, req_id, NULL, false);
830 break;
831 }
832
833 /* validate that the request id points to a valid skb */
834 rc = validate_tx_req_id(tx_ring, req_id);
835 if (rc)
836 break;
837
838 tx_info = &tx_ring->tx_buffer_info[req_id];
839 skb = tx_info->skb;
840
841 /* prefetch skb_end_pointer() to speed up skb_shinfo(skb) */
842 prefetch(&skb->end);
843
844 tx_info->skb = NULL;
845 tx_info->last_jiffies = 0;
846
847 ena_unmap_tx_buff(tx_ring, tx_info);
848
849 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
850 "tx_poll: q %d skb %p completed\n", tx_ring->qid,
851 skb);
852
853 tx_bytes += tx_info->total_tx_size;
854 dev_kfree_skb(skb);
855 tx_pkts++;
856 total_done += tx_info->tx_descs;
857
858 tx_ring->free_ids[next_to_clean] = req_id;
859 next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
860 tx_ring->ring_size);
861 }
862
863 tx_ring->next_to_clean = next_to_clean;
864 ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done);
865
866 netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
867
868 netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
869 "tx_poll: q %d done. total pkts: %d\n",
870 tx_ring->qid, tx_pkts);
871
872 /* need to make the ring's circular update visible to
873 * ena_start_xmit() before checking for netif_queue_stopped().
874 */
875 smp_mb();
876
877 above_thresh = ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
878 ENA_TX_WAKEUP_THRESH);
879 if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) {
880 __netif_tx_lock(txq, smp_processor_id());
881 above_thresh =
882 ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
883 ENA_TX_WAKEUP_THRESH);
884 if (netif_tx_queue_stopped(txq) && above_thresh &&
885 test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) {
886 netif_tx_wake_queue(txq);
887 ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
888 &tx_ring->syncp);
889 }
890 __netif_tx_unlock(txq);
891 }
892
893 return tx_pkts;
894 }
895
896 static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag, u16 len)
897 {
898 struct sk_buff *skb;
899
900 if (!first_frag)
901 skb = napi_alloc_skb(rx_ring->napi, len);
902 else
903 skb = napi_build_skb(first_frag, len);
904
905 if (unlikely(!skb)) {
906 ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1,
907 &rx_ring->syncp);
908
909 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
910 "Failed to allocate skb. first_frag %s\n",
911 first_frag ? "provided" : "not provided");
912 }
913
914 return skb;
915 }
916
917 static bool ena_try_rx_buf_page_reuse(struct ena_rx_buffer *rx_info, u16 buf_len,
918 u16 len, int pkt_offset)
919 {
920 struct ena_com_buf *ena_buf = &rx_info->ena_buf;
921
922 /* Reuse the buffer only if more than ENA_MIN_RX_BUF_SIZE is left in it
923 * for data + headroom + tailroom.
924 */
925 if (SKB_DATA_ALIGN(len + pkt_offset) + ENA_MIN_RX_BUF_SIZE <= ena_buf->len) {
926 page_ref_inc(rx_info->page);
927 rx_info->page_offset += buf_len;
928 ena_buf->paddr += buf_len;
929 ena_buf->len -= buf_len;
930 return true;
931 }
932
933 return false;
934 }
935
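/* ena_rx_skb - build an skb from the received descriptors
 * @rx_ring: RX ring the packet arrived on
 * @ena_bufs: array of completed RX buffers
 * @descs: number of descriptors the packet spans
 * @next_to_clean: consumer index, advanced for every consumed descriptor
 *
 * Short packets (up to rx_copybreak) are copied into a freshly allocated
 * skb so the page can be reused; larger packets use the page directly for
 * the skb head and attach any additional buffers as frags, reusing part
 * of an RX page when possible. Returns NULL on failure.
 */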
936 static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
937 struct ena_com_rx_buf_info *ena_bufs,
938 u32 descs,
939 u16 *next_to_clean)
940 {
941 int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
942 bool is_xdp_loaded = ena_xdp_present_ring(rx_ring);
943 struct ena_rx_buffer *rx_info;
944 struct ena_adapter *adapter;
945 int page_offset, pkt_offset;
946 dma_addr_t pre_reuse_paddr;
947 u16 len, req_id, buf = 0;
948 bool reuse_rx_buf_page;
949 struct sk_buff *skb;
950 void *buf_addr;
951 int buf_offset;
952 u16 buf_len;
953
954 len = ena_bufs[buf].len;
955 req_id = ena_bufs[buf].req_id;
956
957 rx_info = &rx_ring->rx_buffer_info[req_id];
958
959 if (unlikely(!rx_info->page)) {
960 adapter = rx_ring->adapter;
961 netif_err(adapter, rx_err, rx_ring->netdev,
962 "Page is NULL. qid %u req_id %u\n", rx_ring->qid, req_id);
963 ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1, &rx_ring->syncp);
964 ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
965 return NULL;
966 }
967
968 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
969 "rx_info %p page %p\n",
970 rx_info, rx_info->page);
971
972 buf_offset = rx_info->buf_offset;
973 pkt_offset = buf_offset - rx_ring->rx_headroom;
974 page_offset = rx_info->page_offset;
975 buf_addr = page_address(rx_info->page) + page_offset;
976
977 if (len <= rx_ring->rx_copybreak) {
978 skb = ena_alloc_skb(rx_ring, NULL, len);
979 if (unlikely(!skb))
980 return NULL;
981
982 skb_copy_to_linear_data(skb, buf_addr + buf_offset, len);
983 dma_sync_single_for_device(rx_ring->dev,
984 dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
985 len,
986 DMA_FROM_DEVICE);
987
988 skb_put(skb, len);
989 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
990 "RX allocated small packet. len %d.\n", skb->len);
991 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
992 rx_ring->free_ids[*next_to_clean] = req_id;
993 *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs,
994 rx_ring->ring_size);
995 return skb;
996 }
997
998 buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
999
1000 /* If XDP isn't loaded, try to reuse part of the RX buffer */
1001 reuse_rx_buf_page = !is_xdp_loaded &&
1002 ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1003
1004 if (!reuse_rx_buf_page)
1005 ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1006
1007 skb = ena_alloc_skb(rx_ring, buf_addr, buf_len);
1008 if (unlikely(!skb))
1009 return NULL;
1010
1011 /* Populate skb's linear part */
1012 skb_reserve(skb, buf_offset);
1013 skb_put(skb, len);
1014 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1015
1016 do {
1017 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1018 "RX skb updated. len %d. data_len %d\n",
1019 skb->len, skb->data_len);
1020
1021 if (!reuse_rx_buf_page)
1022 rx_info->page = NULL;
1023
1024 rx_ring->free_ids[*next_to_clean] = req_id;
1025 *next_to_clean =
1026 ENA_RX_RING_IDX_NEXT(*next_to_clean,
1027 rx_ring->ring_size);
1028 if (likely(--descs == 0))
1029 break;
1030
1031 buf++;
1032 len = ena_bufs[buf].len;
1033 req_id = ena_bufs[buf].req_id;
1034
1035 rx_info = &rx_ring->rx_buffer_info[req_id];
1036
1037 /* rx_info->buf_offset includes rx_ring->rx_headroom */
1038 buf_offset = rx_info->buf_offset;
1039 pkt_offset = buf_offset - rx_ring->rx_headroom;
1040 buf_len = SKB_DATA_ALIGN(len + buf_offset + tailroom);
1041 page_offset = rx_info->page_offset;
1042
1043 pre_reuse_paddr = dma_unmap_addr(&rx_info->ena_buf, paddr);
1044
1045 reuse_rx_buf_page = !is_xdp_loaded &&
1046 ena_try_rx_buf_page_reuse(rx_info, buf_len, len, pkt_offset);
1047
1048 dma_sync_single_for_cpu(rx_ring->dev,
1049 pre_reuse_paddr + pkt_offset,
1050 len,
1051 DMA_FROM_DEVICE);
1052
1053 if (!reuse_rx_buf_page)
1054 ena_unmap_rx_buff_attrs(rx_ring, rx_info, DMA_ATTR_SKIP_CPU_SYNC);
1055
1056 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
1057 page_offset + buf_offset, len, buf_len);
1058
1059 } while (1);
1060
1061 return skb;
1062 }
1063
1064 /* ena_rx_checksum - indicate in the skb whether HW reported a good checksum
1065 * @rx_ring: the ring on which the packet was received
1066 * @ena_rx_ctx: received packet context/metadata
1067 * @skb: skb currently being received and modified
1068 */
1069 static void ena_rx_checksum(struct ena_ring *rx_ring,
1070 struct ena_com_rx_ctx *ena_rx_ctx,
1071 struct sk_buff *skb)
1072 {
1073 /* Rx csum disabled */
1074 if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) {
1075 skb->ip_summed = CHECKSUM_NONE;
1076 return;
1077 }
1078
1079 /* For fragmented packets the checksum isn't valid */
1080 if (ena_rx_ctx->frag) {
1081 skb->ip_summed = CHECKSUM_NONE;
1082 return;
1083 }
1084
1085 /* if IP and error */
1086 if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) &&
1087 (ena_rx_ctx->l3_csum_err))) {
1088 /* ipv4 checksum error */
1089 skb->ip_summed = CHECKSUM_NONE;
1090 ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1091 &rx_ring->syncp);
1092 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1093 "RX IPv4 header checksum error\n");
1094 return;
1095 }
1096
1097 /* if TCP/UDP */
1098 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1099 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) {
1100 if (unlikely(ena_rx_ctx->l4_csum_err)) {
1101 /* TCP/UDP checksum error */
1102 ena_increase_stat(&rx_ring->rx_stats.csum_bad, 1,
1103 &rx_ring->syncp);
1104 netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev,
1105 "RX L4 checksum error\n");
1106 skb->ip_summed = CHECKSUM_NONE;
1107 return;
1108 }
1109
1110 if (likely(ena_rx_ctx->l4_csum_checked)) {
1111 skb->ip_summed = CHECKSUM_UNNECESSARY;
1112 ena_increase_stat(&rx_ring->rx_stats.csum_good, 1,
1113 &rx_ring->syncp);
1114 } else {
1115 ena_increase_stat(&rx_ring->rx_stats.csum_unchecked, 1,
1116 &rx_ring->syncp);
1117 skb->ip_summed = CHECKSUM_NONE;
1118 }
1119 } else {
1120 skb->ip_summed = CHECKSUM_NONE;
1121 return;
1122 }
1123
1124 }
1125
1126 static void ena_set_rx_hash(struct ena_ring *rx_ring,
1127 struct ena_com_rx_ctx *ena_rx_ctx,
1128 struct sk_buff *skb)
1129 {
1130 enum pkt_hash_types hash_type;
1131
1132 if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) {
1133 if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) ||
1134 (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP)))
1135
1136 hash_type = PKT_HASH_TYPE_L4;
1137 else
1138 hash_type = PKT_HASH_TYPE_NONE;
1139
1140 /* Override hash type if the packet is fragmented */
1141 if (ena_rx_ctx->frag)
1142 hash_type = PKT_HASH_TYPE_NONE;
1143
1144 skb_set_hash(skb, ena_rx_ctx->hash, hash_type);
1145 }
1146 }
1147
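/* ena_xdp_handle_buff - run the XDP program on a received packet
 * @rx_ring: RX ring the packet arrived on
 * @xdp: xdp_buff to initialize with the packet data
 * @num_descs: number of descriptors the packet spans
 *
 * Multi-buffer packets are dropped since XDP multi-buffer isn't supported
 * here. On ENA_XDP_PASS the buffer offset and length are updated in case
 * the program adjusted the headers. Returns the XDP verdict.
 */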
1148 static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u16 num_descs)
1149 {
1150 struct ena_rx_buffer *rx_info;
1151 int ret;
1152
1153 /* XDP multi-buffer packets not supported */
1154 if (unlikely(num_descs > 1)) {
1155 netdev_err_once(rx_ring->adapter->netdev,
1156 "xdp: dropped unsupported multi-buffer packets\n");
1157 ena_increase_stat(&rx_ring->rx_stats.xdp_drop, 1, &rx_ring->syncp);
1158 return ENA_XDP_DROP;
1159 }
1160
1161 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1162 xdp_prepare_buff(xdp, page_address(rx_info->page),
1163 rx_info->buf_offset,
1164 rx_ring->ena_bufs[0].len, false);
1165
1166 ret = ena_xdp_execute(rx_ring, xdp);
1167
1168 /* The xdp program might expand the headers */
1169 if (ret == ENA_XDP_PASS) {
1170 rx_info->buf_offset = xdp->data - xdp->data_hard_start;
1171 rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
1172 }
1173
1174 return ret;
1175 }
1176
1177 /* ena_clean_rx_irq - Cleanup RX irq
1178 * @rx_ring: RX ring to clean
1179 * @napi: napi handler
1180 * @budget: how many packets driver is allowed to clean
1181 *
1182 * Returns the number of cleaned buffers.
1183 */
1184 static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
1185 u32 budget)
1186 {
1187 u16 next_to_clean = rx_ring->next_to_clean;
1188 struct ena_com_rx_ctx ena_rx_ctx;
1189 struct ena_rx_buffer *rx_info;
1190 struct ena_adapter *adapter;
1191 u32 res_budget, work_done;
1192 int rx_copybreak_pkt = 0;
1193 int refill_threshold;
1194 struct sk_buff *skb;
1195 int refill_required;
1196 struct xdp_buff xdp;
1197 int xdp_flags = 0;
1198 int total_len = 0;
1199 int xdp_verdict;
1200 u8 pkt_offset;
1201 int rc = 0;
1202 int i;
1203
1204 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1205 "%s qid %d\n", __func__, rx_ring->qid);
1206 res_budget = budget;
1207 xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq);
1208
1209 do {
1210 xdp_verdict = ENA_XDP_PASS;
1211 skb = NULL;
1212 ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
1213 ena_rx_ctx.max_bufs = rx_ring->sgl_size;
1214 ena_rx_ctx.descs = 0;
1215 ena_rx_ctx.pkt_offset = 0;
1216 rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
1217 rx_ring->ena_com_io_sq,
1218 &ena_rx_ctx);
1219 if (unlikely(rc))
1220 goto error;
1221
1222 if (unlikely(ena_rx_ctx.descs == 0))
1223 break;
1224
1225 /* First descriptor might have an offset set by the device */
1226 rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
1227 pkt_offset = ena_rx_ctx.pkt_offset;
1228 rx_info->buf_offset += pkt_offset;
1229
1230 netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
1231 "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
1232 rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
1233 ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
1234
1235 dma_sync_single_for_cpu(rx_ring->dev,
1236 dma_unmap_addr(&rx_info->ena_buf, paddr) + pkt_offset,
1237 rx_ring->ena_bufs[0].len,
1238 DMA_FROM_DEVICE);
1239
1240 if (ena_xdp_present_ring(rx_ring))
1241 xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp, ena_rx_ctx.descs);
1242
1243 /* allocate skb and fill it */
1244 if (xdp_verdict == ENA_XDP_PASS)
1245 skb = ena_rx_skb(rx_ring,
1246 rx_ring->ena_bufs,
1247 ena_rx_ctx.descs,
1248 &next_to_clean);
1249
1250 if (unlikely(!skb)) {
1251 for (i = 0; i < ena_rx_ctx.descs; i++) {
1252 int req_id = rx_ring->ena_bufs[i].req_id;
1253
1254 rx_ring->free_ids[next_to_clean] = req_id;
1255 next_to_clean =
1256 ENA_RX_RING_IDX_NEXT(next_to_clean,
1257 rx_ring->ring_size);
1258
1259 /* The packet was passed for transmission, unmap it
1260 * from the RX side.
1261 */
1262 if (xdp_verdict & ENA_XDP_FORWARDED) {
1263 ena_unmap_rx_buff_attrs(rx_ring,
1264 &rx_ring->rx_buffer_info[req_id],
1265 DMA_ATTR_SKIP_CPU_SYNC);
1266 rx_ring->rx_buffer_info[req_id].page = NULL;
1267 }
1268 }
1269 if (xdp_verdict != ENA_XDP_PASS) {
1270 xdp_flags |= xdp_verdict;
1271 total_len += ena_rx_ctx.ena_bufs[0].len;
1272 res_budget--;
1273 continue;
1274 }
1275 break;
1276 }
1277
1278 ena_rx_checksum(rx_ring, &ena_rx_ctx, skb);
1279
1280 ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb);
1281
1282 skb_record_rx_queue(skb, rx_ring->qid);
1283
1284 if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak)
1285 rx_copybreak_pkt++;
1286
1287 total_len += skb->len;
1288
1289 napi_gro_receive(napi, skb);
1290
1291 res_budget--;
1292 } while (likely(res_budget));
1293
1294 work_done = budget - res_budget;
1295 rx_ring->per_napi_packets += work_done;
1296 u64_stats_update_begin(&rx_ring->syncp);
1297 rx_ring->rx_stats.bytes += total_len;
1298 rx_ring->rx_stats.cnt += work_done;
1299 rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt;
1300 u64_stats_update_end(&rx_ring->syncp);
1301
1302 rx_ring->next_to_clean = next_to_clean;
1303
1304 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
1305 refill_threshold =
1306 min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
1307 ENA_RX_REFILL_THRESH_PACKET);
1308
1309 /* Optimization, try to batch new rx buffers */
1310 if (refill_required > refill_threshold)
1311 ena_refill_rx_bufs(rx_ring, refill_required);
1312
1313 if (xdp_flags & ENA_XDP_REDIRECT)
1314 xdp_do_flush();
1315
1316 return work_done;
1317
1318 error:
1319 if (xdp_flags & ENA_XDP_REDIRECT)
1320 xdp_do_flush();
1321
1322 adapter = netdev_priv(rx_ring->netdev);
1323
1324 if (rc == -ENOSPC) {
1325 ena_increase_stat(&rx_ring->rx_stats.bad_desc_num, 1, &rx_ring->syncp);
1326 ena_reset_device(adapter, ENA_REGS_RESET_TOO_MANY_RX_DESCS);
1327 } else if (rc == -EFAULT) {
1328 ena_reset_device(adapter, ENA_REGS_RESET_RX_DESCRIPTOR_MALFORMED);
1329 } else {
1330 ena_increase_stat(&rx_ring->rx_stats.bad_req_id, 1,
1331 &rx_ring->syncp);
1332 ena_reset_device(adapter, ENA_REGS_RESET_INV_RX_REQ_ID);
1333 }
1334 return 0;
1335 }
1336
1337 static void ena_dim_work(struct work_struct *w)
1338 {
1339 struct dim *dim = container_of(w, struct dim, work);
1340 struct dim_cq_moder cur_moder =
1341 net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
1342 struct ena_napi *ena_napi = container_of(dim, struct ena_napi, dim);
1343
1344 ena_napi->rx_ring->smoothed_interval = cur_moder.usec;
1345 dim->state = DIM_START_MEASURE;
1346 }
1347
1348 static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi)
1349 {
1350 struct dim_sample dim_sample;
1351 struct ena_ring *rx_ring = ena_napi->rx_ring;
1352
1353 if (!rx_ring->per_napi_packets)
1354 return;
1355
1356 rx_ring->non_empty_napi_events++;
1357
1358 dim_update_sample(rx_ring->non_empty_napi_events,
1359 rx_ring->rx_stats.cnt,
1360 rx_ring->rx_stats.bytes,
1361 &dim_sample);
1362
1363 net_dim(&ena_napi->dim, &dim_sample);
1364
1365 rx_ring->per_napi_packets = 0;
1366 }
1367
1368 void ena_unmask_interrupt(struct ena_ring *tx_ring,
1369 struct ena_ring *rx_ring)
1370 {
1371 u32 rx_interval = tx_ring->smoothed_interval;
1372 struct ena_eth_io_intr_reg intr_reg;
1373
1374 /* Rx ring can be NULL for XDP tx queues, which don't have an
1375 * accompanying rx_ring pair.
1376 */
1377 if (rx_ring)
1378 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
1379 rx_ring->smoothed_interval :
1380 ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
1381
1382 /* Update intr register: rx intr delay,
1383 * tx intr delay and interrupt unmask
1384 */
1385 ena_com_update_intr_reg(&intr_reg,
1386 rx_interval,
1387 tx_ring->smoothed_interval,
1388 true);
1389
1390 ena_increase_stat(&tx_ring->tx_stats.unmask_interrupt, 1,
1391 &tx_ring->syncp);
1392
1393 /* It is a shared MSI-X vector.
1394 * Both the Tx and Rx CQs hold a pointer to it,
1395 * so either one can be used to reach the interrupt register.
1396 * The Tx ring is used because rx_ring is NULL for XDP queues.
1397 */
1398 ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
1399 }
1400
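/* ena_update_ring_numa_node - keep the IO CQs on the local NUMA node
 * @tx_ring: TX ring of the NAPI instance
 * @rx_ring: paired RX ring, or NULL for XDP TX queues
 *
 * If NAPI migrated to a CPU on a different NUMA node, record the new CPU
 * and update the completion queues' NUMA placement accordingly.
 */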
1401 void ena_update_ring_numa_node(struct ena_ring *tx_ring,
1402 struct ena_ring *rx_ring)
1403 {
1404 int cpu = get_cpu();
1405 int numa_node;
1406
1407 /* Check only one ring since the two rings run on the same CPU */
1408 if (likely(tx_ring->cpu == cpu))
1409 goto out;
1410
1411 tx_ring->cpu = cpu;
1412 if (rx_ring)
1413 rx_ring->cpu = cpu;
1414
1415 numa_node = cpu_to_node(cpu);
1416
1417 if (likely(tx_ring->numa_node == numa_node))
1418 goto out;
1419
1420 put_cpu();
1421
1422 if (numa_node != NUMA_NO_NODE) {
1423 ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
1424 tx_ring->numa_node = numa_node;
1425 if (rx_ring) {
1426 rx_ring->numa_node = numa_node;
1427 ena_com_update_numa_node(rx_ring->ena_com_io_cq,
1428 numa_node);
1429 }
1430 }
1431
1432 return;
1433 out:
1434 put_cpu();
1435 }
1436
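/* ena_io_poll - NAPI poll handler for a TX/RX queue pair
 * @napi: NAPI instance
 * @budget: RX packet budget
 *
 * Cleans TX completions, processes RX packets up to @budget and, once the
 * work is done, re-arms adaptive interrupt moderation, refreshes the NUMA
 * placement and unmasks the interrupt. Returns the RX work done when NAPI
 * is completed, or @budget if more work remains.
 */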
1437 static int ena_io_poll(struct napi_struct *napi, int budget)
1438 {
1439 struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
1440 struct ena_ring *tx_ring, *rx_ring;
1441 int tx_work_done;
1442 int rx_work_done = 0;
1443 int tx_budget;
1444 int napi_comp_call = 0;
1445 int ret;
1446
1447 tx_ring = ena_napi->tx_ring;
1448 rx_ring = ena_napi->rx_ring;
1449
1450 tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
1451
1452 if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1453 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags)) {
1454 napi_complete_done(napi, 0);
1455 return 0;
1456 }
1457
1458 tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
1459 /* On netpoll the budget is zero and the handler should only clean the
1460 * tx completions.
1461 */
1462 if (likely(budget))
1463 rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
1464
1465 /* If the device is about to reset or is down, avoid unmasking
1466 * the interrupt and return 0 so NAPI won't reschedule
1467 */
1468 if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
1469 test_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags))) {
1470 napi_complete_done(napi, 0);
1471 ret = 0;
1472
1473 } else if ((budget > rx_work_done) && (tx_budget > tx_work_done)) {
1474 napi_comp_call = 1;
1475
1476 /* Update NUMA and unmask the interrupt only when scheduled
1477 * from the interrupt context (vs. from sk_busy_loop)
1478 */
1479 if (napi_complete_done(napi, rx_work_done) &&
1480 READ_ONCE(ena_napi->interrupts_masked)) {
1481 smp_rmb(); /* make sure interrupts_masked is read */
1482 WRITE_ONCE(ena_napi->interrupts_masked, false);
1483 /* We apply adaptive moderation on Rx path only.
1484 * Tx uses static interrupt moderation.
1485 */
1486 if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev))
1487 ena_adjust_adaptive_rx_intr_moderation(ena_napi);
1488
1489 ena_update_ring_numa_node(tx_ring, rx_ring);
1490 ena_unmask_interrupt(tx_ring, rx_ring);
1491 }
1492
1493 ret = rx_work_done;
1494 } else {
1495 ret = budget;
1496 }
1497
1498 u64_stats_update_begin(&tx_ring->syncp);
1499 tx_ring->tx_stats.napi_comp += napi_comp_call;
1500 tx_ring->tx_stats.tx_poll++;
1501 u64_stats_update_end(&tx_ring->syncp);
1502
1503 tx_ring->tx_stats.last_napi_jiffies = jiffies;
1504
1505 return ret;
1506 }
1507
1508 static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data)
1509 {
1510 struct ena_adapter *adapter = (struct ena_adapter *)data;
1511
1512 ena_com_admin_q_comp_intr_handler(adapter->ena_dev);
1513
1514 /* Don't call the aenq handler before probe is done */
1515 if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags)))
1516 ena_com_aenq_intr_handler(adapter->ena_dev, data);
1517
1518 return IRQ_HANDLED;
1519 }
1520
1521 /* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx
1522 * @irq: interrupt number
1523 * @data: pointer to a network interface private napi device structure
1524 */
1525 static irqreturn_t ena_intr_msix_io(int irq, void *data)
1526 {
1527 struct ena_napi *ena_napi = data;
1528
1529 /* Used to check HW health */
1530 WRITE_ONCE(ena_napi->first_interrupt, true);
1531
1532 WRITE_ONCE(ena_napi->interrupts_masked, true);
1533 smp_wmb(); /* write interrupts_masked before calling napi */
1534
1535 napi_schedule_irqoff(&ena_napi->napi);
1536
1537 return IRQ_HANDLED;
1538 }
1539
1540 /* Reserve a single MSI-X vector for management (admin + aenq),
1541 * plus one vector for each potential IO queue.
1542 * The number of potential IO queues is the minimum of what the device
1543 * supports and the number of vCPUs.
1544 */
1545 static int ena_enable_msix(struct ena_adapter *adapter)
1546 {
1547 int msix_vecs, irq_cnt;
1548
1549 if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1550 netif_err(adapter, probe, adapter->netdev,
1551 "Error, MSI-X is already enabled\n");
1552 return -EPERM;
1553 }
1554
1555 /* Reserve the maximum number of MSI-X vectors we might need */
1556 msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues);
1557 netif_dbg(adapter, probe, adapter->netdev,
1558 "Trying to enable MSI-X, vectors %d\n", msix_vecs);
1559
1560 irq_cnt = pci_alloc_irq_vectors(adapter->pdev, ENA_MIN_MSIX_VEC,
1561 msix_vecs, PCI_IRQ_MSIX);
1562
1563 if (irq_cnt < 0) {
1564 netif_err(adapter, probe, adapter->netdev,
1565 "Failed to enable MSI-X. irq_cnt %d\n", irq_cnt);
1566 return -ENOSPC;
1567 }
1568
1569 if (irq_cnt != msix_vecs) {
1570 netif_notice(adapter, probe, adapter->netdev,
1571 "Enable only %d MSI-X (out of %d), reduce the number of queues\n",
1572 irq_cnt, msix_vecs);
1573 adapter->num_io_queues = irq_cnt - ENA_ADMIN_MSIX_VEC;
1574 }
1575
1576 if (netif_enable_cpu_rmap(adapter->netdev, adapter->num_io_queues))
1577 netif_warn(adapter, probe, adapter->netdev,
1578 "Failed to map IRQs to CPUs\n");
1579
1580 adapter->msix_vecs = irq_cnt;
1581 set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags);
1582
1583 return 0;
1584 }
1585
1586 static void ena_setup_mgmnt_intr(struct ena_adapter *adapter)
1587 {
1588 u32 cpu;
1589
1590 snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name,
1591 ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s",
1592 pci_name(adapter->pdev));
1593 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler =
1594 ena_intr_msix_mgmnt;
1595 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter;
1596 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector =
1597 pci_irq_vector(adapter->pdev, ENA_MGMNT_IRQ_IDX);
1598 cpu = cpumask_first(cpu_online_mask);
1599 adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu;
1600 cpumask_set_cpu(cpu,
1601 &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask);
1602 }
1603
1604 static void ena_setup_io_intr(struct ena_adapter *adapter)
1605 {
1606 struct net_device *netdev;
1607 int irq_idx, i, cpu;
1608 int io_queue_count;
1609
1610 netdev = adapter->netdev;
1611 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1612
1613 for (i = 0; i < io_queue_count; i++) {
1614 irq_idx = ENA_IO_IRQ_IDX(i);
1615 cpu = i % num_online_cpus();
1616
1617 snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE,
1618 "%s-Tx-Rx-%d", netdev->name, i);
1619 adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io;
1620 adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i];
1621 adapter->irq_tbl[irq_idx].vector =
1622 pci_irq_vector(adapter->pdev, irq_idx);
1623 adapter->irq_tbl[irq_idx].cpu = cpu;
1624
1625 cpumask_set_cpu(cpu,
1626 &adapter->irq_tbl[irq_idx].affinity_hint_mask);
1627 }
1628 }
1629
1630 static int ena_request_mgmnt_irq(struct ena_adapter *adapter)
1631 {
1632 unsigned long flags = 0;
1633 struct ena_irq *irq;
1634 int rc;
1635
1636 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1637 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1638 irq->data);
1639 if (rc) {
1640 netif_err(adapter, probe, adapter->netdev,
1641 "Failed to request admin irq\n");
1642 return rc;
1643 }
1644
1645 netif_dbg(adapter, probe, adapter->netdev,
1646 "Set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n",
1647 irq->affinity_hint_mask.bits[0], irq->vector);
1648
1649 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1650
1651 return rc;
1652 }
1653
1654 static int ena_request_io_irq(struct ena_adapter *adapter)
1655 {
1656 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1657 int rc = 0, i, k, irq_idx;
1658 unsigned long flags = 0;
1659 struct ena_irq *irq;
1660
1661 if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) {
1662 netif_err(adapter, ifup, adapter->netdev,
1663 "Failed to request I/O IRQ: MSI-X is not enabled\n");
1664 return -EINVAL;
1665 }
1666
1667 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1668 irq = &adapter->irq_tbl[i];
1669 rc = request_irq(irq->vector, irq->handler, flags, irq->name,
1670 irq->data);
1671 if (rc) {
1672 netif_err(adapter, ifup, adapter->netdev,
1673 "Failed to request I/O IRQ. index %d rc %d\n",
1674 i, rc);
1675 goto err;
1676 }
1677
1678 netif_dbg(adapter, ifup, adapter->netdev,
1679 "Set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n",
1680 i, irq->affinity_hint_mask.bits[0], irq->vector);
1681
1682 irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask);
1683 }
1684
1685 /* Now that IO IRQs have been successfully allocated, map them to the
1686 * corresponding IO NAPI instance. Note that the mgmnt IRQ does not
1687 * have a NAPI, so care must be taken to correctly map IRQs to NAPIs.
1688 */
1689 for (i = 0; i < io_queue_count; i++) {
1690 irq_idx = ENA_IO_IRQ_IDX(i);
1691 irq = &adapter->irq_tbl[irq_idx];
1692 netif_napi_set_irq(&adapter->ena_napi[i].napi, irq->vector);
1693 }
1694
1695 return rc;
1696
1697 err:
1698 for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) {
1699 irq = &adapter->irq_tbl[k];
1700 free_irq(irq->vector, irq->data);
1701 }
1702
1703 return rc;
1704 }
1705
1706 static void ena_free_mgmnt_irq(struct ena_adapter *adapter)
1707 {
1708 struct ena_irq *irq;
1709
1710 irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX];
1711 synchronize_irq(irq->vector);
1712 irq_set_affinity_hint(irq->vector, NULL);
1713 free_irq(irq->vector, irq->data);
1714 }
1715
1716 static void ena_free_io_irq(struct ena_adapter *adapter)
1717 {
1718 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1719 struct ena_irq *irq;
1720 int i;
1721
1722 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) {
1723 struct ena_napi *ena_napi;
1724
1725 irq = &adapter->irq_tbl[i];
1726 irq_set_affinity_hint(irq->vector, NULL);
1727 ena_napi = irq->data;
1728 netif_napi_set_irq(&ena_napi->napi, -1);
1729 free_irq(irq->vector, irq->data);
1730 }
1731 }
1732
1733 static void ena_disable_msix(struct ena_adapter *adapter)
1734 {
1735 if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags))
1736 pci_free_irq_vectors(adapter->pdev);
1737 }
1738
1739 static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
1740 {
1741 u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
1742 int i;
1743
1744 if (!netif_running(adapter->netdev))
1745 return;
1746
1747 for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++)
1748 synchronize_irq(adapter->irq_tbl[i].vector);
1749 }
1750
1751 static void ena_del_napi_in_range(struct ena_adapter *adapter,
1752 int first_index,
1753 int count)
1754 {
1755 int i;
1756
1757 for (i = first_index; i < first_index + count; i++) {
1758 netif_napi_del(&adapter->ena_napi[i].napi);
1759
1760 WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
1761 adapter->ena_napi[i].rx_ring);
1762 }
1763 }
1764
1765 static void ena_init_napi_in_range(struct ena_adapter *adapter,
1766 int first_index, int count)
1767 {
1768 int (*napi_handler)(struct napi_struct *napi, int budget);
1769 int i;
1770
1771 for (i = first_index; i < first_index + count; i++) {
1772 struct ena_napi *napi = &adapter->ena_napi[i];
1773 struct ena_ring *rx_ring, *tx_ring;
1774
1775 memset(napi, 0, sizeof(*napi));
1776
1777 rx_ring = &adapter->rx_ring[i];
1778 tx_ring = &adapter->tx_ring[i];
1779
1780 napi_handler = ena_io_poll;
1781 if (ENA_IS_XDP_INDEX(adapter, i))
1782 napi_handler = ena_xdp_io_poll;
1783
1784 netif_napi_add_config(adapter->netdev, &napi->napi, napi_handler, i);
1785
1786 if (!ENA_IS_XDP_INDEX(adapter, i))
1787 napi->rx_ring = rx_ring;
1788
1789 napi->tx_ring = tx_ring;
1790 napi->qid = i;
1791 }
1792 }
1793
1794 static void ena_napi_disable_in_range(struct ena_adapter *adapter,
1795 int first_index,
1796 int count)
1797 {
1798 struct napi_struct *napi;
1799 int i;
1800
1801 for (i = first_index; i < first_index + count; i++) {
1802 napi = &adapter->ena_napi[i].napi;
1803 if (!ENA_IS_XDP_INDEX(adapter, i)) {
1804 /* This API is supported for non-XDP queues only */
1805 netif_queue_set_napi(adapter->netdev, i,
1806 NETDEV_QUEUE_TYPE_TX, NULL);
1807 netif_queue_set_napi(adapter->netdev, i,
1808 NETDEV_QUEUE_TYPE_RX, NULL);
1809 }
1810 napi_disable(napi);
1811 }
1812 }
1813
1814 static void ena_napi_enable_in_range(struct ena_adapter *adapter,
1815 int first_index,
1816 int count)
1817 {
1818 struct napi_struct *napi;
1819 int i;
1820
1821 for (i = first_index; i < first_index + count; i++) {
1822 napi = &adapter->ena_napi[i].napi;
1823 napi_enable(napi);
1824 if (!ENA_IS_XDP_INDEX(adapter, i)) {
1825 /* This API is supported for non-XDP queues only */
1826 netif_queue_set_napi(adapter->netdev, i,
1827 NETDEV_QUEUE_TYPE_RX, napi);
1828 netif_queue_set_napi(adapter->netdev, i,
1829 NETDEV_QUEUE_TYPE_TX, napi);
1830 }
1831 }
1832 }
1833
1834 /* Configure the Rx forwarding */
1835 static int ena_rss_configure(struct ena_adapter *adapter)
1836 {
1837 struct ena_com_dev *ena_dev = adapter->ena_dev;
1838 int rc;
1839
1840 /* In case the RSS table wasn't initialized by probe */
1841 if (!ena_dev->rss.tbl_log_size) {
1842 rc = ena_rss_init_default(adapter);
1843 if (rc && (rc != -EOPNOTSUPP)) {
1844 netif_err(adapter, ifup, adapter->netdev, "Failed to init RSS rc: %d\n", rc);
1845 return rc;
1846 }
1847 }
1848
1849 /* Set indirect table */
1850 rc = ena_com_indirect_table_set(ena_dev);
1851 if (unlikely(rc && rc != -EOPNOTSUPP))
1852 return rc;
1853
1854 /* Configure hash function (if supported) */
1855 rc = ena_com_set_hash_function(ena_dev);
1856 if (unlikely(rc && (rc != -EOPNOTSUPP)))
1857 return rc;
1858
1859 /* Configure hash inputs (if supported) */
1860 rc = ena_com_set_hash_ctrl(ena_dev);
1861 if (unlikely(rc && (rc != -EOPNOTSUPP)))
1862 return rc;
1863
1864 return 0;
1865 }
1866
1867 static int ena_up_complete(struct ena_adapter *adapter)
1868 {
1869 int rc;
1870
1871 rc = ena_rss_configure(adapter);
1872 if (rc)
1873 return rc;
1874
1875 ena_change_mtu(adapter->netdev, adapter->netdev->mtu);
1876
1877 ena_refill_all_rx_bufs(adapter);
1878
1879 /* enable transmits */
1880 netif_tx_start_all_queues(adapter->netdev);
1881
1882 ena_napi_enable_in_range(adapter,
1883 0,
1884 adapter->xdp_num_queues + adapter->num_io_queues);
1885
1886 return 0;
1887 }
1888
1889 static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
1890 {
1891 struct ena_com_create_io_ctx ctx;
1892 struct ena_com_dev *ena_dev;
1893 struct ena_ring *tx_ring;
1894 u32 msix_vector;
1895 u16 ena_qid;
1896 int rc;
1897
1898 ena_dev = adapter->ena_dev;
1899
1900 tx_ring = &adapter->tx_ring[qid];
1901 msix_vector = ENA_IO_IRQ_IDX(qid);
1902 ena_qid = ENA_IO_TXQ_IDX(qid);
1903
1904 memset(&ctx, 0x0, sizeof(ctx));
1905
1906 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
1907 ctx.qid = ena_qid;
1908 ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
1909 ctx.msix_vector = msix_vector;
1910 ctx.queue_size = tx_ring->ring_size;
1911 ctx.numa_node = tx_ring->numa_node;
1912
1913 rc = ena_com_create_io_queue(ena_dev, &ctx);
1914 if (rc) {
1915 netif_err(adapter, ifup, adapter->netdev,
1916 "Failed to create I/O TX queue num %d rc: %d\n",
1917 qid, rc);
1918 return rc;
1919 }
1920
1921 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1922 &tx_ring->ena_com_io_sq,
1923 &tx_ring->ena_com_io_cq);
1924 if (rc) {
1925 netif_err(adapter, ifup, adapter->netdev,
1926 "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
1927 qid, rc);
1928 ena_com_destroy_io_queue(ena_dev, ena_qid);
1929 return rc;
1930 }
1931
1932 ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node);
1933 return rc;
1934 }
1935
1936 int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
1937 int first_index, int count)
1938 {
1939 struct ena_com_dev *ena_dev = adapter->ena_dev;
1940 int rc, i;
1941
1942 for (i = first_index; i < first_index + count; i++) {
1943 rc = ena_create_io_tx_queue(adapter, i);
1944 if (rc)
1945 goto create_err;
1946 }
1947
1948 return 0;
1949
1950 create_err:
1951 while (i-- > first_index)
1952 ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
1953
1954 return rc;
1955 }
1956
1957 static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
1958 {
1959 struct ena_com_dev *ena_dev;
1960 struct ena_com_create_io_ctx ctx;
1961 struct ena_ring *rx_ring;
1962 u32 msix_vector;
1963 u16 ena_qid;
1964 int rc;
1965
1966 ena_dev = adapter->ena_dev;
1967
1968 rx_ring = &adapter->rx_ring[qid];
1969 msix_vector = ENA_IO_IRQ_IDX(qid);
1970 ena_qid = ENA_IO_RXQ_IDX(qid);
1971
1972 memset(&ctx, 0x0, sizeof(ctx));
1973
1974 ctx.qid = ena_qid;
1975 ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
1976 ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
1977 ctx.msix_vector = msix_vector;
1978 ctx.queue_size = rx_ring->ring_size;
1979 ctx.numa_node = rx_ring->numa_node;
1980
1981 rc = ena_com_create_io_queue(ena_dev, &ctx);
1982 if (rc) {
1983 netif_err(adapter, ifup, adapter->netdev,
1984 "Failed to create I/O RX queue num %d rc: %d\n",
1985 qid, rc);
1986 return rc;
1987 }
1988
1989 rc = ena_com_get_io_handlers(ena_dev, ena_qid,
1990 &rx_ring->ena_com_io_sq,
1991 &rx_ring->ena_com_io_cq);
1992 if (rc) {
1993 netif_err(adapter, ifup, adapter->netdev,
1994 "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
1995 qid, rc);
1996 goto err;
1997 }
1998
1999 ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
2000
2001 return rc;
2002 err:
2003 ena_com_destroy_io_queue(ena_dev, ena_qid);
2004 return rc;
2005 }
2006
2007 static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
2008 {
2009 struct ena_com_dev *ena_dev = adapter->ena_dev;
2010 int rc, i;
2011
2012 for (i = 0; i < adapter->num_io_queues; i++) {
2013 rc = ena_create_io_rx_queue(adapter, i);
2014 if (rc)
2015 goto create_err;
2016 INIT_WORK(&adapter->ena_napi[i].dim.work, ena_dim_work);
2017
2018 ena_xdp_register_rxq_info(&adapter->rx_ring[i]);
2019 }
2020
2021 return 0;
2022
2023 create_err:
2024 while (i--) {
2025 ena_xdp_unregister_rxq_info(&adapter->rx_ring[i]);
2026 cancel_work_sync(&adapter->ena_napi[i].dim.work);
2027 ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i));
2028 }
2029
2030 return rc;
2031 }
2032
2033 static void set_io_rings_size(struct ena_adapter *adapter,
2034 int new_tx_size,
2035 int new_rx_size)
2036 {
2037 int i;
2038
2039 for (i = 0; i < adapter->num_io_queues; i++) {
2040 adapter->tx_ring[i].ring_size = new_tx_size;
2041 adapter->rx_ring[i].ring_size = new_rx_size;
2042 }
2043 }
2044
2045 /* This function allows queue allocation to backoff when the system is
2046 * low on memory. If there is not enough memory to allocate io queues
2047 * the driver will try to allocate smaller queues.
2048 *
2049 * The backoff algorithm is as follows:
2050 * 1. Try to allocate TX and RX; if successful
2051 * 1.1. return success
2052 *
2053 * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same).
2054 *
2055 * 3. If TX or RX is smaller than 256
2056 * 3.1. return failure.
2057 * 4. else
2058 * 4.1. go back to 1.
2059 */
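/* Illustrative example (not part of the original driver; sizes are hypothetical):
 * with requested sizes TX=4096 and RX=1024, a single -ENOMEM failure halves only
 * the larger queue, giving TX=2048, RX=1024; if both were 1024 they would both
 * drop to 512. The retries stop once either new size would fall below
 * ENA_MIN_RING_SIZE.
 */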
2060 static int create_queues_with_size_backoff(struct ena_adapter *adapter)
2061 {
2062 int rc, cur_rx_ring_size, cur_tx_ring_size;
2063 int new_rx_ring_size, new_tx_ring_size;
2064
2065 /* The current queue sizes might be smaller than requested due to
2066 * past queue allocation failures.
2067 */
2068 set_io_rings_size(adapter, adapter->requested_tx_ring_size,
2069 adapter->requested_rx_ring_size);
2070
2071 while (1) {
2072 if (ena_xdp_present(adapter)) {
2073 rc = ena_setup_and_create_all_xdp_queues(adapter);
2074
2075 if (rc)
2076 goto err_setup_tx;
2077 }
2078 rc = ena_setup_tx_resources_in_range(adapter,
2079 0,
2080 adapter->num_io_queues);
2081 if (rc)
2082 goto err_setup_tx;
2083
2084 rc = ena_create_io_tx_queues_in_range(adapter,
2085 0,
2086 adapter->num_io_queues);
2087 if (rc)
2088 goto err_create_tx_queues;
2089
2090 rc = ena_setup_all_rx_resources(adapter);
2091 if (rc)
2092 goto err_setup_rx;
2093
2094 rc = ena_create_all_io_rx_queues(adapter);
2095 if (rc)
2096 goto err_create_rx_queues;
2097
2098 return 0;
2099
2100 err_create_rx_queues:
2101 ena_free_all_io_rx_resources(adapter);
2102 err_setup_rx:
2103 ena_destroy_all_tx_queues(adapter);
2104 err_create_tx_queues:
2105 ena_free_all_io_tx_resources(adapter);
2106 err_setup_tx:
2107 if (rc != -ENOMEM) {
2108 netif_err(adapter, ifup, adapter->netdev,
2109 "Queue creation failed with error code %d\n",
2110 rc);
2111 return rc;
2112 }
2113
2114 cur_tx_ring_size = adapter->tx_ring[0].ring_size;
2115 cur_rx_ring_size = adapter->rx_ring[0].ring_size;
2116
2117 netif_err(adapter, ifup, adapter->netdev,
2118 "Not enough memory to create queues with sizes TX=%d, RX=%d\n",
2119 cur_tx_ring_size, cur_rx_ring_size);
2120
2121 new_tx_ring_size = cur_tx_ring_size;
2122 new_rx_ring_size = cur_rx_ring_size;
2123
2124 /* Decrease the size of the larger queue, or
2125 * decrease both if they are the same size.
2126 */
2127 if (cur_rx_ring_size <= cur_tx_ring_size)
2128 new_tx_ring_size = cur_tx_ring_size / 2;
2129 if (cur_rx_ring_size >= cur_tx_ring_size)
2130 new_rx_ring_size = cur_rx_ring_size / 2;
2131
2132 if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
2133 new_rx_ring_size < ENA_MIN_RING_SIZE) {
2134 netif_err(adapter, ifup, adapter->netdev,
2135 "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
2136 ENA_MIN_RING_SIZE);
2137 return rc;
2138 }
2139
2140 netif_err(adapter, ifup, adapter->netdev,
2141 "Retrying queue creation with sizes TX=%d, RX=%d\n",
2142 new_tx_ring_size,
2143 new_rx_ring_size);
2144
2145 set_io_rings_size(adapter, new_tx_ring_size,
2146 new_rx_ring_size);
2147 }
2148 }
2149
2150 int ena_up(struct ena_adapter *adapter)
2151 {
2152 int io_queue_count, rc, i;
2153
2154 netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
2155
2156 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2157 ena_setup_io_intr(adapter);
2158
2159 /* napi poll functions should be initialized before running
2160 * request_irq(), to handle a rare condition where there is a pending
2161 * interrupt, causing the ISR to fire immediately while the poll
2162 * function wasn't set yet, causing a null dereference
2163 */
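/* Summarized ordering sketch (matches the calls below; napi_enable() happens
 * later, in ena_up_complete()):
 *   ena_init_napi_in_range() -> ena_request_io_irq() -> napi_enable()
 * so a pending interrupt that fires right after request_irq() already finds a
 * valid poll callback registered on the napi instance.
 */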
2164 ena_init_napi_in_range(adapter, 0, io_queue_count);
2165
2166 /* Enabling DIM needs to happen before enabling IRQs since DIM
2167 * is run from napi routine
2168 */
2169 if (ena_com_interrupt_moderation_supported(adapter->ena_dev))
2170 ena_com_enable_adaptive_moderation(adapter->ena_dev);
2171
2172 rc = ena_request_io_irq(adapter);
2173 if (rc)
2174 goto err_req_irq;
2175
2176 rc = create_queues_with_size_backoff(adapter);
2177 if (rc)
2178 goto err_create_queues_with_backoff;
2179
2180 rc = ena_up_complete(adapter);
2181 if (rc)
2182 goto err_up;
2183
2184 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
2185 netif_carrier_on(adapter->netdev);
2186
2187 ena_increase_stat(&adapter->dev_stats.interface_up, 1,
2188 &adapter->syncp);
2189
2190 set_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2191
2192 /* Enable completion queues interrupt */
2193 for (i = 0; i < adapter->num_io_queues; i++)
2194 ena_unmask_interrupt(&adapter->tx_ring[i],
2195 &adapter->rx_ring[i]);
2196
2197 /* schedule napi in case we had pending packets
2198 * from the last time we disabled napi
2199 */
2200 for (i = 0; i < io_queue_count; i++)
2201 napi_schedule(&adapter->ena_napi[i].napi);
2202
2203 return rc;
2204
2205 err_up:
2206 ena_destroy_all_tx_queues(adapter);
2207 ena_free_all_io_tx_resources(adapter);
2208 ena_destroy_all_rx_queues(adapter);
2209 ena_free_all_io_rx_resources(adapter);
2210 err_create_queues_with_backoff:
2211 ena_free_io_irq(adapter);
2212 err_req_irq:
2213 ena_del_napi_in_range(adapter, 0, io_queue_count);
2214
2215 return rc;
2216 }
2217
2218 void ena_down(struct ena_adapter *adapter)
2219 {
2220 int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
2221
2222 netif_dbg(adapter, ifdown, adapter->netdev, "%s\n", __func__);
2223
2224 clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2225
2226 ena_increase_stat(&adapter->dev_stats.interface_down, 1,
2227 &adapter->syncp);
2228
2229 netif_carrier_off(adapter->netdev);
2230 netif_tx_disable(adapter->netdev);
2231
2232 /* After this point the napi handler won't enable the tx queue */
2233 ena_napi_disable_in_range(adapter, 0, io_queue_count);
2234
2235 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) {
2236 int rc;
2237
2238 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
2239 if (rc)
2240 netif_err(adapter, ifdown, adapter->netdev,
2241 "Device reset failed\n");
2242 /* stop submitting admin commands on a device that was reset */
2243 ena_com_set_admin_running_state(adapter->ena_dev, false);
2244 }
2245
2246 ena_destroy_all_io_queues(adapter);
2247
2248 ena_disable_io_intr_sync(adapter);
2249 ena_free_io_irq(adapter);
2250 ena_del_napi_in_range(adapter, 0, io_queue_count);
2251
2252 ena_free_all_tx_bufs(adapter);
2253 ena_free_all_rx_bufs(adapter);
2254 ena_free_all_io_tx_resources(adapter);
2255 ena_free_all_io_rx_resources(adapter);
2256 }
2257
2258 /* ena_open - Called when a network interface is made active
2259 * @netdev: network interface device structure
2260 *
2261 * Returns 0 on success, negative value on failure
2262 *
2263 * The open entry point is called when a network interface is made
2264 * active by the system (IFF_UP). At this point all resources needed
2265 * for transmit and receive operations are allocated, the interrupt
2266 * handler is registered with the OS, the watchdog timer is started,
2267 * and the stack is notified that the interface is ready.
2268 */
2269 static int ena_open(struct net_device *netdev)
2270 {
2271 struct ena_adapter *adapter = netdev_priv(netdev);
2272 int rc;
2273
2274 /* Notify the stack of the actual queue counts. */
2275 rc = netif_set_real_num_tx_queues(netdev, adapter->num_io_queues);
2276 if (rc) {
2277 netif_err(adapter, ifup, netdev, "Can't set num tx queues\n");
2278 return rc;
2279 }
2280
2281 rc = netif_set_real_num_rx_queues(netdev, adapter->num_io_queues);
2282 if (rc) {
2283 netif_err(adapter, ifup, netdev, "Can't set num rx queues\n");
2284 return rc;
2285 }
2286
2287 rc = ena_up(adapter);
2288 if (rc)
2289 return rc;
2290
2291 return rc;
2292 }
2293
2294 /* ena_close - Disables a network interface
2295 * @netdev: network interface device structure
2296 *
2297 * Returns 0, this is not allowed to fail
2298 *
2299 * The close entry point is called when an interface is de-activated
2300 * by the OS. The hardware is still under the drivers control, but
2301 * needs to be disabled. A global MAC reset is issued to stop the
2302 * hardware, and all transmit and receive resources are freed.
2303 */
2304 static int ena_close(struct net_device *netdev)
2305 {
2306 struct ena_adapter *adapter = netdev_priv(netdev);
2307
2308 netif_dbg(adapter, ifdown, netdev, "%s\n", __func__);
2309
2310 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
2311 return 0;
2312
2313 if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2314 ena_down(adapter);
2315
2316 /* Check the device status and issue a reset if needed */
2317 check_for_admin_com_state(adapter);
2318 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
2319 netif_err(adapter, ifdown, adapter->netdev,
2320 "Destroy failure, restarting device\n");
2321 ena_dump_stats_to_dmesg(adapter);
2322 /* rtnl lock already obtained in dev_ioctl() layer */
2323 ena_destroy_device(adapter, false);
2324 ena_restore_device(adapter);
2325 }
2326
2327 return 0;
2328 }
2329
2330 int ena_update_queue_params(struct ena_adapter *adapter,
2331 u32 new_tx_size,
2332 u32 new_rx_size,
2333 u32 new_llq_header_len)
2334 {
2335 bool dev_was_up, large_llq_changed = false;
2336 int rc = 0;
2337
2338 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2339 ena_close(adapter->netdev);
2340 adapter->requested_tx_ring_size = new_tx_size;
2341 adapter->requested_rx_ring_size = new_rx_size;
2342 ena_init_io_rings(adapter,
2343 0,
2344 adapter->xdp_num_queues +
2345 adapter->num_io_queues);
2346
2347 large_llq_changed = adapter->ena_dev->tx_mem_queue_type ==
2348 ENA_ADMIN_PLACEMENT_POLICY_DEV;
2349 large_llq_changed &=
2350 new_llq_header_len != adapter->ena_dev->tx_max_header_size;
2351
2352 /* a check that the configuration is valid is done by the caller */
2353 if (large_llq_changed) {
2354 adapter->large_llq_header_enabled = !adapter->large_llq_header_enabled;
2355
2356 ena_destroy_device(adapter, false);
2357 rc = ena_restore_device(adapter);
2358 }
2359
2360 return dev_was_up && !rc ? ena_up(adapter) : rc;
2361 }
2362
2363 int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak)
2364 {
2365 struct ena_ring *rx_ring;
2366 int i;
2367
2368 if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE))
2369 return -EINVAL;
2370
2371 adapter->rx_copybreak = rx_copybreak;
2372
2373 for (i = 0; i < adapter->num_io_queues; i++) {
2374 rx_ring = &adapter->rx_ring[i];
2375 rx_ring->rx_copybreak = rx_copybreak;
2376 }
2377
2378 return 0;
2379 }
2380
2381 int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
2382 {
2383 struct ena_com_dev *ena_dev = adapter->ena_dev;
2384 int prev_channel_count;
2385 bool dev_was_up;
2386
2387 dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
2388 ena_close(adapter->netdev);
2389 prev_channel_count = adapter->num_io_queues;
2390 adapter->num_io_queues = new_channel_count;
2391 if (ena_xdp_present(adapter) &&
2392 ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
2393 adapter->xdp_first_ring = new_channel_count;
2394 adapter->xdp_num_queues = new_channel_count;
2395 if (prev_channel_count > new_channel_count)
2396 ena_xdp_exchange_program_rx_in_range(adapter,
2397 NULL,
2398 new_channel_count,
2399 prev_channel_count);
2400 else
2401 ena_xdp_exchange_program_rx_in_range(adapter,
2402 adapter->xdp_bpf_prog,
2403 prev_channel_count,
2404 new_channel_count);
2405 }
2406
2407 /* We need to destroy the rss table so that the indirection
2408 * table will be reinitialized by ena_up()
2409 */
2410 ena_com_rss_destroy(ena_dev);
2411 ena_init_io_rings(adapter,
2412 0,
2413 adapter->xdp_num_queues +
2414 adapter->num_io_queues);
2415 return dev_was_up ? ena_open(adapter->netdev) : 0;
2416 }
2417
2418 static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx,
2419 struct sk_buff *skb,
2420 bool disable_meta_caching)
2421 {
2422 u32 mss = skb_shinfo(skb)->gso_size;
2423 struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta;
2424 u8 l4_protocol = 0;
2425
2426 if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) {
2427 ena_tx_ctx->l4_csum_enable = 1;
2428 if (mss) {
2429 ena_tx_ctx->tso_enable = 1;
2430 ena_meta->l4_hdr_len = tcp_hdr(skb)->doff;
2431 ena_tx_ctx->l4_csum_partial = 0;
2432 } else {
2433 ena_tx_ctx->tso_enable = 0;
2434 ena_meta->l4_hdr_len = 0;
2435 ena_tx_ctx->l4_csum_partial = 1;
2436 }
2437
2438 switch (ip_hdr(skb)->version) {
2439 case IPVERSION:
2440 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4;
2441 if (ip_hdr(skb)->frag_off & htons(IP_DF))
2442 ena_tx_ctx->df = 1;
2443 if (mss)
2444 ena_tx_ctx->l3_csum_enable = 1;
2445 l4_protocol = ip_hdr(skb)->protocol;
2446 break;
2447 case 6:
2448 ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6;
2449 l4_protocol = ipv6_hdr(skb)->nexthdr;
2450 break;
2451 default:
2452 break;
2453 }
2454
2455 if (l4_protocol == IPPROTO_TCP)
2456 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP;
2457 else
2458 ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP;
2459
2460 ena_meta->mss = mss;
2461 ena_meta->l3_hdr_len = skb_network_header_len(skb);
2462 ena_meta->l3_hdr_offset = skb_network_offset(skb);
2463 ena_tx_ctx->meta_valid = 1;
2464 } else if (disable_meta_caching) {
2465 memset(ena_meta, 0, sizeof(*ena_meta));
2466 ena_tx_ctx->meta_valid = 1;
2467 } else {
2468 ena_tx_ctx->meta_valid = 0;
2469 }
2470 }
2471
2472 static int ena_check_and_linearize_skb(struct ena_ring *tx_ring,
2473 struct sk_buff *skb)
2474 {
2475 int num_frags, header_len, rc;
2476
2477 num_frags = skb_shinfo(skb)->nr_frags;
2478 header_len = skb_headlen(skb);
2479
2480 if (num_frags < tx_ring->sgl_size)
2481 return 0;
2482
2483 if ((num_frags == tx_ring->sgl_size) &&
2484 (header_len < tx_ring->tx_max_header_size))
2485 return 0;
2486
2487 ena_increase_stat(&tx_ring->tx_stats.linearize, 1, &tx_ring->syncp);
2488
2489 rc = skb_linearize(skb);
2490 if (unlikely(rc)) {
2491 ena_increase_stat(&tx_ring->tx_stats.linearize_failed, 1,
2492 &tx_ring->syncp);
2493 }
2494
2495 return rc;
2496 }
2497
2498 static int ena_tx_map_skb(struct ena_ring *tx_ring,
2499 struct ena_tx_buffer *tx_info,
2500 struct sk_buff *skb,
2501 void **push_hdr,
2502 u16 *header_len)
2503 {
2504 struct ena_adapter *adapter = tx_ring->adapter;
2505 struct ena_com_buf *ena_buf;
2506 dma_addr_t dma;
2507 u32 skb_head_len, frag_len, last_frag;
2508 u16 push_len = 0;
2509 u16 delta = 0;
2510 int i = 0;
2511
2512 skb_head_len = skb_headlen(skb);
2513 tx_info->skb = skb;
2514 ena_buf = tx_info->bufs;
2515
2516 if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2517 /* When the device is in LLQ mode, the driver will copy
2518 * the header into the device memory space.
2519 * The ena_com layer assumes the header is in a linear
2520 * memory space.
2521 * This assumption might be wrong since part of the header
2522 * can be in the fragmented buffers.
2523 * Use skb_header_pointer to make sure the header is in a
2524 * linear memory space.
2525 */
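/* Illustrative sketch (numbers are hypothetical): with tx_max_header_size = 96
 * and an skb whose linear part holds only 64 bytes of a 128-byte packet,
 * push_len = min(128, 96) = 96, so skb_header_pointer() copies the header into
 * push_buf_intermediate_buf and delta = 96 - 64 = 32 bytes of the first frag
 * are already covered by the pushed header and must be skipped when mapping.
 */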
2526
2527 push_len = min_t(u32, skb->len, tx_ring->tx_max_header_size);
2528 *push_hdr = skb_header_pointer(skb, 0, push_len,
2529 tx_ring->push_buf_intermediate_buf);
2530 *header_len = push_len;
2531 if (unlikely(skb->data != *push_hdr)) {
2532 ena_increase_stat(&tx_ring->tx_stats.llq_buffer_copy, 1,
2533 &tx_ring->syncp);
2534
2535 delta = push_len - skb_head_len;
2536 }
2537 } else {
2538 *push_hdr = NULL;
2539 *header_len = min_t(u32, skb_head_len,
2540 tx_ring->tx_max_header_size);
2541 }
2542
2543 netif_dbg(adapter, tx_queued, adapter->netdev,
2544 "skb: %p header_buf->vaddr: %p push_len: %d\n", skb,
2545 *push_hdr, push_len);
2546
2547 if (skb_head_len > push_len) {
2548 dma = dma_map_single(tx_ring->dev, skb->data + push_len,
2549 skb_head_len - push_len, DMA_TO_DEVICE);
2550 if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2551 goto error_report_dma_error;
2552
2553 ena_buf->paddr = dma;
2554 ena_buf->len = skb_head_len - push_len;
2555
2556 ena_buf++;
2557 tx_info->num_of_bufs++;
2558 tx_info->map_linear_data = 1;
2559 } else {
2560 tx_info->map_linear_data = 0;
2561 }
2562
2563 last_frag = skb_shinfo(skb)->nr_frags;
2564
2565 for (i = 0; i < last_frag; i++) {
2566 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2567
2568 frag_len = skb_frag_size(frag);
2569
2570 if (unlikely(delta >= frag_len)) {
2571 delta -= frag_len;
2572 continue;
2573 }
2574
2575 dma = skb_frag_dma_map(tx_ring->dev, frag, delta,
2576 frag_len - delta, DMA_TO_DEVICE);
2577 if (unlikely(dma_mapping_error(tx_ring->dev, dma)))
2578 goto error_report_dma_error;
2579
2580 ena_buf->paddr = dma;
2581 ena_buf->len = frag_len - delta;
2582 ena_buf++;
2583 tx_info->num_of_bufs++;
2584 delta = 0;
2585 }
2586
2587 return 0;
2588
2589 error_report_dma_error:
2590 ena_increase_stat(&tx_ring->tx_stats.dma_mapping_err, 1,
2591 &tx_ring->syncp);
2592 netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map skb\n");
2593
2594 tx_info->skb = NULL;
2595
2596 tx_info->num_of_bufs += i;
2597 ena_unmap_tx_buff(tx_ring, tx_info);
2598
2599 return -EINVAL;
2600 }
2601
2602 /* Called with netif_tx_lock. */
2603 static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
2604 {
2605 struct ena_adapter *adapter = netdev_priv(dev);
2606 struct ena_tx_buffer *tx_info;
2607 struct ena_com_tx_ctx ena_tx_ctx;
2608 struct ena_ring *tx_ring;
2609 struct netdev_queue *txq;
2610 void *push_hdr;
2611 u16 next_to_use, req_id, header_len;
2612 int qid, rc;
2613
2614 netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
2615 /* Determine which tx ring we will be placed on */
2616 qid = skb_get_queue_mapping(skb);
2617 tx_ring = &adapter->tx_ring[qid];
2618 txq = netdev_get_tx_queue(dev, qid);
2619
2620 rc = ena_check_and_linearize_skb(tx_ring, skb);
2621 if (unlikely(rc))
2622 goto error_drop_packet;
2623
2624 next_to_use = tx_ring->next_to_use;
2625 req_id = tx_ring->free_ids[next_to_use];
2626 tx_info = &tx_ring->tx_buffer_info[req_id];
2627 tx_info->num_of_bufs = 0;
2628
2629 WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id);
2630
2631 rc = ena_tx_map_skb(tx_ring, tx_info, skb, &push_hdr, &header_len);
2632 if (unlikely(rc))
2633 goto error_drop_packet;
2634
2635 memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx));
2636 ena_tx_ctx.ena_bufs = tx_info->bufs;
2637 ena_tx_ctx.push_header = push_hdr;
2638 ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
2639 ena_tx_ctx.req_id = req_id;
2640 ena_tx_ctx.header_len = header_len;
2641
2642 /* set flags and meta data */
2643 ena_tx_csum(&ena_tx_ctx, skb, tx_ring->disable_meta_caching);
2644
2645 rc = ena_xmit_common(adapter,
2646 tx_ring,
2647 tx_info,
2648 &ena_tx_ctx,
2649 next_to_use,
2650 skb->len);
2651 if (rc)
2652 goto error_unmap_dma;
2653
2654 netdev_tx_sent_queue(txq, skb->len);
2655
2656 /* Stop the queue when no more space is available. A packet can need up
2657 * to sgl_size + 2 descriptors: one for the meta descriptor and one for the
2658 * header (if the header is larger than tx_max_header_size).
2659 */
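/* Illustrative example (sgl_size value is hypothetical): with sgl_size = 17 the
 * worst-case packet needs 17 data descriptors plus one meta descriptor plus one
 * extra header descriptor, i.e. 19 free SQ entries before accepting another skb.
 */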
2660 if (unlikely(!ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2661 tx_ring->sgl_size + 2))) {
2662 netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n",
2663 __func__, qid);
2664
2665 netif_tx_stop_queue(txq);
2666 ena_increase_stat(&tx_ring->tx_stats.queue_stop, 1,
2667 &tx_ring->syncp);
2668
2669 /* There is a rare condition where this function decides to
2670 * stop the queue but meanwhile clean_tx_irq updates
2671 * next_to_completion and terminates.
2672 * The queue will remain stopped forever.
2673 * To solve this issue add an mb() to make sure that the
2674 * netif_tx_stop_queue() write is visible before checking if
2675 * there is additional space in the queue.
2676 */
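/* Illustrative interleaving this handles (hypothetical timeline): CPU0 (this
 * function) stops the queue; CPU1 (tx completion) frees entries and checks the
 * stopped flag before the stop is visible, so it never wakes the queue. The
 * smp_mb() plus the re-check below let CPU0 wake the queue itself once it
 * observes that space has become available.
 */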
2677 smp_mb();
2678
2679 if (ena_com_sq_have_enough_space(tx_ring->ena_com_io_sq,
2680 ENA_TX_WAKEUP_THRESH)) {
2681 netif_tx_wake_queue(txq);
2682 ena_increase_stat(&tx_ring->tx_stats.queue_wakeup, 1,
2683 &tx_ring->syncp);
2684 }
2685 }
2686
2687 skb_tx_timestamp(skb);
2688
2689 if (netif_xmit_stopped(txq) || !netdev_xmit_more())
2690 /* trigger the dma engine. ena_ring_tx_doorbell()
2691 * calls a memory barrier inside it.
2692 */
2693 ena_ring_tx_doorbell(tx_ring);
2694
2695 return NETDEV_TX_OK;
2696
2697 error_unmap_dma:
2698 ena_unmap_tx_buff(tx_ring, tx_info);
2699 tx_info->skb = NULL;
2700
2701 error_drop_packet:
2702 dev_kfree_skb(skb);
2703 return NETDEV_TX_OK;
2704 }
2705
2706 static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
2707 {
2708 struct device *dev = &pdev->dev;
2709 struct ena_admin_host_info *host_info;
2710 ssize_t ret;
2711 int rc;
2712
2713 /* Allocate only the host info */
2714 rc = ena_com_allocate_host_info(ena_dev);
2715 if (rc) {
2716 dev_err(dev, "Cannot allocate host info\n");
2717 return;
2718 }
2719
2720 host_info = ena_dev->host_attr.host_info;
2721
2722 host_info->bdf = pci_dev_id(pdev);
2723 host_info->os_type = ENA_ADMIN_OS_LINUX;
2724 host_info->kernel_ver = LINUX_VERSION_CODE;
2725 ret = strscpy(host_info->kernel_ver_str, utsname()->version,
2726 sizeof(host_info->kernel_ver_str));
2727 if (ret < 0)
2728 dev_dbg(dev,
2729 "kernel version string will be truncated, status = %zd\n", ret);
2730
2731 host_info->os_dist = 0;
2732 ret = strscpy(host_info->os_dist_str, utsname()->release,
2733 sizeof(host_info->os_dist_str));
2734 if (ret < 0)
2735 dev_dbg(dev,
2736 "OS distribution string will be truncated, status = %zd\n", ret);
2737
2738 host_info->driver_version =
2739 (DRV_MODULE_GEN_MAJOR) |
2740 (DRV_MODULE_GEN_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) |
2741 (DRV_MODULE_GEN_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT) |
2742 ("K"[0] << ENA_ADMIN_HOST_INFO_MODULE_TYPE_SHIFT);
2743 host_info->num_cpus = num_online_cpus();
2744
2745 host_info->driver_supported_features =
2746 ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
2747 ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK |
2748 ENA_ADMIN_HOST_INFO_RX_BUF_MIRRORING_MASK |
2749 ENA_ADMIN_HOST_INFO_RSS_CONFIGURABLE_FUNCTION_KEY_MASK |
2750 ENA_ADMIN_HOST_INFO_RX_PAGE_REUSE_MASK |
2751 ENA_ADMIN_HOST_INFO_PHC_MASK;
2752
2753 rc = ena_com_set_host_attributes(ena_dev);
2754 if (rc) {
2755 if (rc == -EOPNOTSUPP)
2756 dev_warn(dev, "Cannot set host attributes\n");
2757 else
2758 dev_err(dev, "Cannot set host attributes\n");
2759
2760 goto err;
2761 }
2762
2763 return;
2764
2765 err:
2766 ena_com_delete_host_info(ena_dev);
2767 }
2768
2769 static void ena_config_debug_area(struct ena_adapter *adapter)
2770 {
2771 u32 debug_area_size;
2772 int rc, ss_count;
2773
2774 ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS);
2775 if (ss_count <= 0) {
2776 netif_err(adapter, drv, adapter->netdev,
2777 "SS count is negative\n");
2778 return;
2779 }
2780
2781 /* allocate 32 bytes (ETH_GSTRING_LEN) for each string and 64 bits for each value */
2782 debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count;
2783
2784 rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size);
2785 if (rc) {
2786 netif_err(adapter, drv, adapter->netdev,
2787 "Cannot allocate debug area\n");
2788 return;
2789 }
2790
2791 rc = ena_com_set_host_attributes(adapter->ena_dev);
2792 if (rc) {
2793 if (rc == -EOPNOTSUPP)
2794 netif_warn(adapter, drv, adapter->netdev, "Cannot set host attributes\n");
2795 else
2796 netif_err(adapter, drv, adapter->netdev,
2797 "Cannot set host attributes\n");
2798 goto err;
2799 }
2800
2801 return;
2802 err:
2803 ena_com_delete_debug_area(adapter->ena_dev);
2804 }
2805
2806 static void ena_get_stats64(struct net_device *netdev,
2807 struct rtnl_link_stats64 *stats)
2808 {
2809 struct ena_adapter *adapter = netdev_priv(netdev);
2810 struct ena_ring *rx_ring, *tx_ring;
2811 u64 total_xdp_rx_drops = 0;
2812 unsigned int start;
2813 u64 rx_drops;
2814 u64 tx_drops;
2815 int i;
2816
2817 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
2818 return;
2819
2820 for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
2821 u64 bytes, packets, xdp_rx_drops;
2822
2823 tx_ring = &adapter->tx_ring[i];
2824
2825 do {
2826 start = u64_stats_fetch_begin(&tx_ring->syncp);
2827 packets = tx_ring->tx_stats.cnt;
2828 bytes = tx_ring->tx_stats.bytes;
2829 } while (u64_stats_fetch_retry(&tx_ring->syncp, start));
2830
2831 stats->tx_packets += packets;
2832 stats->tx_bytes += bytes;
2833
2834 /* In XDP there isn't an RX queue counterpart */
2835 if (ENA_IS_XDP_INDEX(adapter, i))
2836 continue;
2837
2838 rx_ring = &adapter->rx_ring[i];
2839
2840 do {
2841 start = u64_stats_fetch_begin(&rx_ring->syncp);
2842 packets = rx_ring->rx_stats.cnt;
2843 bytes = rx_ring->rx_stats.bytes;
2844 xdp_rx_drops = rx_ring->rx_stats.xdp_drop;
2845 } while (u64_stats_fetch_retry(&rx_ring->syncp, start));
2846
2847 stats->rx_packets += packets;
2848 stats->rx_bytes += bytes;
2849 total_xdp_rx_drops += xdp_rx_drops;
2850 }
2851
2852 do {
2853 start = u64_stats_fetch_begin(&adapter->syncp);
2854 rx_drops = adapter->dev_stats.rx_drops;
2855 tx_drops = adapter->dev_stats.tx_drops;
2856 } while (u64_stats_fetch_retry(&adapter->syncp, start));
2857
2858 stats->rx_dropped = rx_drops + total_xdp_rx_drops;
2859 stats->tx_dropped = tx_drops;
2860
2861 stats->multicast = 0;
2862 stats->collisions = 0;
2863
2864 stats->rx_length_errors = 0;
2865 stats->rx_crc_errors = 0;
2866 stats->rx_frame_errors = 0;
2867 stats->rx_fifo_errors = 0;
2868 stats->rx_missed_errors = 0;
2869 stats->tx_window_errors = 0;
2870
2871 stats->rx_errors = 0;
2872 stats->tx_errors = 0;
2873 }
2874
2875 static const struct net_device_ops ena_netdev_ops = {
2876 .ndo_open = ena_open,
2877 .ndo_stop = ena_close,
2878 .ndo_start_xmit = ena_start_xmit,
2879 .ndo_get_stats64 = ena_get_stats64,
2880 .ndo_tx_timeout = ena_tx_timeout,
2881 .ndo_change_mtu = ena_change_mtu,
2882 .ndo_validate_addr = eth_validate_addr,
2883 .ndo_bpf = ena_xdp,
2884 .ndo_xdp_xmit = ena_xdp_xmit,
2885 };
2886
2887 static int ena_calc_io_queue_size(struct ena_adapter *adapter,
2888 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2889 {
2890 struct ena_admin_feature_llq_desc *llq = &get_feat_ctx->llq;
2891 struct ena_com_dev *ena_dev = adapter->ena_dev;
2892 u32 tx_queue_size = ENA_DEFAULT_RING_SIZE;
2893 u32 rx_queue_size = ENA_DEFAULT_RING_SIZE;
2894 u32 max_tx_queue_size;
2895 u32 max_rx_queue_size;
2896
2897 /* If this function is called after driver load, the ring sizes have already
2898 * been configured. Take it into account when recalculating ring size.
2899 */
2900 if (adapter->tx_ring->ring_size)
2901 tx_queue_size = adapter->tx_ring->ring_size;
2902
2903 if (adapter->rx_ring->ring_size)
2904 rx_queue_size = adapter->rx_ring->ring_size;
2905
2906 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
2907 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
2908 &get_feat_ctx->max_queue_ext.max_queue_ext;
2909 max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth,
2910 max_queue_ext->max_rx_sq_depth);
2911 max_tx_queue_size = max_queue_ext->max_tx_cq_depth;
2912
2913 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2914 max_tx_queue_size = min_t(u32, max_tx_queue_size,
2915 llq->max_llq_depth);
2916 else
2917 max_tx_queue_size = min_t(u32, max_tx_queue_size,
2918 max_queue_ext->max_tx_sq_depth);
2919
2920 adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2921 max_queue_ext->max_per_packet_tx_descs);
2922 adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2923 max_queue_ext->max_per_packet_rx_descs);
2924 } else {
2925 struct ena_admin_queue_feature_desc *max_queues =
2926 &get_feat_ctx->max_queues;
2927 max_rx_queue_size = min_t(u32, max_queues->max_cq_depth,
2928 max_queues->max_sq_depth);
2929 max_tx_queue_size = max_queues->max_cq_depth;
2930
2931 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
2932 max_tx_queue_size = min_t(u32, max_tx_queue_size,
2933 llq->max_llq_depth);
2934 else
2935 max_tx_queue_size = min_t(u32, max_tx_queue_size,
2936 max_queues->max_sq_depth);
2937
2938 adapter->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2939 max_queues->max_packet_tx_descs);
2940 adapter->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS,
2941 max_queues->max_packet_rx_descs);
2942 }
2943
2944 max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size);
2945 max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size);
2946
2947 if (max_tx_queue_size < ENA_MIN_RING_SIZE) {
2948 netdev_err(adapter->netdev, "Device max TX queue size: %d < minimum: %d\n",
2949 max_tx_queue_size, ENA_MIN_RING_SIZE);
2950 return -EINVAL;
2951 }
2952
2953 if (max_rx_queue_size < ENA_MIN_RING_SIZE) {
2954 netdev_err(adapter->netdev, "Device max RX queue size: %d < minimum: %d\n",
2955 max_rx_queue_size, ENA_MIN_RING_SIZE);
2956 return -EINVAL;
2957 }
2958
2959 /* When forcing large headers, we multiply the entry size by 2, and therefore divide
2960 * the queue size by 2, leaving the amount of memory used by the queues unchanged.
2961 */
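/* Illustrative example (not part of the original source; numbers are
 * hypothetical): if the device reports a 1024-entry LLQ with 128B entries,
 * forcing 256B entries halves the usable depth, so max_tx_queue_size drops
 * from 1024 to 512 while the total LLQ memory footprint stays the same.
 */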
2962 if (adapter->large_llq_header_enabled) {
2963 if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
2964 ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) {
2965 max_tx_queue_size /= 2;
2966 dev_info(&adapter->pdev->dev,
2967 "Forcing large headers and decreasing maximum TX queue size to %d\n",
2968 max_tx_queue_size);
2969 } else {
2970 dev_err(&adapter->pdev->dev,
2971 "Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
2972
2973 adapter->large_llq_header_enabled = false;
2974 }
2975 }
2976
2977 tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE,
2978 max_tx_queue_size);
2979 rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE,
2980 max_rx_queue_size);
2981
2982 tx_queue_size = rounddown_pow_of_two(tx_queue_size);
2983 rx_queue_size = rounddown_pow_of_two(rx_queue_size);
2984
2985 adapter->max_tx_ring_size = max_tx_queue_size;
2986 adapter->max_rx_ring_size = max_rx_queue_size;
2987 adapter->requested_tx_ring_size = tx_queue_size;
2988 adapter->requested_rx_ring_size = rx_queue_size;
2989
2990 return 0;
2991 }
2992
2993 static int ena_device_validate_params(struct ena_adapter *adapter,
2994 struct ena_com_dev_get_features_ctx *get_feat_ctx)
2995 {
2996 struct net_device *netdev = adapter->netdev;
2997 int rc;
2998
2999 rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr,
3000 adapter->mac_addr);
3001 if (!rc) {
3002 netif_err(adapter, drv, netdev,
3003 "Error, mac address are different\n");
3004 return -EINVAL;
3005 }
3006
3007 if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) {
3008 netif_err(adapter, drv, netdev,
3009 "Error, device max mtu is smaller than netdev MTU\n");
3010 return -EINVAL;
3011 }
3012
3013 return 0;
3014 }
3015
3016 static void set_default_llq_configurations(struct ena_adapter *adapter,
3017 struct ena_llq_configurations *llq_config,
3018 struct ena_admin_feature_llq_desc *llq)
3019 {
3020 struct ena_com_dev *ena_dev = adapter->ena_dev;
3021
3022 llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER;
3023 llq_config->llq_stride_ctrl = ENA_ADMIN_MULTIPLE_DESCS_PER_ENTRY;
3024 llq_config->llq_num_decs_before_header = ENA_ADMIN_LLQ_NUM_DESCS_BEFORE_HEADER_2;
3025
3026 adapter->large_llq_header_supported =
3027 !!(ena_dev->supported_features & BIT(ENA_ADMIN_LLQ));
3028 adapter->large_llq_header_supported &=
3029 !!(llq->entry_size_ctrl_supported &
3030 ENA_ADMIN_LIST_ENTRY_SIZE_256B);
3031
3032 if ((llq->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
3033 adapter->large_llq_header_enabled) {
3034 llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_256B;
3035 llq_config->llq_ring_entry_size_value = 256;
3036 } else {
3037 llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B;
3038 llq_config->llq_ring_entry_size_value = 128;
3039 }
3040 }
3041
3042 static int ena_set_queues_placement_policy(struct pci_dev *pdev,
3043 struct ena_com_dev *ena_dev,
3044 struct ena_admin_feature_llq_desc *llq,
3045 struct ena_llq_configurations *llq_default_configurations)
3046 {
3047 int rc;
3048 u32 llq_feature_mask;
3049
3050 llq_feature_mask = 1 << ENA_ADMIN_LLQ;
3051 if (!(ena_dev->supported_features & llq_feature_mask)) {
3052 dev_warn(&pdev->dev,
3053 "LLQ is not supported Fallback to host mode policy.\n");
3054 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3055 return 0;
3056 }
3057
3058 if (!ena_dev->mem_bar) {
3059 netdev_err(ena_dev->net_device,
3060 "LLQ is advertised as supported but device doesn't expose mem bar\n");
3061 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3062 return 0;
3063 }
3064
3065 rc = ena_com_config_dev_mode(ena_dev, llq, llq_default_configurations);
3066 if (unlikely(rc)) {
3067 dev_err(&pdev->dev,
3068 "Failed to configure the device mode. Fallback to host mode policy.\n");
3069 ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
3070 }
3071
3072 return 0;
3073 }
3074
3075 static int ena_map_llq_mem_bar(struct pci_dev *pdev, struct ena_com_dev *ena_dev,
3076 int bars)
3077 {
3078 bool has_mem_bar = !!(bars & BIT(ENA_MEM_BAR));
3079
3080 if (!has_mem_bar)
3081 return 0;
3082
3083 ena_dev->mem_bar = devm_ioremap_wc(&pdev->dev,
3084 pci_resource_start(pdev, ENA_MEM_BAR),
3085 pci_resource_len(pdev, ENA_MEM_BAR));
3086
3087 if (!ena_dev->mem_bar)
3088 return -EFAULT;
3089
3090 return 0;
3091 }
3092
3093 static int ena_device_init(struct ena_adapter *adapter, struct pci_dev *pdev,
3094 struct ena_com_dev_get_features_ctx *get_feat_ctx,
3095 bool *wd_state)
3096 {
3097 struct ena_com_dev *ena_dev = adapter->ena_dev;
3098 struct net_device *netdev = adapter->netdev;
3099 struct ena_llq_configurations llq_config;
3100 struct device *dev = &pdev->dev;
3101 bool readless_supported;
3102 u32 aenq_groups;
3103 int dma_width;
3104 int rc;
3105
3106 rc = ena_com_mmio_reg_read_request_init(ena_dev);
3107 if (rc) {
3108 dev_err(dev, "Failed to init mmio read less\n");
3109 return rc;
3110 }
3111
3112 /* The PCIe configuration space revision id indicates whether mmio reg
3113 * read is disabled
3114 */
3115 readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ);
3116 ena_com_set_mmio_read_mode(ena_dev, readless_supported);
3117
3118 rc = ena_com_dev_reset(ena_dev, ENA_REGS_RESET_NORMAL);
3119 if (rc) {
3120 dev_err(dev, "Can not reset device\n");
3121 goto err_mmio_read_less;
3122 }
3123
3124 rc = ena_com_validate_version(ena_dev);
3125 if (rc) {
3126 dev_err(dev, "Device version is too low\n");
3127 goto err_mmio_read_less;
3128 }
3129
3130 dma_width = ena_com_get_dma_width(ena_dev);
3131 if (dma_width < 0) {
3132 dev_err(dev, "Invalid dma width value %d", dma_width);
3133 rc = dma_width;
3134 goto err_mmio_read_less;
3135 }
3136
3137 rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width));
3138 if (rc) {
3139 dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc);
3140 goto err_mmio_read_less;
3141 }
3142
3143 ena_devlink_params_get(adapter->devlink);
3144
3145 /* ENA admin level init */
3146 rc = ena_com_admin_init(ena_dev, &aenq_handlers);
3147 if (rc) {
3148 dev_err(dev,
3149 "Can not initialize ena admin queue with device\n");
3150 goto err_mmio_read_less;
3151 }
3152
3153 /* To enable the msix interrupts the driver needs to know the number
3154 * of queues. So the driver uses polling mode to retrieve this
3155 * information
3156 */
3157 ena_com_set_admin_polling_mode(ena_dev, true);
3158
3159 ena_config_host_info(ena_dev, pdev);
3160
3161 /* Get Device Attributes */
3162 rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx);
3163 if (rc) {
3164 dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc);
3165 goto err_admin_init;
3166 }
3167
3168 /* Try to turn on all the available aenq groups */
3169 aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) |
3170 BIT(ENA_ADMIN_FATAL_ERROR) |
3171 BIT(ENA_ADMIN_WARNING) |
3172 BIT(ENA_ADMIN_NOTIFICATION) |
3173 BIT(ENA_ADMIN_KEEP_ALIVE);
3174
3175 aenq_groups &= get_feat_ctx->aenq.supported_groups;
3176
3177 rc = ena_com_set_aenq_config(ena_dev, aenq_groups);
3178 if (rc) {
3179 dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc);
3180 goto err_admin_init;
3181 }
3182
3183 *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE));
3184
3185 set_default_llq_configurations(adapter, &llq_config, &get_feat_ctx->llq);
3186
3187 rc = ena_set_queues_placement_policy(pdev, ena_dev, &get_feat_ctx->llq,
3188 &llq_config);
3189 if (rc) {
3190 netdev_err(netdev, "Cannot set queues placement policy rc= %d\n", rc);
3191 goto err_admin_init;
3192 }
3193
3194 rc = ena_calc_io_queue_size(adapter, get_feat_ctx);
3195 if (unlikely(rc))
3196 goto err_admin_init;
3197
3198 rc = ena_phc_init(adapter);
3199 if (unlikely(rc && (rc != -EOPNOTSUPP)))
3200 netdev_err(netdev, "Failed initializing PHC, error: %d\n", rc);
3201
3202 return 0;
3203
3204 err_admin_init:
3205 ena_com_abort_admin_commands(ena_dev);
3206 ena_com_wait_for_abort_completion(ena_dev);
3207 ena_com_delete_host_info(ena_dev);
3208 ena_com_admin_destroy(ena_dev);
3209 err_mmio_read_less:
3210 ena_com_mmio_reg_read_request_destroy(ena_dev);
3211
3212 return rc;
3213 }
3214
3215 static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter)
3216 {
3217 struct ena_com_dev *ena_dev = adapter->ena_dev;
3218 struct device *dev = &adapter->pdev->dev;
3219 int rc;
3220
3221 rc = ena_enable_msix(adapter);
3222 if (rc) {
3223 dev_err(dev, "Can not reserve msix vectors\n");
3224 return rc;
3225 }
3226
3227 ena_setup_mgmnt_intr(adapter);
3228
3229 rc = ena_request_mgmnt_irq(adapter);
3230 if (rc) {
3231 dev_err(dev, "Can not setup management interrupts\n");
3232 goto err_disable_msix;
3233 }
3234
3235 ena_com_set_admin_polling_mode(ena_dev, false);
3236
3237 ena_com_admin_aenq_enable(ena_dev);
3238
3239 return 0;
3240
3241 err_disable_msix:
3242 ena_disable_msix(adapter);
3243
3244 return rc;
3245 }
3246
3247 int ena_destroy_device(struct ena_adapter *adapter, bool graceful)
3248 {
3249 struct net_device *netdev = adapter->netdev;
3250 struct ena_com_dev *ena_dev = adapter->ena_dev;
3251 bool dev_up;
3252 int rc = 0;
3253
3254 if (!test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
3255 return 0;
3256
3257 netif_carrier_off(netdev);
3258
3259 timer_delete_sync(&adapter->timer_service);
3260
3261 dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
3262 adapter->dev_up_before_reset = dev_up;
3263 if (!graceful)
3264 ena_com_set_admin_running_state(ena_dev, false);
3265
3266 if (dev_up)
3267 ena_down(adapter);
3268
3269 /* Stop the device from sending AENQ events (if the reset flag is set
3270 * and the device is up, ena_down() has already reset the device).
3271 */
3272 if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
3273 rc = ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
3274
3275 ena_free_mgmnt_irq(adapter);
3276
3277 ena_disable_msix(adapter);
3278
3279 ena_com_abort_admin_commands(ena_dev);
3280
3281 ena_com_wait_for_abort_completion(ena_dev);
3282
3283 ena_com_admin_destroy(ena_dev);
3284
3285 ena_phc_destroy(adapter);
3286
3287 ena_com_mmio_reg_read_request_destroy(ena_dev);
3288
3289 /* return reset reason to default value */
3290 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3291
3292 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
3293 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3294
3295 return rc;
3296 }
3297
3298 int ena_restore_device(struct ena_adapter *adapter)
3299 {
3300 struct ena_com_dev_get_features_ctx get_feat_ctx;
3301 struct ena_com_dev *ena_dev = adapter->ena_dev;
3302 struct pci_dev *pdev = adapter->pdev;
3303 struct ena_ring *txr;
3304 int rc, count, i;
3305 bool wd_state;
3306
3307 set_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3308 rc = ena_device_init(adapter, adapter->pdev, &get_feat_ctx, &wd_state);
3309 if (rc) {
3310 dev_err(&pdev->dev, "Can not initialize device\n");
3311 goto err;
3312 }
3313 adapter->wd_state = wd_state;
3314
3315 count = adapter->xdp_num_queues + adapter->num_io_queues;
3316 for (i = 0 ; i < count; i++) {
3317 txr = &adapter->tx_ring[i];
3318 txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
3319 txr->tx_max_header_size = ena_dev->tx_max_header_size;
3320 }
3321
3322 rc = ena_device_validate_params(adapter, &get_feat_ctx);
3323 if (rc) {
3324 dev_err(&pdev->dev, "Validation of device parameters failed\n");
3325 goto err_device_destroy;
3326 }
3327
3328 rc = ena_enable_msix_and_set_admin_interrupts(adapter);
3329 if (rc) {
3330 dev_err(&pdev->dev, "Enable MSI-X failed\n");
3331 goto err_device_destroy;
3332 }
3333 /* If the interface was up before the reset bring it up */
3334 if (adapter->dev_up_before_reset) {
3335 rc = ena_up(adapter);
3336 if (rc) {
3337 dev_err(&pdev->dev, "Failed to create I/O queues\n");
3338 goto err_disable_msix;
3339 }
3340 }
3341
3342 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3343
3344 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3345 if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags))
3346 netif_carrier_on(adapter->netdev);
3347
3348 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3349 adapter->last_keep_alive_jiffies = jiffies;
3350
3351 return rc;
3352 err_disable_msix:
3353 ena_free_mgmnt_irq(adapter);
3354 ena_disable_msix(adapter);
3355 err_device_destroy:
3356 ena_com_abort_admin_commands(ena_dev);
3357 ena_com_wait_for_abort_completion(ena_dev);
3358 ena_com_admin_destroy(ena_dev);
3359 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_DRIVER_INVALID_STATE);
3360 ena_phc_destroy(adapter);
3361 ena_com_mmio_reg_read_request_destroy(ena_dev);
3362 err:
3363 clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
3364 clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags);
3365 dev_err(&pdev->dev,
3366 "Reset attempt failed. Can not reset the device\n");
3367
3368 return rc;
3369 }
3370
3371 static void ena_fw_reset_device(struct work_struct *work)
3372 {
3373 int rc = 0;
3374
3375 struct ena_adapter *adapter =
3376 container_of(work, struct ena_adapter, reset_task);
3377
3378 rtnl_lock();
3379
3380 if (likely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3381 rc |= ena_destroy_device(adapter, false);
3382 rc |= ena_restore_device(adapter);
3383 adapter->dev_stats.reset_fail += !!rc;
3384
3385 dev_err(&adapter->pdev->dev, "Device reset completed successfully\n");
3386 }
3387
3388 rtnl_unlock();
3389 }
3390
3391 static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
3392 struct ena_ring *rx_ring)
3393 {
3394 struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi);
3395
3396 if (likely(READ_ONCE(ena_napi->first_interrupt)))
3397 return 0;
3398
3399 if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
3400 return 0;
3401
3402 rx_ring->no_interrupt_event_cnt++;
3403
3404 if (rx_ring->no_interrupt_event_cnt == ENA_MAX_NO_INTERRUPT_ITERATIONS) {
3405 netif_err(adapter, rx_err, adapter->netdev,
3406 "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
3407 rx_ring->qid);
3408
3409 ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3410 return -EIO;
3411 }
3412
3413 return 0;
3414 }
3415
3416 static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
3417 struct ena_ring *tx_ring)
3418 {
3419 struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi);
3420 enum ena_regs_reset_reason_types reset_reason = ENA_REGS_RESET_MISS_TX_CMPL;
3421 unsigned int time_since_last_napi;
3422 unsigned int missing_tx_comp_to;
3423 bool is_tx_comp_time_expired;
3424 struct ena_tx_buffer *tx_buf;
3425 unsigned long last_jiffies;
3426 int napi_scheduled;
3427 u32 missed_tx = 0;
3428 int i, rc = 0;
3429
3430 missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to);
3431
3432 for (i = 0; i < tx_ring->ring_size; i++) {
3433 tx_buf = &tx_ring->tx_buffer_info[i];
3434 last_jiffies = tx_buf->last_jiffies;
3435
3436 if (last_jiffies == 0)
3437 /* no pending Tx at this location */
3438 continue;
3439
3440 is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3441 2 * adapter->missing_tx_completion_to);
3442
3443 if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) {
3444 /* If the interrupt is still not received after the grace
3445 * period, we schedule a reset
3446 */
3447 netif_err(adapter, tx_err, adapter->netdev,
3448 "Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
3449 tx_ring->qid);
3450 ena_reset_device(adapter, ENA_REGS_RESET_MISS_INTERRUPT);
3451 return -EIO;
3452 }
3453
3454 is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies +
3455 adapter->missing_tx_completion_to);
3456
3457 if (unlikely(is_tx_comp_time_expired)) {
3458 time_since_last_napi =
3459 jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies);
3460 napi_scheduled = !!(ena_napi->napi.state & NAPIF_STATE_SCHED);
3461
3462 if (missing_tx_comp_to < time_since_last_napi && napi_scheduled) {
3463 /* We suspect napi isn't called because the
3464 * bottom half is not run. Require a bigger
3465 * timeout for these cases
3466 */
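/* Illustrative numbers (hypothetical): with missing_tx_completion_to equivalent
 * to 4 seconds, and napi scheduled but not run for longer than that, a buffer
 * pending for 5 seconds is skipped here and only counted as missed (with the
 * poll-starvation reset reason) once it has been pending for more than 8
 * seconds.
 */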
3467 if (!time_is_before_jiffies(last_jiffies +
3468 2 * adapter->missing_tx_completion_to))
3469 continue;
3470
3471 reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION;
3472 }
3473
3474 missed_tx++;
3475
3476 if (tx_buf->print_once)
3477 continue;
3478
3479 netif_notice(adapter, tx_err, adapter->netdev,
3480 "TX hasn't completed, qid %d, index %d. %u usecs from last napi execution, napi scheduled: %d\n",
3481 tx_ring->qid, i, time_since_last_napi, napi_scheduled);
3482
3483 tx_buf->print_once = 1;
3484 }
3485 }
3486
3487 if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
3488 netif_err(adapter, tx_err, adapter->netdev,
3489 "Lost TX completions are above the threshold (%d > %d). Completion transmission timeout: %u.\n",
3490 missed_tx,
3491 adapter->missing_tx_completion_threshold,
3492 missing_tx_comp_to);
3493 netif_err(adapter, tx_err, adapter->netdev,
3494 "Resetting the device\n");
3495
3496 ena_reset_device(adapter, reset_reason);
3497 rc = -EIO;
3498 }
3499
3500 ena_increase_stat(&tx_ring->tx_stats.missed_tx, missed_tx,
3501 &tx_ring->syncp);
3502
3503 return rc;
3504 }
3505
3506 static void check_for_missing_completions(struct ena_adapter *adapter)
3507 {
3508 struct ena_ring *tx_ring;
3509 struct ena_ring *rx_ring;
3510 int qid, budget, rc;
3511 int io_queue_count;
3512
3513 io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
3514
3515 /* Make sure the driver isn't bringing the device up or down in another process */
3516 smp_rmb();
3517
3518 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3519 return;
3520
3521 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3522 return;
3523
3524 if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
3525 return;
3526
3527 budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
3528
3529 qid = adapter->last_monitored_tx_qid;
3530
3531 while (budget) {
3532 qid = (qid + 1) % io_queue_count;
3533
3534 tx_ring = &adapter->tx_ring[qid];
3535 rx_ring = &adapter->rx_ring[qid];
3536
3537 rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
3538 if (unlikely(rc))
3539 return;
3540
3541 rc = !ENA_IS_XDP_INDEX(adapter, qid) ?
3542 check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
3543 if (unlikely(rc))
3544 return;
3545
3546 budget--;
3547 }
3548
3549 adapter->last_monitored_tx_qid = qid;
3550 }
3551
3552 /* trigger napi schedule after 2 consecutive detections */
3553 #define EMPTY_RX_REFILL 2
3554 /* For the rare case where the device runs out of Rx descriptors and the
3555 * napi handler failed to refill new Rx descriptors (due to a lack of memory
3556 * for example).
3557 * This case will lead to a deadlock:
3558 * The device won't send interrupts since all the new Rx packets will be dropped
3559 * The napi handler won't allocate new Rx descriptors so the device won't be
3560 * able to send new packets.
3561 *
3562 * This scenario can happen when the kernel's vm.min_free_kbytes is too small.
3563 * It is recommended to have at least 512MB, with a minimum of 128MB for
3564 * constrained environments.
3565 *
3566 * When such a situation is detected - Reschedule napi
3567 */
3568 static void check_for_empty_rx_ring(struct ena_adapter *adapter)
3569 {
3570 struct ena_ring *rx_ring;
3571 int i, refill_required;
3572
3573 if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
3574 return;
3575
3576 if (test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))
3577 return;
3578
3579 for (i = 0; i < adapter->num_io_queues; i++) {
3580 rx_ring = &adapter->rx_ring[i];
3581
3582 refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
3583 if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
3584 rx_ring->empty_rx_queue++;
3585
3586 if (rx_ring->empty_rx_queue >= EMPTY_RX_REFILL) {
3587 ena_increase_stat(&rx_ring->rx_stats.empty_rx_ring, 1,
3588 &rx_ring->syncp);
3589
3590 netif_err(adapter, drv, adapter->netdev,
3591 "Trigger refill for ring %d\n", i);
3592
3593 napi_schedule(rx_ring->napi);
3594 rx_ring->empty_rx_queue = 0;
3595 }
3596 } else {
3597 rx_ring->empty_rx_queue = 0;
3598 }
3599 }
3600 }
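
/*
 * Illustrative sketch, not part of the driver: the empty-ring detection
 * above. A ring of size N holds at most N - 1 posted descriptors, so a free
 * count of N - 1 means the ring is completely empty; napi is rescheduled
 * only after EMPTY_RX_REFILL consecutive observations. The names below are
 * hypothetical.
 */
#include <linux/types.h>

static inline bool example_should_refill(u32 free_entries, u32 ring_size,
					 u8 *consecutive_empty)
{
	if (free_entries != ring_size - 1) {
		*consecutive_empty = 0;		/* descriptors are still posted */
		return false;
	}

	if (++(*consecutive_empty) < 2 /* EMPTY_RX_REFILL */)
		return false;			/* wait for a second detection */

	*consecutive_empty = 0;
	return true;				/* reschedule napi to refill */
}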
3601
3602 /* Check for keep alive expiration */
3603 static void check_for_missing_keep_alive(struct ena_adapter *adapter)
3604 {
3605 unsigned long keep_alive_expired;
3606
3607 if (!adapter->wd_state)
3608 return;
3609
3610 if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3611 return;
3612
3613 keep_alive_expired = adapter->last_keep_alive_jiffies +
3614 adapter->keep_alive_timeout;
3615 if (unlikely(time_is_before_jiffies(keep_alive_expired))) {
3616 netif_err(adapter, drv, adapter->netdev,
3617 "Keep alive watchdog timeout.\n");
3618 ena_increase_stat(&adapter->dev_stats.wd_expired, 1,
3619 &adapter->syncp);
3620 ena_reset_device(adapter, ENA_REGS_RESET_KEEP_ALIVE_TO);
3621 }
3622 }
3623
3624 static void check_for_admin_com_state(struct ena_adapter *adapter)
3625 {
3626 if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) {
3627 netif_err(adapter, drv, adapter->netdev,
3628 "ENA admin queue is not in running state!\n");
3629 ena_increase_stat(&adapter->dev_stats.admin_q_pause, 1,
3630 &adapter->syncp);
3631 ena_reset_device(adapter, ENA_REGS_RESET_ADMIN_TO);
3632 }
3633 }
3634
3635 static void ena_update_hints(struct ena_adapter *adapter,
3636 struct ena_admin_ena_hw_hints *hints)
3637 {
3638 struct net_device *netdev = adapter->netdev;
3639
3640 if (hints->admin_completion_tx_timeout)
3641 adapter->ena_dev->admin_queue.completion_timeout =
3642 hints->admin_completion_tx_timeout * 1000;
3643
3644 if (hints->mmio_read_timeout)
3645 /* convert to usec */
3646 adapter->ena_dev->mmio_read.reg_read_to =
3647 hints->mmio_read_timeout * 1000;
3648
3649 if (hints->missed_tx_completion_count_threshold_to_reset)
3650 adapter->missing_tx_completion_threshold =
3651 hints->missed_tx_completion_count_threshold_to_reset;
3652
3653 if (hints->missing_tx_completion_timeout) {
3654 if (hints->missing_tx_completion_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3655 adapter->missing_tx_completion_to = ENA_HW_HINTS_NO_TIMEOUT;
3656 else
3657 adapter->missing_tx_completion_to =
3658 msecs_to_jiffies(hints->missing_tx_completion_timeout);
3659 }
3660
3661 if (hints->netdev_wd_timeout)
3662 netdev->watchdog_timeo = msecs_to_jiffies(hints->netdev_wd_timeout);
3663
3664 if (hints->driver_watchdog_timeout) {
3665 if (hints->driver_watchdog_timeout == ENA_HW_HINTS_NO_TIMEOUT)
3666 adapter->keep_alive_timeout = ENA_HW_HINTS_NO_TIMEOUT;
3667 else
3668 adapter->keep_alive_timeout =
3669 msecs_to_jiffies(hints->driver_watchdog_timeout);
3670 }
3671 }
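
/*
 * Illustrative sketch, not part of the driver: the hint handling above keeps
 * the "no timeout" sentinel as-is and converts real millisecond values to
 * jiffies. The names below are hypothetical, and EXAMPLE_NO_TIMEOUT is an
 * assumed stand-in for ENA_HW_HINTS_NO_TIMEOUT.
 */
#include <linux/jiffies.h>
#include <linux/types.h>

#define EXAMPLE_NO_TIMEOUT 0xFFFF	/* assumed sentinel value */

static inline unsigned long example_hint_to_jiffies(u16 hint_msecs)
{
	/* Propagate the sentinel untouched; convert real hints to jiffies */
	if (hint_msecs == EXAMPLE_NO_TIMEOUT)
		return EXAMPLE_NO_TIMEOUT;

	return msecs_to_jiffies(hint_msecs);
}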
3672
3673 static void ena_update_host_info(struct ena_admin_host_info *host_info,
3674 struct net_device *netdev)
3675 {
3676 host_info->supported_network_features[0] =
3677 netdev->features & GENMASK_ULL(31, 0);
3678 host_info->supported_network_features[1] =
3679 (netdev->features & GENMASK_ULL(63, 32)) >> 32;
3680 }
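
/*
 * Illustrative sketch, not part of the driver: splitting the 64-bit netdev
 * feature mask into the two 32-bit words expected by the host-info
 * structure, mirroring the GENMASK_ULL() arithmetic above. The names below
 * are hypothetical.
 */
#include <linux/bits.h>
#include <linux/types.h>

static inline void example_split_features(u64 features, u32 *lo, u32 *hi)
{
	*lo = features & GENMASK_ULL(31, 0);		/* bits 31..0  */
	*hi = (features & GENMASK_ULL(63, 32)) >> 32;	/* bits 63..32 */
}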
3681
3682 static void ena_timer_service(struct timer_list *t)
3683 {
3684 struct ena_adapter *adapter = timer_container_of(adapter, t,
3685 timer_service);
3686 u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr;
3687 struct ena_admin_host_info *host_info =
3688 adapter->ena_dev->host_attr.host_info;
3689
3690 check_for_missing_keep_alive(adapter);
3691
3692 check_for_admin_com_state(adapter);
3693
3694 check_for_missing_completions(adapter);
3695
3696 check_for_empty_rx_ring(adapter);
3697
3698 if (debug_area)
3699 ena_dump_stats_to_buf(adapter, debug_area);
3700
3701 if (host_info)
3702 ena_update_host_info(host_info, adapter->netdev);
3703
3704 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
3705 netif_err(adapter, drv, adapter->netdev,
3706 "Trigger reset is on\n");
3707 ena_dump_stats_to_dmesg(adapter);
3708 queue_work(ena_wq, &adapter->reset_task);
3709 return;
3710 }
3711
3712 /* Reset the timer */
3713 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
3714 }
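
/*
 * Illustrative sketch, not part of the driver: the self-rearming one-second
 * service timer pattern used above. round_jiffies() aligns the expiration so
 * periodic timers across the system tend to fire together and save wakeups.
 * The names below are hypothetical.
 */
#include <linux/jiffies.h>
#include <linux/timer.h>

static struct timer_list example_timer;

static void example_timer_fn(struct timer_list *t)
{
	/* ... periodic housekeeping would go here ... */

	/* Re-arm for roughly one second from now */
	mod_timer(&example_timer, round_jiffies(jiffies + HZ));
}

static inline void example_timer_start(void)
{
	timer_setup(&example_timer, example_timer_fn, 0);
	mod_timer(&example_timer, round_jiffies(jiffies + HZ));
}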
3715
3716 static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
3717 struct ena_com_dev *ena_dev,
3718 struct ena_com_dev_get_features_ctx *get_feat_ctx)
3719 {
3720 u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
3721
3722 if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
3723 struct ena_admin_queue_ext_feature_fields *max_queue_ext =
3724 &get_feat_ctx->max_queue_ext.max_queue_ext;
3725 io_rx_num = min_t(u32, max_queue_ext->max_rx_sq_num,
3726 max_queue_ext->max_rx_cq_num);
3727
3728 io_tx_sq_num = max_queue_ext->max_tx_sq_num;
3729 io_tx_cq_num = max_queue_ext->max_tx_cq_num;
3730 } else {
3731 struct ena_admin_queue_feature_desc *max_queues =
3732 &get_feat_ctx->max_queues;
3733 io_tx_sq_num = max_queues->max_sq_num;
3734 io_tx_cq_num = max_queues->max_cq_num;
3735 io_rx_num = min_t(u32, io_tx_sq_num, io_tx_cq_num);
3736 }
3737
3738 /* In case of LLQ use the llq fields for the tx SQ/CQ */
3739 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
3740 io_tx_sq_num = get_feat_ctx->llq.max_llq_num;
3741
3742 max_num_io_queues = min_t(u32, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES);
3743 max_num_io_queues = min_t(u32, max_num_io_queues, io_rx_num);
3744 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_sq_num);
3745 max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num);
3746 /* 1 IRQ for mgmnt and 1 IRQ for each IO queue */
3747 max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1);
3748
3749 return max_num_io_queues;
3750 }
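
/*
 * Illustrative sketch, not part of the driver: clamping the IO queue count
 * to the most restrictive of several limits, as the helper above does with
 * its chain of min_t() calls. The names and limits below are hypothetical.
 */
#include <linux/minmax.h>
#include <linux/types.h>

static inline u32 example_max_io_queues(u32 hw_rx_limit, u32 hw_tx_limit,
					u32 online_cpus, u32 msix_vectors)
{
	u32 n = min_t(u32, hw_rx_limit, hw_tx_limit);

	n = min_t(u32, n, online_cpus);
	/* One vector is reserved for management; the rest serve IO queues */
	n = min_t(u32, n, msix_vectors - 1);

	return n;
}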
3751
3752 static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat,
3753 struct net_device *netdev)
3754 {
3755 netdev_features_t dev_features = 0;
3756
3757 /* Set offload features */
3758 if (feat->offload.tx &
3759 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK)
3760 dev_features |= NETIF_F_IP_CSUM;
3761
3762 if (feat->offload.tx &
3763 ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK)
3764 dev_features |= NETIF_F_IPV6_CSUM;
3765
3766 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK)
3767 dev_features |= NETIF_F_TSO;
3768
3769 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK)
3770 dev_features |= NETIF_F_TSO6;
3771
3772 if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK)
3773 dev_features |= NETIF_F_TSO_ECN;
3774
3775 if (feat->offload.rx_supported &
3776 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK)
3777 dev_features |= NETIF_F_RXCSUM;
3778
3779 if (feat->offload.rx_supported &
3780 ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK)
3781 dev_features |= NETIF_F_RXCSUM;
3782
3783 netdev->features =
3784 dev_features |
3785 NETIF_F_SG |
3786 NETIF_F_RXHASH |
3787 NETIF_F_HIGHDMA;
3788
3789 netdev->hw_features |= netdev->features;
3790 netdev->vlan_features |= netdev->features;
3791 }
3792
3793 static void ena_set_conf_feat_params(struct ena_adapter *adapter,
3794 struct ena_com_dev_get_features_ctx *feat)
3795 {
3796 struct net_device *netdev = adapter->netdev;
3797
3798 /* Copy mac address */
3799 if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) {
3800 eth_hw_addr_random(netdev);
3801 ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
3802 } else {
3803 ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
3804 eth_hw_addr_set(netdev, adapter->mac_addr);
3805 }
3806
3807 /* Set offload features */
3808 ena_set_dev_offloads(feat, netdev);
3809
3810 adapter->max_mtu = feat->dev_attr.max_mtu;
3811 netdev->max_mtu = adapter->max_mtu;
3812 netdev->min_mtu = ENA_MIN_MTU;
3813 }
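
/*
 * Illustrative sketch, not part of the driver: the MAC address fallback used
 * above - when the device does not report a valid unicast address, a random
 * locally administered one is generated instead. The names below are
 * hypothetical.
 */
#include <linux/etherdevice.h>

static inline void example_set_mac(struct net_device *netdev, const u8 *dev_mac)
{
	if (is_valid_ether_addr(dev_mac))
		eth_hw_addr_set(netdev, dev_mac);	/* use the device-provided MAC */
	else
		eth_hw_addr_random(netdev);		/* random, locally administered */
}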
3814
3815 static int ena_rss_init_default(struct ena_adapter *adapter)
3816 {
3817 struct ena_com_dev *ena_dev = adapter->ena_dev;
3818 struct device *dev = &adapter->pdev->dev;
3819 int rc, i;
3820 u32 val;
3821
3822 rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE);
3823 if (unlikely(rc)) {
3824 dev_err(dev, "Cannot init indirect table\n");
3825 goto err_rss_init;
3826 }
3827
3828 for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
3829 val = ethtool_rxfh_indir_default(i, adapter->num_io_queues);
3830 rc = ena_com_indirect_table_fill_entry(ena_dev, i,
3831 ENA_IO_RXQ_IDX(val));
3832 if (unlikely(rc)) {
3833 dev_err(dev, "Cannot fill indirect table\n");
3834 goto err_fill_indir;
3835 }
3836 }
3837
3838 rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL, ENA_HASH_KEY_SIZE,
3839 0xFFFFFFFF);
3840 if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3841 dev_err(dev, "Cannot fill hash function\n");
3842 goto err_fill_indir;
3843 }
3844
3845 rc = ena_com_set_default_hash_ctrl(ena_dev);
3846 if (unlikely(rc && (rc != -EOPNOTSUPP))) {
3847 dev_err(dev, "Cannot fill hash control\n");
3848 goto err_fill_indir;
3849 }
3850
3851 return 0;
3852
3853 err_fill_indir:
3854 ena_com_rss_destroy(ena_dev);
3855 err_rss_init:
3856
3857 return rc;
3858 }
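
/*
 * Illustrative sketch, not part of the driver: the default RSS indirection
 * fill above. ethtool_rxfh_indir_default() spreads table entries round-robin
 * over the RX queues (table index modulo queue count). The names below are
 * hypothetical.
 */
#include <linux/ethtool.h>
#include <linux/types.h>

static inline u32 example_default_indir_entry(u32 table_index, u32 num_rx_queues)
{
	/* Equivalent to table_index % num_rx_queues */
	return ethtool_rxfh_indir_default(table_index, num_rx_queues);
}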
3859
3860 static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
3861 {
3862 int release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3863
3864 pci_release_selected_regions(pdev, release_bars);
3865 }
3866
3867 /* ena_probe - Device Initialization Routine
3868 * @pdev: PCI device information struct
3869 * @ent: entry in ena_pci_tbl
3870 *
3871 * Returns 0 on success, negative on failure
3872 *
3873 * ena_probe initializes an adapter identified by a pci_dev structure.
3874 * The OS initialization, configuring of the adapter private structure,
3875 * and a hardware reset occur.
3876 */
3877 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
3878 {
3879 struct ena_com_dev_get_features_ctx get_feat_ctx;
3880 struct ena_com_dev *ena_dev = NULL;
3881 struct ena_adapter *adapter;
3882 struct net_device *netdev;
3883 static int adapters_found;
3884 struct devlink *devlink;
3885 u32 max_num_io_queues;
3886 bool wd_state;
3887 int bars, rc;
3888
3889 dev_dbg(&pdev->dev, "%s\n", __func__);
3890
3891 rc = pci_enable_device_mem(pdev);
3892 if (rc) {
3893 dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n");
3894 return rc;
3895 }
3896
3897 rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS));
3898 if (rc) {
3899 dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc);
3900 goto err_disable_device;
3901 }
3902
3903 pci_set_master(pdev);
3904
3905 ena_dev = vzalloc(sizeof(*ena_dev));
3906 if (!ena_dev) {
3907 rc = -ENOMEM;
3908 goto err_disable_device;
3909 }
3910
3911 bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK;
3912 rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
3913 if (rc) {
3914 dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
3915 rc);
3916 goto err_free_ena_dev;
3917 }
3918
3919 ena_dev->reg_bar = devm_ioremap(&pdev->dev,
3920 pci_resource_start(pdev, ENA_REG_BAR),
3921 pci_resource_len(pdev, ENA_REG_BAR));
3922 if (!ena_dev->reg_bar) {
3923 dev_err(&pdev->dev, "Failed to remap regs bar\n");
3924 rc = -EFAULT;
3925 goto err_free_region;
3926 }
3927
3928 ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
3929
3930 ena_dev->dmadev = &pdev->dev;
3931
3932 netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), ENA_MAX_RINGS);
3933 if (!netdev) {
3934 dev_err(&pdev->dev, "alloc_etherdev_mq failed\n");
3935 rc = -ENOMEM;
3936 goto err_free_region;
3937 }
3938
3939 SET_NETDEV_DEV(netdev, &pdev->dev);
3940 adapter = netdev_priv(netdev);
3941 adapter->ena_dev = ena_dev;
3942 adapter->netdev = netdev;
3943 adapter->pdev = pdev;
3944 adapter->msg_enable = DEFAULT_MSG_ENABLE;
3945
3946 ena_dev->net_device = netdev;
3947
3948 pci_set_drvdata(pdev, adapter);
3949
3950 rc = ena_phc_alloc(adapter);
3951 if (rc) {
3952 netdev_err(netdev, "ena_phc_alloc failed\n");
3953 goto err_netdev_destroy;
3954 }
3955
3956 rc = ena_com_allocate_customer_metrics_buffer(ena_dev);
3957 if (rc) {
3958 netdev_err(netdev, "ena_com_allocate_customer_metrics_buffer failed\n");
3959 goto err_free_phc;
3960 }
3961
3962 rc = ena_map_llq_mem_bar(pdev, ena_dev, bars);
3963 if (rc) {
3964 dev_err(&pdev->dev, "ENA LLQ bar mapping failed\n");
3965 goto err_metrics_destroy;
3966 }
3967
3968 /* Need to do this before ena_device_init */
3969 devlink = ena_devlink_alloc(adapter);
3970 if (!devlink) {
3971 netdev_err(netdev, "ena_devlink_alloc failed\n");
3972 rc = -ENOMEM;
3973 goto err_metrics_destroy;
3974 }
3975
3976 rc = ena_device_init(adapter, pdev, &get_feat_ctx, &wd_state);
3977 if (rc) {
3978 dev_err(&pdev->dev, "ENA device init failed\n");
3979 if (rc == -ETIME)
3980 rc = -EPROBE_DEFER;
3981 goto ena_devlink_destroy;
3982 }
3983
3984 /* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
3985 * Updated during device initialization with the real granularity
3986 */
3987 ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
3988 ena_dev->intr_moder_rx_interval = ENA_INTR_INITIAL_RX_INTERVAL_USECS;
3989 ena_dev->intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
3990 max_num_io_queues = ena_calc_max_io_queue_num(pdev, ena_dev, &get_feat_ctx);
3991 if (unlikely(!max_num_io_queues)) {
3992 rc = -EFAULT;
3993 goto err_device_destroy;
3994 }
3995
3996 ena_set_conf_feat_params(adapter, &get_feat_ctx);
3997
3998 adapter->reset_reason = ENA_REGS_RESET_NORMAL;
3999
4000 adapter->num_io_queues = max_num_io_queues;
4001 adapter->max_num_io_queues = max_num_io_queues;
4002 adapter->last_monitored_tx_qid = 0;
4003
4004 adapter->xdp_first_ring = 0;
4005 adapter->xdp_num_queues = 0;
4006
4007 adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
4008 if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV)
4009 adapter->disable_meta_caching =
4010 !!(get_feat_ctx.llq.accel_mode.u.get.supported_flags &
4011 BIT(ENA_ADMIN_DISABLE_META_CACHING));
4012
4013 adapter->wd_state = wd_state;
4014
4015 snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found);
4016
4017 rc = ena_com_init_interrupt_moderation(adapter->ena_dev);
4018 if (rc) {
4019 dev_err(&pdev->dev,
4020 "Failed to query interrupt moderation feature\n");
4021 goto err_device_destroy;
4022 }
4023
4024 ena_init_io_rings(adapter,
4025 0,
4026 adapter->xdp_num_queues +
4027 adapter->num_io_queues);
4028
4029 netdev->netdev_ops = &ena_netdev_ops;
4030 netdev->watchdog_timeo = TX_TIMEOUT;
4031 ena_set_ethtool_ops(netdev);
4032
4033 netdev->priv_flags |= IFF_UNICAST_FLT;
4034
4035 u64_stats_init(&adapter->syncp);
4036
4037 rc = ena_enable_msix_and_set_admin_interrupts(adapter);
4038 if (rc) {
4039 dev_err(&pdev->dev,
4040 "Failed to enable and set the admin interrupts\n");
4041 goto err_worker_destroy;
4042 }
4043 rc = ena_rss_init_default(adapter);
4044 if (rc && (rc != -EOPNOTSUPP)) {
4045 dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc);
4046 goto err_free_msix;
4047 }
4048
4049 ena_config_debug_area(adapter);
4050
4051 if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
4052 netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
4053 NETDEV_XDP_ACT_REDIRECT;
4054
4055 memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
4056
4057 netif_carrier_off(netdev);
4058
4059 rc = register_netdev(netdev);
4060 if (rc) {
4061 dev_err(&pdev->dev, "Cannot register net device\n");
4062 goto err_rss;
4063 }
4064
4065 ena_debugfs_init(netdev);
4066
4067 INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
4068
4069 adapter->last_keep_alive_jiffies = jiffies;
4070 adapter->keep_alive_timeout = ENA_DEVICE_KALIVE_TIMEOUT;
4071 adapter->missing_tx_completion_to = TX_TIMEOUT;
4072 adapter->missing_tx_completion_threshold = MAX_NUM_OF_TIMEOUTED_PACKETS;
4073
4074 ena_update_hints(adapter, &get_feat_ctx.hw_hints);
4075
4076 timer_setup(&adapter->timer_service, ena_timer_service, 0);
4077 mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
4078
4079 dev_info(&pdev->dev,
4080 "%s found at mem %lx, mac addr %pM\n",
4081 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
4082 netdev->dev_addr);
4083
4084 set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
4085
4086 adapters_found++;
4087
4088 /* From this point, the devlink device is visible to users.
4089 * Perform the registration last to ensure that all the resources
4090 * are available and that the netdevice is registered.
4091 */
4092 ena_devlink_register(devlink, &pdev->dev);
4093
4094 return 0;
4095
4096 err_rss:
4097 ena_com_delete_debug_area(ena_dev);
4098 ena_com_rss_destroy(ena_dev);
4099 err_free_msix:
4100 ena_com_dev_reset(ena_dev, ENA_REGS_RESET_INIT_ERR);
4101 /* stop submitting admin commands on a device that was reset */
4102 ena_com_set_admin_running_state(ena_dev, false);
4103 ena_free_mgmnt_irq(adapter);
4104 ena_disable_msix(adapter);
4105 err_worker_destroy:
4106 timer_delete(&adapter->timer_service);
4107 err_device_destroy:
4108 ena_com_delete_host_info(ena_dev);
4109 ena_com_admin_destroy(ena_dev);
4110 ena_devlink_destroy:
4111 ena_devlink_free(devlink);
4112 err_metrics_destroy:
4113 ena_com_delete_customer_metrics_buffer(ena_dev);
4114 err_free_phc:
4115 ena_phc_free(adapter);
4116 err_netdev_destroy:
4117 free_netdev(netdev);
4118 err_free_region:
4119 ena_release_bars(ena_dev, pdev);
4120 err_free_ena_dev:
4121 vfree(ena_dev);
4122 err_disable_device:
4123 pci_disable_device(pdev);
4124 return rc;
4125 }
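
/*
 * Illustrative sketch, not part of the driver: the goto-based unwind pattern
 * used by ena_probe() above, where each error label releases exactly the
 * resources acquired before the failing step, in reverse order. All names
 * below are hypothetical stubs.
 */
static inline int example_acquire_a(void) { return 0; }
static inline int example_acquire_b(void) { return 0; }
static inline void example_release_a(void) { }

static inline int example_probe_style_init(void)
{
	int rc;

	rc = example_acquire_a();	/* hypothetical step 1 */
	if (rc)
		return rc;		/* nothing to undo yet */

	rc = example_acquire_b();	/* hypothetical step 2 */
	if (rc)
		goto err_release_a;	/* undo step 1 only */

	return 0;

err_release_a:
	example_release_a();
	return rc;
}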
4126
4127 /*****************************************************************************/
4128
4129 /* __ena_shutoff - Helper used in both PCI remove/shutdown routines
4130 * @pdev: PCI device information struct
4131 * @shutdown: Is it a shutdown operation? If false, means it is a removal
4132 *
4133 * __ena_shutoff is a helper routine that does the real work on the shutdown and
4134 * removal paths; the difference between those paths is whether the netdevice is
4135 * detached or unregistered.
4136 */
4137 static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
4138 {
4139 struct ena_adapter *adapter = pci_get_drvdata(pdev);
4140 struct ena_com_dev *ena_dev;
4141 struct net_device *netdev;
4142
4143 ena_dev = adapter->ena_dev;
4144 netdev = adapter->netdev;
4145
4146 ena_debugfs_terminate(netdev);
4147
4148 /* Make sure timer and reset routine won't be called after
4149 * freeing device resources.
4150 */
4151 timer_delete_sync(&adapter->timer_service);
4152 cancel_work_sync(&adapter->reset_task);
4153
4154 rtnl_lock(); /* lock released inside the below if-else block */
4155 adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
4156 ena_destroy_device(adapter, true);
4157
4158 ena_phc_free(adapter);
4159
4160 ena_devlink_unregister(adapter->devlink);
4161 ena_devlink_free(adapter->devlink);
4162
4163 if (shutdown) {
4164 netif_device_detach(netdev);
4165 dev_close(netdev);
4166 rtnl_unlock();
4167 } else {
4168 rtnl_unlock();
4169 unregister_netdev(netdev);
4170 free_netdev(netdev);
4171 }
4172
4173 ena_com_rss_destroy(ena_dev);
4174
4175 ena_com_delete_debug_area(ena_dev);
4176
4177 ena_com_delete_host_info(ena_dev);
4178
4179 ena_com_delete_customer_metrics_buffer(ena_dev);
4180
4181 ena_release_bars(ena_dev, pdev);
4182
4183 pci_disable_device(pdev);
4184
4185 vfree(ena_dev);
4186 }
4187
4188 /* ena_remove - Device Removal Routine
4189 * @pdev: PCI device information struct
4190 *
4191 * ena_remove is called by the PCI subsystem to alert the driver
4192 * that it should release a PCI device.
4193 */
4194
4195 static void ena_remove(struct pci_dev *pdev)
4196 {
4197 __ena_shutoff(pdev, false);
4198 }
4199
4200 /* ena_shutdown - Device Shutdown Routine
4201 * @pdev: PCI device information struct
4202 *
4203 * ena_shutdown is called by the PCI subsystem to alert the driver that
4204 * a shutdown/reboot (or kexec) is happening and device must be disabled.
4205 */
4206
4207 static void ena_shutdown(struct pci_dev *pdev)
4208 {
4209 __ena_shutoff(pdev, true);
4210 }
4211
4212 /* ena_suspend - PM suspend callback
4213 * @dev_d: Device information struct
4214 */
4215 static int __maybe_unused ena_suspend(struct device *dev_d)
4216 {
4217 struct pci_dev *pdev = to_pci_dev(dev_d);
4218 struct ena_adapter *adapter = pci_get_drvdata(pdev);
4219
4220 ena_increase_stat(&adapter->dev_stats.suspend, 1, &adapter->syncp);
4221
4222 rtnl_lock();
4223 if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
4224 dev_err(&pdev->dev,
4225 "Ignoring device reset request as the device is being suspended\n");
4226 clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
4227 }
4228 ena_destroy_device(adapter, true);
4229 rtnl_unlock();
4230 return 0;
4231 }
4232
4233 /* ena_resume - PM resume callback
4234 * @dev_d: Device information struct
4235 */
4236 static int __maybe_unused ena_resume(struct device *dev_d)
4237 {
4238 struct ena_adapter *adapter = dev_get_drvdata(dev_d);
4239 int rc;
4240
4241 ena_increase_stat(&adapter->dev_stats.resume, 1, &adapter->syncp);
4242
4243 rtnl_lock();
4244 rc = ena_restore_device(adapter);
4245 rtnl_unlock();
4246 return rc;
4247 }
4248
4249 static SIMPLE_DEV_PM_OPS(ena_pm_ops, ena_suspend, ena_resume);
4250
4251 static struct pci_driver ena_pci_driver = {
4252 .name = DRV_MODULE_NAME,
4253 .id_table = ena_pci_tbl,
4254 .probe = ena_probe,
4255 .remove = ena_remove,
4256 .shutdown = ena_shutdown,
4257 .driver.pm = &ena_pm_ops,
4258 .sriov_configure = pci_sriov_configure_simple,
4259 };
4260
4261 static int __init ena_init(void)
4262 {
4263 int ret;
4264
4265 ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME);
4266 if (!ena_wq) {
4267 pr_err("Failed to create workqueue\n");
4268 return -ENOMEM;
4269 }
4270
4271 ret = pci_register_driver(&ena_pci_driver);
4272 if (ret)
4273 destroy_workqueue(ena_wq);
4274
4275 return ret;
4276 }
4277
4278 static void __exit ena_cleanup(void)
4279 {
4280 pci_unregister_driver(&ena_pci_driver);
4281
4282 if (ena_wq) {
4283 destroy_workqueue(ena_wq);
4284 ena_wq = NULL;
4285 }
4286 }
4287
4288 /******************************************************************************
4289 ******************************** AENQ Handlers *******************************
4290 *****************************************************************************/
4291 /* ena_update_on_link_change:
4292 * Notify the network interface about the change in link status
4293 */
4294 static void ena_update_on_link_change(void *adapter_data,
4295 struct ena_admin_aenq_entry *aenq_e)
4296 {
4297 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4298 struct ena_admin_aenq_link_change_desc *aenq_desc =
4299 (struct ena_admin_aenq_link_change_desc *)aenq_e;
4300 int status = aenq_desc->flags &
4301 ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK;
4302
4303 if (status) {
4304 netif_dbg(adapter, ifup, adapter->netdev, "%s\n", __func__);
4305 set_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4306 if (!test_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags))
4307 netif_carrier_on(adapter->netdev);
4308 } else {
4309 clear_bit(ENA_FLAG_LINK_UP, &adapter->flags);
4310 netif_carrier_off(adapter->netdev);
4311 }
4312 }
4313
4314 static void ena_keep_alive_wd(void *adapter_data,
4315 struct ena_admin_aenq_entry *aenq_e)
4316 {
4317 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4318 struct ena_admin_aenq_keep_alive_desc *desc;
4319 u64 rx_drops;
4320 u64 tx_drops;
4321
4322 desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
4323 adapter->last_keep_alive_jiffies = jiffies;
4324
4325 rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
4326 tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
4327
4328 u64_stats_update_begin(&adapter->syncp);
4329 /* These stats are accumulated by the device, so the counters indicate
4330 * all drops since last reset.
4331 */
4332 adapter->dev_stats.rx_drops = rx_drops;
4333 adapter->dev_stats.tx_drops = tx_drops;
4334 u64_stats_update_end(&adapter->syncp);
4335 }
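
/*
 * Illustrative sketch, not part of the driver: rebuilding a 64-bit counter
 * from the 32-bit high/low words carried in the keep-alive descriptor, as
 * done above for the rx/tx drop statistics. The name below is hypothetical.
 */
#include <linux/types.h>

static inline u64 example_combine_hi_lo(u32 hi, u32 lo)
{
	/* Cast before shifting so the high word is not truncated to 32 bits */
	return ((u64)hi << 32) | lo;
}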
4336
4337 static void ena_notification(void *adapter_data,
4338 struct ena_admin_aenq_entry *aenq_e)
4339 {
4340 struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
4341 struct ena_admin_ena_hw_hints *hints;
4342
4343 WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION,
4344 "Invalid group(%x) expected %x\n",
4345 aenq_e->aenq_common_desc.group,
4346 ENA_ADMIN_NOTIFICATION);
4347
4348 switch (aenq_e->aenq_common_desc.syndrome) {
4349 case ENA_ADMIN_UPDATE_HINTS:
4350 hints = (struct ena_admin_ena_hw_hints *)
4351 (&aenq_e->inline_data_w4);
4352 ena_update_hints(adapter, hints);
4353 break;
4354 default:
4355 netif_err(adapter, drv, adapter->netdev,
4356 "Invalid aenq notification link state %d\n",
4357 aenq_e->aenq_common_desc.syndrome);
4358 }
4359 }
4360
4361 /* This handler will be called for an unknown event group or an unimplemented handler */
4362 static void unimplemented_aenq_handler(void *data,
4363 struct ena_admin_aenq_entry *aenq_e)
4364 {
4365 struct ena_adapter *adapter = (struct ena_adapter *)data;
4366
4367 netif_err(adapter, drv, adapter->netdev,
4368 "Unknown event was received or event with unimplemented handler\n");
4369 }
4370
4371 static struct ena_aenq_handlers aenq_handlers = {
4372 .handlers = {
4373 [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change,
4374 [ENA_ADMIN_NOTIFICATION] = ena_notification,
4375 [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd,
4376 },
4377 .unimplemented_handler = unimplemented_aenq_handler
4378 };
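
/*
 * Illustrative sketch, not part of the driver: the table-driven dispatch
 * behind the aenq_handlers structure above - a handler is looked up by event
 * group and a fallback covers anything unknown or unimplemented. The names
 * below are hypothetical.
 */
#include <linux/kernel.h>

typedef void (*example_handler_t)(void *data);

struct example_dispatch {
	example_handler_t handlers[4];
	example_handler_t fallback;
};

static inline void example_dispatch_event(const struct example_dispatch *d,
					  unsigned int group, void *data)
{
	example_handler_t fn = NULL;

	if (group < ARRAY_SIZE(d->handlers))
		fn = d->handlers[group];

	/* Fall back when the group is out of range or has no handler */
	if (!fn)
		fn = d->fallback;

	fn(data);
}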
4379
4380 module_init(ena_init);
4381 module_exit(ena_cleanup);
4382